   title       = {Unknown Data | Mining and consolidating research dataset metadata on the Web}, 
   url         = {https://unknowndataproject.github.io/},
   biburl      = {https://www.bibsonomy.org/url/91d22817bd34252e6a7030994d2bedd6/astrupp},
   keywords    = {crawl data dataset datasets web},
   added-at    = {2023-09-19T15:48:58.000+0200},
   description = {},
   interhash   = {91d22817bd34252e6a7030994d2bedd6}, 
   intrahash   = {91d22817bd34252e6a7030994d2bedd6}

   title       = {The WebDataCommons Microdata, RDFa and Microformat Dataset Series},
   url         = {https://www.uni-mannheim.de/media/Einrichtungen/dws/Files_Research/Web-based_Systems/pub/Meusel-etal-TheWDCMicrodataRdfaMicroformatsDataSeries-ISWC2014-rbds.pdf},
   biburl      = {https://www.bibsonomy.org/url/63c3dfbc9992c1008d45b994f1cf165f/astrupp},
   keywords    = {data metadata paper pdf web},
   added-at    = {2023-09-19T10:20:48.000+0200},
   description = {In order to support web applications to understand the content of HTML pages an increasing number of websites have started to annotate structured data within their pages using markup formats such as Microdata, RDFa, Microformats. The annotations are used by Google, Yahoo!, Yandex, Bing and Facebook to enrich search results and to display entity descriptions within their applications. In this paper, we present a series of publicly accessible Microdata, RDFa, Microformats datasets that we have extracted from three large web corpora dating from 2010, 2012 and 2013.},
   interhash   = {63c3dfbc9992c1008d45b994f1cf165f}, 
   intrahash   = {63c3dfbc9992c1008d45b994f1cf165f}

   title       = {WDC - RDFa, Microdata, and Microformat Data Sets}, 
   url         = {http://webdatacommons.org/structureddata/index.html#references},
   biburl      = {https://www.bibsonomy.org/url/ce818a401f9451a2760d8ffab531144a/astrupp},
   keywords    = {crawl data metadata semantic web},
   added-at    = {2023-09-19T10:19:48.000+0200},
   description = {More and more websites have started to embed structured data describing products, people, organizations, places, and events into their HTML pages using markup standards such as Microdata, JSON-LD, RDFa, and Microformats. The Web Data Commons project extracts this data from several billion web pages. So far the project provides 11 different data set releases extracted from the Common Crawls 2010 to 2022. The project provides the extracted data for download and publishes statistics about the deployment of the different formats.},
   interhash   = {ce818a401f9451a2760d8ffab531144a}, 
   intrahash   = {ce818a401f9451a2760d8ffab531144a}

   title       = {Schema.org - Schema.org}, 
   url         = {https://schema.org/},
   biburl      = {https://www.bibsonomy.org/url/74cccdd8ca38bd843ce2fcc0061f3c8d/bshanks},
   keywords    = {data schema semweb web},
   added-at    = {2022-01-28T21:38:02.000+0100},
   description = {},
   interhash   = {74cccdd8ca38bd843ce2fcc0061f3c8d}, 
   intrahash   = {74cccdd8ca38bd843ce2fcc0061f3c8d}

   title       = {Microformats : Meaningful HTML}, 
   url         = {https://microformats.io/},
   biburl      = {https://www.bibsonomy.org/url/b2f3faea46f4a43bd053f2b54c56776b/bshanks},
   keywords    = {data schema semweb web},
   added-at    = {2022-01-28T21:37:50.000+0100},
   description = {},
   interhash   = {b2f3faea46f4a43bd053f2b54c56776b}, 
   intrahash   = {b2f3faea46f4a43bd053f2b54c56776b}

   title       = {Sunsetting support for data-vocabulary | Google Search Central Blog | Google Developers},
   url         = {https://developers.google.com/search/blog/2020/01/data-vocabulary},
   biburl      = {https://www.bibsonomy.org/url/2f1677fb5223473b70c61eca6a8da1e9/bshanks},
   keywords    = {data schema web},
   added-at    = {2022-01-28T21:37:37.000+0100},
   description = {},
   interhash   = {2f1677fb5223473b70c61eca6a8da1e9}, 
   intrahash   = {2f1677fb5223473b70c61eca6a8da1e9}

   title       = {WDR}, 
   url         = {https://webdatarender.com/},
   biburl      = {https://www.bibsonomy.org/url/f554ef10894f8985bbbbe083e626c243/analyst},
   keywords    = {blog data html json web},
   added-at    = {2020-12-10T21:05:10.000+0100},
   description = {This website is a valid JSON},
   interhash   = {f554ef10894f8985bbbbe083e626c243}, 
   intrahash   = {f554ef10894f8985bbbbe083e626c243}

   title       = {Common Crawl},
   url         = {http://commoncrawl.org/},
   biburl      = {https://www.bibsonomy.org/url/a9c25dd160c8f8e45d2553a51517851b/bshanks},
   keywords    = {data crawl web corpus open nlp},
   added-at    = {2016-12-12T10:00:17.000+0100},
   description = {},
   interhash   = {a9c25dd160c8f8e45d2553a51517851b}, 
   intrahash   = {a9c25dd160c8f8e45d2553a51517851b}

   title       = {Web Data Commons}, 
   url         = {http://webdatacommons.org/},
   biburl      = {https://www.bibsonomy.org/url/d18b86c1a01b30214539ac6b88727aa1/hotho},
   keywords    = {common crawl data dataset rdf relations semantic web},
   added-at    = {2016-11-10T08:38:37.000+0100},
   description = {},
   interhash   = {d18b86c1a01b30214539ac6b88727aa1}, 
   intrahash   = {d18b86c1a01b30214539ac6b88727aa1}

   title       = {Grafana - Beautiful Metrics Dashboards, Data Visualization and Monitoring}, 
   url         = {http://grafana.org/},
   biburl      = {https://www.bibsonomy.org/url/e663920dfc73405060ba3b70780e8db8/hotho},
   keywords    = {data grafana metrics visualization web},
   added-at    = {2016-10-12T14:42:06.000+0200},
   description = {Grafana is the leading open source project for visualizing metrics. Supporting rich integration for every popular database like Graphite, Prometheus and InfluxDB.},
   interhash   = {e663920dfc73405060ba3b70780e8db8}, 
   intrahash   = {e663920dfc73405060ba3b70780e8db8}

   title       = {Net Data Directory}, 
   url         = {https://netdatadirectory.org/},
   biburl      = {https://www.bibsonomy.org/url/b36877883af584d7e4ce08c46c66ce3c/jaeschke},
   keywords    = {data dataset directory internet monitor net web},
   added-at    = {2016-06-03T12:15:51.000+0200},
   description = {The Net Data Directory collects and shares information on different sources of data about the Internet. For more about the project, see our about page. To get started, use the search box below, or check out our quick start guide.},
   interhash   = {b36877883af584d7e4ce08c46c66ce3c}, 
   intrahash   = {b36877883af584d7e4ce08c46c66ce3c}

   title       = {Mining Ubiquitous and Social Environments - Workshop at ECML/PKDD 2010}, 
   url         = {http://www.kde.cs.uni-kassel.de/ws/muse2010},
   biburl      = {https://www.bibsonomy.org/url/f3a49dcf5c170e1aff0cc905c11d2254/kde-alumni},
   keywords    = {2010 data mining myown social web workshop},
   added-at    = {2015-10-16T10:54:07.000+0200},
   description = {},
   interhash   = {f3a49dcf5c170e1aff0cc905c11d2254}, 
   intrahash   = {f3a49dcf5c170e1aff0cc905c11d2254}

   title       = {Linked Data Fragments}, 
   url         = {http://linkeddatafragments.org/},
   biburl      = {https://www.bibsonomy.org/url/b3bda9c7a58188b9081d2f32a7ceffb5/brightbyte},
   keywords    = {data fragments linked pattern query rdf semantic sparql triple web},
   added-at    = {2015-06-26T22:50:26.000+0200},
   description = {},
   interhash   = {b3bda9c7a58188b9081d2f32a7ceffb5}, 
   intrahash   = {b3bda9c7a58188b9081d2f32a7ceffb5}

   title       = {Web Data Commons}, 
   url         = {http://webdatacommons.org/},
   biburl      = {https://www.bibsonomy.org/url/d18b86c1a01b30214539ac6b88727aa1/jaeschke},
   keywords    = {commoncrawl crawl data dataset linked lod microformat open rdf semantic web},
   added-at    = {2015-04-13T10:42:22.000+0200},
   description = {},
   interhash   = {d18b86c1a01b30214539ac6b88727aa1}, 
   intrahash   = {d18b86c1a01b30214539ac6b88727aa1}

   title       = {Signed up for Facebook? This is what you really agreed to | Toronto Star}, 
   url         = {http://www.thestar.com/business/2015/02/24/signed-up-for-facebook-this-is-what-you-really-agreed-too.html},
   biburl      = {https://www.bibsonomy.org/url/abe19bf667d52dd7e42fff4c495fc997/shelley.adams},
   keywords    = {canada data law newspaper privacy socialnetworking web},
   added-at    = {2015-02-25T16:20:49.000+0100},
   description = {The Star and privacy lawyer David Fraser have teamed up to annotate Facebook's privacy policy},
   interhash   = {abe19bf667d52dd7e42fff4c495fc997}, 
   intrahash   = {abe19bf667d52dd7e42fff4c495fc997}

   title       = {Protovis}, 
   url         = {http://mbostock.github.io/protovis/},
   biburl      = {https://www.bibsonomy.org/url/2dd6c081d0f19c31fd65485952e9c93e/victoria_helen},
   keywords    = {data visualisation web},
   added-at    = {2015-01-08T12:03:54.000+0100},
   description = {Protovis composes custom views of data with simple marks such as bars and dots. Unlike low-level graphics libraries that quickly become tedious for visualization, Protovis defines marks through dynamic properties that encode data, allowing inheritance, scales and layouts to simplify construction.

Protovis is free and open-source, provided under the BSD License. It uses JavaScript and SVG for web-native visualizations; no plugin required (though you will need a modern web browser)! Although programming experience is helpful, Protovis is mostly declarative and designed to be learned by example.},
   interhash   = {2dd6c081d0f19c31fd65485952e9c93e}, 
   intrahash   = {2dd6c081d0f19c31fd65485952e9c93e}

   title       = {Host Link Graph JISC UK Web Domain Dataset (1996-2010)}, 
   url         = {http://data.webarchive.org.uk/opendata/ukwa.ds.2/host-linkage/},
   biburl      = {https://www.bibsonomy.org/url/c2bea98fca7ce9e5caacc6a09c868994/jaeschke},
   keywords    = {archive data dataset graph host jisc link uk web},
   added-at    = {2015-01-06T08:31:46.000+0100},
   description = {UK Web Archive Open Data},
   interhash   = {c2bea98fca7ce9e5caacc6a09c868994}, 
   intrahash   = {c2bea98fca7ce9e5caacc6a09c868994}

   title       = {JISC UK Web Domain Dataset (1996-2013)}, 
   url         = {http://data.webarchive.org.uk/opendata/ukwa.ds.2/},
   biburl      = {https://www.bibsonomy.org/url/7fe3f3d648b12c1f9df5fa52f73b3abd/jaeschke},
   keywords    = {archive data dataset domain jisc open uk web},
   added-at    = {2015-01-06T08:29:06.000+0100},
   description = {UK Web Archive Open Data},
   interhash   = {7fe3f3d648b12c1f9df5fa52f73b3abd}, 
   intrahash   = {7fe3f3d648b12c1f9df5fa52f73b3abd}

   title       = {Interactive Data Visualization for the Web}, 
   url         = {http://chimera.labs.oreilly.com/books/1230000000345/index.html},
   biburl      = {https://www.bibsonomy.org/url/984d3c950fe446f6525dafcc24cdb7b0/saberio},
   keywords    = {data interactive visualization web},
   added-at    = {2014-09-13T15:45:25.000+0200},
   description = {},
   interhash   = {984d3c950fe446f6525dafcc24cdb7b0}, 
   intrahash   = {984d3c950fe446f6525dafcc24cdb7b0}

   title       = {Web Data Mining, book by Bing Liu}, 
   url         = {http://www.cs.uic.edu/~liub/WebMiningBook.html},
   biburl      = {https://www.bibsonomy.org/url/386f533f8b99ba01202555ec7f85f793/jaeschke},
   keywords    = {algorithm book data mining web},
   added-at    = {2014-04-02T10:46:35.000+0200},
   description = {Web data mining techniques and algorithms},
   interhash   = {386f533f8b99ba01202555ec7f85f793}, 
   intrahash   = {386f533f8b99ba01202555ec7f85f793}