<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
    <url>
        <loc>https://commoncrawl.org</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/ccbot</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/contact-us</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/faq</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/get-started</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/latest-crawl</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/overview</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/collaborators</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/privacy-policy</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/research-papers</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/search</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/terms-of-use</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/web-graphs</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/jobs</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/uk-copyright-and-ai-consultation</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/ai-agent</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/cdxj-index</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/columnar-index</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/about</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/alex-xue</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/chris-tolles</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/eva-ho</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/ford-heilizer</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/gil-elbaz-chairman</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/greg-lindahl-engineer</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/hande-celikkanat</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/hugh-marbury</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/jen-english</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/joy-jing</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/julien-nioche</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/kevin-debre-legal-counsel</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/kurt-bollacker-advisor</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/laurie-burchell</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/lisa-green</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/luca-foppiano</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/malte-ostendorff</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/michael-birnbach</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/michael-birnbach-operations</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/michael-paris</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/mike-markson</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/pedro-ortiz-suarez</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/rich-skrenta-director</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/sam-reddy</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/sebastian-nagel-engineer</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/stephen-burns</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/stephen-merity</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/thijs-dalhuijsen</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/thom-vaughan</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/team/wayne-yamamoto</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/2012-crawl-data-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/3-25-billion-pages-crawled-in-july-2018</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/5-good-reads-in-big-open-data-feb-13-2015</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/5-good-reads-in-big-open-data-feb-20-2015</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/5-good-reads-in-big-open-data-feb-6-2015</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/5-good-reads-in-big-open-data-february-27-2015</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/5-good-reads-in-big-open-data-march-13</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/5-good-reads-in-big-open-data-march-20-2015</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/5-good-reads-in-big-open-data-march-26-2015</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/5-good-reads-in-big-open-data-march-6-2015</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/a-further-look-into-the-prevalence-of-various-ml-opt-out-protocols</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/a-look-inside-common-crawls-210tb-2012-web-corpus</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/a-sampling-of-2025-research-referencing-common-crawl</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/ai-optimization-is-here-are-you-ready-for-search-2-0</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/ai-plumbers-at-fosdem26</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/amazon-web-services-sponsoring-50-in-credit-to-all-contest-entrants</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/analysis-of-the-ncsu-library-urls-in-the-common-crawl-index</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/analyzing-a-web-graph-with-129-billion-edges-using-flashgraph</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/announcing-a-change-to-common-crawl-dataset-size-reporting</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/announcing-gneissweb-annotations</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/announcing-the-common-crawl-index</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/announcing-the-first-workshop-on-multilingual-data-quality-signals</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/announcing-the-whirlwind-tour-of-common-crawls-datasets-using-java</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/announcing-the-whirlwind-tour-of-common-crawls-datasets-using-python</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/announcing-the-winners-of-the-code-contest</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/answers-to-recent-community-questions</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/april-2014-crawl-data-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/april-2015-crawl-archive-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/april-2016-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/april-2017-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/april-2018-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/april-2019-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/april-2021-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/april-2024-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/april-2025-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/april-2026-common-crawl-newsletter</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/april-2026-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/august-2014-crawl-data-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/august-2015-crawl-archive-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/august-2016-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/august-2017-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/august-2018-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/august-2019-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/august-2020-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/august-2022-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/august-2024-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/august-2025-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/august-september-2024-newsletter</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/balancing-discovery-and-privacy-a-look-into-opt-out-protocols</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/big-data-week-meetups-in-sf-and-around-the-world</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/blekko-donates-search-data-to-common-crawl</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/bridging-digital-exploration-and-scientific-frontiers</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/cc-citations-a-visualization-of-research-papers-referencing-common-crawl</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-at-the-mozilla-festival-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-at-un-open-source-week-june-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-celebrates-world-digital-preservation-day</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-code-contest-extended-through-the-holiday-weekend</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-discussion-list</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-enters-a-new-phase</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-foundation-at-acl-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-foundation-at-colm-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-foundation-at-neurips-2024-expanding-horizons-and-building-connections</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-foundation-at-stanford-hai</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-foundation-at-stanford-hai-a-shared-legacy-of-data-and-innovation</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-foundation-opt-out-registry</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-move-to-nutch</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-on-aws-public-data-sets</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-statistics-now-available-on-hugging-face</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawl-url-index</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawls-advisory-board</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/common-crawls-brand-spanking-new-video-and-first-ever-code-contest</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/commonlid-re-evaluating-state-of-the-art-language-identification-performance-on-web-data</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/data-2-0-summit</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/december-2014-crawl-archive-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/december-2016-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/december-2017-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/december-2018-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/december-2019-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/december-2024-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/december-2025-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/dialog-and-discovery-at-ai-dev-2024</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/evaluating-graph-computation-systems-performance-and-scale</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/expanding-the-language-and-cultural-coverage-of-common-crawl</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/february-2015-crawl-archive-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/february-2016-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/february-2017-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/february-2018-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/february-2019-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/february-2020-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/february-2025-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/february-2026-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/february-march-2021-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/february-march-2024-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/from-seo-to-aio-why-your-content-needs-to-exist-in-ai-training-data</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/gil-elbaz-and-nova-spivack-on-this-week-in-startups</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/gneissweb-annotations-examples</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-april-may-and-june-2024</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-april-may-and-june-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-august-september-and-october-2024</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-august-september-and-october-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-december-2024-and-january-february-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-december-2025-and-january-february-2026</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-february-march-and-april-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-february-march-and-april-2026</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-february-march-april-and-may-2024</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-january-february-and-march-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-january-february-and-march-2026</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-july-august-and-september-2024</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-july-august-and-september-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-june-july-and-august-2024</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-june-july-and-august-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-march-april-and-may-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-may-june-and-july-2024</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-may-june-and-july-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-november-december-2024-and-january-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-november-december-2025-and-january-2026</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-october-november-and-december-2024</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-october-november-december-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-september-october-and-november-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host--and-domain-level-web-graphs-september-october-november-2024</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-aug-sep-oct-2019</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-augseptoct-2017</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-feb-apr-may-2021</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-febmarapr-2019</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-febmarmay-2020</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-julaugsep-2020</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-jun-jul-sep-2021</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-mar-may-oct-2023</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-may-jun-aug-2022</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-may-sep-nov-2023</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-mayjunejuly-2019</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-nov-dec-2018-jan-2019</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-novdecjan-2019-2020</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-november-december-2023-february-march-2024-and-april-2024</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-oct-nov-jan-2020-2021</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-oct-nov-jan-2021-2022</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-sep-nov-jan-2022-2023</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/host-and-domain-level-web-graphs-september-october-november-december-2023-and-february-march-2024</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/hostgraph-2017-feb-mar-apr-crawls</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/how-seos-are-using-common-crawls-web-graph-data-for-ai-ranking-signals</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/hyperlink-graph-from-web-data-commons</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/iab-workshop-on-ai-control</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/ietf-123-report</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/iipc-general-assembly-web-archiving-conference-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/index-to-warc-files-and-urls-in-columnar-format</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/interactive-webgraph-statistics-notebook-released</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/introducing-cc-downloader</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/introducing-cloudfront-access-to-common-crawl-data</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/introducing-common-crawl-ai-agent-by-readyai</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/introducing-the-common-crawl-errata-page-for-data-transparency</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/introducing-the-host-index</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/introducing-the-new-examples-resources-browser</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/ipv6-adoption-across-the-top-100k-web-hosts</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/jan-feb-2023-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/january-2015-crawl-archive-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/january-2017-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/january-2018-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/january-2019-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/january-2020-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/january-2021-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/january-2022-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/january-2025-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/january-2026-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/january-february-2025-newsletter</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/july-2014-crawl-data-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/july-2015-crawl-archive-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/july-2016-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/july-2017-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/july-2019-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/july-2020-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/july-2024-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/july-2025-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/july-august-2021-crawl-archive-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/july-august-2025-newsletter</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/june-2015-crawl-archive-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/june-2016-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/june-2017-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/june-2018-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/june-2019-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/june-2021-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/june-2024-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/june-2025-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/june-july-2022-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/learn-hadoop-and-get-a-paper-published</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/lexalytics-text-analysis-work-with-common-crawl-data</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/mapreduce-for-the-masses</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/mar-apr-2023-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/march-2014-crawl-data-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/march-2015-crawl-archive-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/march-2017-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/march-2018-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/march-2019-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/march-2025-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/march-2026-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/march-april-2020-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/march-april-2024-newsletter</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/march-april-2025-newsletter</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/mat-kelcey-joins-the-common-crawl-advisory-board</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/may-2015-crawl-archive-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/may-2016-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/may-2017-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/may-2018-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/may-2019-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/may-2021-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/may-2022-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/may-2024-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/may-2025-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/may-june-2020-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/may-june-2023-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/may-june-2024-newsletter</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/may-june-2025-newsletter</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/measuring-web-accessibility-from-crawl-archives</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/navigating-the-warc-file-format</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/new-crawl-data-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/news-dataset-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/nov-dec-2020-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/nov-dec-2021-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/nov-dec-2022-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/november-2014-crawl-archive-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/november-2015-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/november-2017-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/november-2018-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/november-2019-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/november-2024-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/november-2025-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/november-december-2023-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/oct-nov-2023-performance-issues</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/october-2014-crawl-archive-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/october-2016-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/october-2017-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/october-2018-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/october-2019-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/october-2020-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/october-2021-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/october-2024-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/october-2025-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/october-november-2024-newsletter</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/october-november-2025-newsletter</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/opening-the-gates-to-online-safety</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/oscon-2012</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/please-donate-to-common-crawl</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/professor-jim-hendler-joins-the-common-crawl-advisory-board</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/providing-authenticity-data-provenance-for-common-crawl-using-blockchain-our-work-with-constellation-network</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/reflections-on-recent-talks-at-the-turing-institute-and-ucl</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/robotstxt-and-404-redirect-data-sets</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/sep-oct-2022-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/september-2014-crawl-archive-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/september-2015-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/september-2016-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/september-2017-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/september-2018-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/september-2019-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/september-2020-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/september-2021-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/september-2024-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/september-2025-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/september-october-2023-crawl-archive-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/setting-the-record-straight-common-crawls-commitment-to-transparency-fair-use-and-the-public-good</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/slideshare-building-a-scalable-web-crawler-with-hadoop</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/startup-profile-swiftkeys-head-data-scientist-on-the-value-of-common-crawls-open-data</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/still-time-to-participate-in-the-common-crawl-code-contest</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/strata-conference-hadoop-world</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/submission-to-the-uks-copyright-and-ai-consultation</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/talentbin-adds-prizes-to-the-code-contest</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/the-environmental-impact-of-the-cloud---the-common-crawl-case-study</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/the-first-wmdqs-masakhane-langid-hackathon</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/the-increase-of-common-crawl-citations-in-academic-research</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/the-norvig-web-data-science-award</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/the-open-cloud-consortiums-open-science-data-cloud</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/the-promise-of-open-government-data-where-we-go-next</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/the-winners-of-the-norvig-web-data-science-award</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/towards-social-discovery</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/trip-report-ai-dev-linux-foundation-august-2025</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/twelve-steps-to-running-your-ruby-code-across-five-billion-web-pages</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/url-search-tool</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/video-gil-elbaz-at-web-2-0-summit-2011</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/video-this-week-in-startups</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/video-tutorial-zero-to-hadoop-in-five-minutes-with-common-crawl</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/web-archives-for-social-sciences-datathon-bristol</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/web-archiving-file-formats-explained</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/web-data-commons-extraction-framework-for-the-distributed-processing-of-cc-data</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/web-graph-statistics-gets-a-proper-upgrade</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/web-graphs-aug-sep-oct-2018</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/web-image-size-prediction-for-efficient-focused-image-crawling</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/web-languages-needing-review-by-native-speakers</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/webdatacommons</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/webgraph-2017-may-june-july</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/webgraphs-feb-mar-apr-2018</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/webgraphs-may-june-july-2018</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/webgraphs-nov-dec-2017-jan-2018</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/welcome-sebastian</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/white-house-briefing-on-open-datas-role-in-technology</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/wikireverse-visualizing-reverse-links-with-open-data</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/winter-2013-crawl-data-now-available</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/wmdqs-shared-task-on-language-identification</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/blog/you-can-now-build-directly-on-common-crawl-from-the-browser</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/arc-format-legacy-crawls</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/charset-detection-bug-in-wet-records</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/co-uk-cctld-not-included</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/columnar-index-subsets-with-fewer-than-900-partitions-per-crawl</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/content-is-truncated</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/erroneous-title-field-in-wat-records</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/extra-line-in-response-records-between-headers-and-payload</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/incorrect-fetch-time-metadata</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/missing-content-truncated-flag-in-url-indexes</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/missing-fetch-status-fields</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/missing-language-classification</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/missing-warc-file</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/no-truncation-indicator-in-warc-records</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/nodes-in-domain-level-webgraphs-not-sorted-and-may-include-duplicates</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/redirect-target-url-in-url-indexes-may-be-a-relative-url</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/surt-urls-do-not-properly-encode-non-utf-8-percent-encoded-characters</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/truncated-wat-files</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/warc-content-type-header-in-revisit-records</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/warc-revisit-metadata-records</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/warc-target-uri-may-include-non-ascii-characters</loc>
    </url>
    <url>
        <loc>https://commoncrawl.org/errata/wat-data-repeated-warc-and-http-headers-are-not-preserved</loc>
    </url>
</urlset>