# See http://www.robotstxt.org/wc/norobots.html for documentation on how to use the robots.txt file
#
# Format reminder: one directive per line. Consecutive User-agent lines
# followed by Allow/Disallow rules form a single group that applies to every
# listed crawler. A crawler obeys only the most specific group that names it,
# and falls back to the "*" group when no group names it.
#
#
# 1. Ban MOST spiders from the entire site:
#
User-agent: *
Disallow: /
#
#
# 2. Allow CERTAIN search spiders limited access:
#
# NOTE(review): this group contains no "Disallow: /", so any path that is not
# explicitly disallowed below is crawlable by these bots (the Allow lines are
# informational). Confirm that this is the intended meaning of "limited".
#
User-agent: Googlebot
User-agent: Bingbot
User-agent: Slurp
# User-agent: YandexBot
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: Yeti
User-agent: ia_archiver
User-agent: Applebot
User-agent: OAI-SearchBot
Allow: /bic_categories/
Disallow: /bisac_categories/
Allow: /onix/
Disallow: /onix36/
Allow: /thema/
Disallow: /thema10/
Disallow: /thema11/
Disallow: /thema12/
Disallow: /thema13/
Disallow: /thema14/
Disallow: /thema15/
Disallow: /thema16/
#
#
# 3. Point to the sitemaps
#
Sitemap: https://ns.editeur.org/sitemap.xml
Sitemap: https://ns.editeur.org/thema/sitemap.xml
Sitemap: https://ns.editeur.org/onix/sitemap.xml
#
#
# 4. Specifically disallow bots associated with scraping AI training data
#    or acting as an agent on behalf of a real user
#
# Each bot is named explicitly (rather than relying on the "*" group above)
# so that the intent is unambiguous.
#
# Amazon
User-agent: Amazonbot
Disallow: /
#
# Anthropic AI
User-agent: anthropic-ai
Disallow: /
User-agent: claude-web
Disallow: /
User-agent: ClaudeBot
Disallow: /
#
# Apple
User-agent: Applebot-Extended
Disallow: /
#
# Cohere
User-agent: cohere-ai
Disallow: /
#
# Common Crawl
User-agent: CCBot
Disallow: /
#
# Allen Institute for AI
User-agent: AI2Bot
Disallow: /
#
# Diffbot
User-agent: Diffbot
Disallow: /
#
# Google (Bard / Gemini)
User-agent: Google-Extended
Disallow: /
#
# Huawei
User-agent: PanguBot
Disallow: /
#
# Meta
User-agent: FacebookBot
Disallow: /
User-agent: meta-externalagent
Disallow: /
User-agent: meta-externalfetcher
Disallow: /
#
# Mistral
User-agent: MistralAI-User
Disallow: /
#
# OpenAI
User-agent: GPTBot
Disallow: /
User-agent: ChatGPT-User
Disallow: /
# NOTE(review): a version suffix is not part of a robots.txt product token, so
# the entry below is most likely already covered by "ChatGPT-User"; it is kept
# to make the intent explicit.
User-agent: ChatGPT-User/2.0
Disallow: /
#
# Perplexity AI
User-agent: PerplexityBot
Disallow: /
User-agent: Perplexity-User
Disallow: /
#
# Bytedance (won't work but shows our intent)
User-agent: ByteDance
Disallow: /
User-agent: Bytespider
Disallow: /
#
# Webz.io
User-agent: omgili
Disallow: /
User-agent: omgilibot
Disallow: /
#
# You.com
User-agent: YouBot
Disallow: /
#
# disallow all the above AI bots
#
# contact EDItEUR via info@editeur.org