# See https://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file
#
#
# 1. Ban MOST spiders from the entire site:
#
User-agent: *
Disallow: /
#
#
# 2. Allow CERTAIN search spiders limited access:
#
# A crawler whose product token matches one of the names below obeys ONLY this
# group; the "User-agent: *" group in section 1 no longer applies to it.
# NOTE(review): this group contains no "Disallow: /", so every path that is not
# listed in a Disallow line below remains crawlable, and the Allow lines do not
# change the outcome on their own. If a strict allow-list was intended, confirm
# and add "Disallow: /" to this group.
User-agent: Googlebot
User-agent: Bingbot
User-agent: Slurp
# User-agent: YandexBot
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: Yeti
User-agent: ia_archiver
User-agent: Applebot
User-agent: OAI-SearchBot
Allow: /bic_categories/
Disallow: /bisac_categories/
Allow: /onix/
Disallow: /onix36/
Allow: /thema/
Disallow: /thema10/
Disallow: /thema11/
Disallow: /thema12/
Disallow: /thema13/
Disallow: /thema14/
Disallow: /thema15/
Disallow: /thema16/
#
#
# 3. Point to the sitemaps
#
Sitemap: https://ns.editeur.org/sitemap.xml
Sitemap: https://ns.editeur.org/thema/sitemap.xml
Sitemap: https://ns.editeur.org/onix/sitemap.xml
#
#
# 4. Specifically disallow bots associated with scraping AI training data
# or acting as an agent on behalf of a real user
#
# Amazon
User-agent: Amazonbot
Disallow: /
#
# Anthropic AI
User-agent: anthropic-ai
Disallow: /
User-agent: claude-web
Disallow: /
User-agent: ClaudeBot
Disallow: /
#
# Apple
User-agent: Applebot-Extended
Disallow: /
#
# Cohere
User-agent: cohere-ai
Disallow: /
#
# Common Crawl
User-agent: CCBot
Disallow: /
#
# Allen Institute for AI (Ai2)
User-agent: AI2Bot
Disallow: /
#
# Diffbot
User-agent: Diffbot
Disallow: /
#
# Google Gemini (formerly Bard)
User-agent: Google-Extended
Disallow: /
#
# Huawei
User-agent: PanguBot
Disallow: /
#
# Meta
User-agent: FacebookBot
Disallow: /
User-agent: meta-externalagent
Disallow: /
User-agent: meta-externalfetcher
Disallow: /
#
# Mistral
User-agent: MistralAI-User
Disallow: /
#
# OpenAI
# (OAI-SearchBot is handled separately in section 2 above.)
User-agent: GPTBot
Disallow: /
# Crawlers match on the product token only, so this group covers every
# ChatGPT-User version (e.g. "ChatGPT-User/2.0"); a "/version" suffix is not
# a valid part of a User-agent line in robots.txt.
User-agent: ChatGPT-User
Disallow: /
#
# Perplexity AI
User-agent: PerplexityBot
Disallow: /
User-agent: Perplexity-User
Disallow: /
#
# Bytedance (won't work but shows our intent)
User-agent: ByteDance
Disallow: /
User-agent: Bytespider
Disallow: /
#
# Webz.io
User-agent: omgili
Disallow: /
User-agent: omgilibot
Disallow: /
#
# You.com
User-agent: YouBot
Disallow: /
#
# End of section 4: all the above AI bots are disallowed from the entire site
#
# contact EDItEUR via info@editeur.org