# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

# Default group for crawlers that have no dedicated User-agent group in this file.
# Per the signal definitions above: search indexing is permitted (search=yes),
# AI training is not (ai-train=no); no ai-input signal is declared, so that use
# is neither granted nor restricted here.
# A second "User-agent: *" group further down in this file adds Disallow rules.
User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

# Crawlers denied the entire site: each name below has its own group with
# "Disallow: /", so the rules of the "User-agent: *" groups do not apply to it.
# NOTE(review): the site-authored policy comments further down list several of
# these crawlers (e.g. GPTBot, ClaudeBot, Google-Extended, CCBot) as explicitly
# welcome — confirm which policy is intended.
User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# Blurit.app — robots.txt
# Référence : https://www.blurit.app/robots.txt
#
# === Politique de crawl ===
# Pages publiques indexables : Allow: / (par défaut)
# Homes marketing : /fr/, /en/, /de/, /de-at/, /es/, /pt/, /it/, /sv/, /tr/, /ar/ (+ sitemap)
# Blog (canonique) : /{lang}/blog/ — listé dans sitemap.xml
#
# === Politique IA / GEO (Generative Engine Optimization) ===
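# Blurit autorise explicitement les crawlers IA — qu'ils soient utilisés pour
# l'entraînement (training) ou pour les réponses live (RAG / Answer engines).
# ATTENTION : le bloc « Cloudflare Managed content » en tête de fichier contredit
# actuellement cette politique : il déclare ai-train=no et applique « Disallow: / »
# à Amazonbot, Applebot-Extended, Bytespider, CCBot, ClaudeBot, Google-Extended,
# GPTBot et meta-externalagent. Un robot qui dispose de son propre groupe
# User-agent ignore le groupe « * » ; ces bots sont donc bloqués tant que ce bloc
# est présent (à désactiver côté Cloudflare si la politique ci-dessous doit s'appliquer).
# Bots IA explicitement bienvenus (suivent la règle User-agent: * ci-dessous,
# sauf ceux bloqués par le bloc Cloudflare mentionné ci-dessus) :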
#   - OpenAI : GPTBot, ChatGPT-User, OAI-SearchBot
#   - Anthropic : ClaudeBot, Claude-Web, anthropic-ai
#   - Google : Google-Extended (entraînement), Googlebot (search)
#   - Perplexity : PerplexityBot, Perplexity-User
#   - Microsoft : Bingbot, MSNBot
#   - Apple : Applebot, Applebot-Extended
#   - ByteDance : Bytespider
#   - Cohere : cohere-ai, cohere-training-data-crawler
#   - You.com : YouBot
#   - Meta : Meta-ExternalAgent, FacebookBot
#   - Diffbot, CCBot (Common Crawl), Amazonbot
# Voir aussi : /llms.txt (résumé du site optimisé pour LLMs, format llmstxt.org)
#
# === Anciennes URL ===
# Anciennes URL /blog/{lang}/*.html : toujours servies pour les liens historiques ; chaque fichier
# a noindex + canonical vers /{lang}/blog/{slug}/ (voir blog/{lang}/*.html). Ne pas Disallow :
# sinon Search Console signale « Bloquée par robots.txt » sans que le robot puisse lire la page.
# Redirections 301 : _redirects (Netlify / Cloudflare Pages). Voir docs/DEPLOYMENT.md pour
# le statut Scaleway (sans CDN, ces redirects ne sont PAS appliqués en prod).

# Rules for every crawler that has no dedicated User-agent group in this file.
# The group is deliberately kept free of blank lines: the original robots.txt
# convention treats a blank line as the end of a record, so parsers that follow
# it would otherwise drop the Disallow rules that come after the gap.
User-agent: *
Allow: /
# Non-public / technical paths
Disallow: /auth/
Disallow: /account.html
Disallow: /studio-dev/
Disallow: /scripts/
Disallow: /docs/
Disallow: /dist-studio/
# Local build artifact (must not be crawled if exposed by mistake)
Disallow: /dist/
# Product source templates — canonical pages live under /{lang}/…/ (see sitemap.xml)
Disallow: /products/
# Raw JSON article data (not intended as public HTML)
Disallow: /blog/data/
# Legacy index-{lang}.html hubs under /blog/ (duplicate index pages)
Disallow: /blog/index-

# Sitemap index (regenerated automatically at the start of `npm run build`).
# It references sitemap-pages.xml, sitemap-blog.xml, sitemap-images.xml and,
# when eligible, sitemap-news.xml (Google News, < 48 h).
Sitemap: https://www.blurit.app/sitemap.xml