# Default group — applies to any well-behaved crawler that is not matched by a
# more specific User-agent group below; permits crawling of the entire site.
User-agent: *
Allow: /

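# --- AI crawlers ---
# Explicit allows make the crawl policy for each agent unambiguous.
# NOTE: robots.txt is advisory only; WAF/CDN bot rules are configured
# separately and must also permit these agents, or they will still be blocked.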

# OpenAI — one shared group; every agent listed here gets the same rule
User-agent: GPTBot
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
Allow: /

# Anthropic
# Current documented agents: ClaudeBot, Claude-User (user-initiated fetches)
# and Claude-SearchBot (search indexing).
User-agent: ClaudeBot
Allow: /

User-agent: Claude-User
Allow: /

User-agent: Claude-SearchBot
Allow: /

# Legacy Anthropic tokens — kept so older agent strings stay explicitly allowed
User-agent: Claude-Web
Allow: /

User-agent: anthropic-ai
Allow: /

# Google AI — Google-Extended is a control token (not a separate crawler) that
# governs use of content for Gemini / Vertex AI training and grounding.
# It does not affect Google Search features such as AI Overviews, which follow
# the regular Googlebot rules.
User-agent: Google-Extended
Allow: /

# Perplexity — one shared group for both agents
User-agent: PerplexityBot
User-agent: Perplexity-User
Allow: /

# Apple — one shared group for Applebot and the Applebot-Extended token
User-agent: Applebot
User-agent: Applebot-Extended
Allow: /

# Meta — one shared group for both agents
User-agent: Meta-ExternalAgent
User-agent: FacebookBot
Allow: /

# ByteDance / Bytespider (Doubao)
User-agent: Bytespider
Allow: /

# DuckDuckGo AI (DuckAssist)
User-agent: DuckAssistBot
Allow: /

# Common Crawl (its corpus feeds many LLM training sets)
User-agent: CCBot
Allow: /

# Diffbot, You.com, Mistral, Cohere — one shared group; same rule for all
User-agent: Diffbot
User-agent: YouBot
User-agent: MistralAI-User
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
Allow: /

# Host
# The "Host" directive is not defined by RFC 9309; it was a Yandex-only
# extension that Yandex has since retired in favour of 301 redirects, so
# standards-compliant parsers ignore it and validators flag it as unknown.
# Express the preferred host with redirects and rel="canonical" instead.
# Previous value, kept for reference:
#   Host: https://www.agentui.ai

# Sitemaps — absolute URL; this record is independent of the User-agent
# groups above and applies to every crawler that supports it.
Sitemap: https://www.agentui.ai/sitemap.xml

# LLM content index (llms.txt convention)
# https://llmstxt.org
# Primary index: https://www.agentui.ai/llms.txt
