# robots.txt for https://cperales.github.io/
# This file allows search engines to crawl and index the portfolio website.

# Default rules for all crawlers
User-agent: *
Allow: /
# Block access to common non-public directories (if they exist)
Disallow: /node_modules/
Disallow: /.git/
Disallow: /.github/
Disallow: /src/
Disallow: /build/
Disallow: /dist/
Disallow: /*.log
Disallow: /*.md
Disallow: /package.json
Disallow: /package-lock.json
Allow: /pyridge/

# Specific rules for major search engines
# Note: per RFC 9309, a crawler follows only its most specific matching
# group, so the bots named below ignore the Disallow rules above.
User-agent: Googlebot
Allow: /

User-agent: Bingbot
Allow: /

# OpenAI's web crawler (GPTBot)
User-agent: GPTBot
Allow: /

# Other AI crawlers (optional - allows ChatGPT and other AI systems)
User-agent: ChatGPT-User
Allow: /

User-agent: Google-Extended
Allow: /

# Anthropic
User-agent: ClaudeBot
Allow: /

# Google DeepMind
User-agent: GeminiBot
Allow: /

# Mistral
User-agent: MistralBot
Allow: /

# Perplexity
User-agent: PerplexityBot
Allow: /

# You.com
User-agent: YouBot
Allow: /

# Cohere
User-agent: CohereBot
Allow: /

# Sitemap location (replace with your actual sitemap URL if you have one)
# Sitemap: https://cperales.github.io/sitemap.xml

# Crawl delay (optional - helps prevent server overload; note that Googlebot ignores it)
# Crawl-delay: 1
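
# Optional, commented-out sketch: to opt a single AI crawler out instead of
# allowing it, its group above could be replaced with a Disallow rule.
# GPTBot is used here purely as an illustration; the block below is inactive
# as written.
# User-agent: GPTBot
# Disallow: /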