# robots.txt for https://cperales.github.io/
# This file allows search engines to crawl and index the portfolio website.

# Default rules for all crawlers
User-agent: *
Allow: /
# Block access to common non-public directories (if they exist)
Disallow: /node_modules/
Disallow: /.git/
Disallow: /.github/
Disallow: /src/
Disallow: /build/
Disallow: /dist/
Disallow: /*.log
Disallow: /*.md
Disallow: /package.json
Disallow: /package-lock.json
Allow: /pyridge/

# Specific rules for major search engines
# Note: per RFC 9309, a crawler follows only its most specific matching
# group, so the bots named below ignore the Disallow rules above.
User-agent: Googlebot
Allow: /

User-agent: Bingbot
Allow: /

# OpenAI's web crawler (GPTBot)
User-agent: GPTBot
Allow: /

# Other AI crawlers (optional - allows ChatGPT and other AI systems)
User-agent: ChatGPT-User
Allow: /

User-agent: Google-Extended
Allow: /

# Anthropic
User-agent: ClaudeBot
Allow: /

# Google DeepMind
User-agent: GeminiBot
Allow: /

# Mistral
User-agent: MistralBot
Allow: /

# Perplexity
User-agent: PerplexityBot
Allow: /

# You.com
User-agent: YouBot
Allow: /

# Cohere
User-agent: CohereBot
Allow: /

# Sitemap location (replace with your actual sitemap URL if you have one)
# Sitemap: https://cperales.github.io/sitemap.xml

# Crawl delay (optional - helps prevent server overload; note that Googlebot ignores it)
# Crawl-delay: 1
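
# Optional, commented-out sketch: to opt a single AI crawler out instead of
# allowing it, its group above could be replaced with a Disallow rule.
# GPTBot is used here purely as an illustration; the block below is inactive
# as written.
# User-agent: GPTBot
# Disallow: /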