# ----------------------------------------------------
# robots.txt for Come2Indonesia
# SEO + Security + AI Alignment + Cloudflare + Wordfence optimized
# ----------------------------------------------------
#
# How groups are applied: a crawler obeys ONLY the most specific group
# whose User-agent token matches it. It does NOT additionally apply the
# "*" group. Any bot that gets its own group must therefore repeat the
# private-path rules it is supposed to respect.

# ----------------------------------------------------
# GLOBAL RULES (crawlers without a dedicated group below)
# ----------------------------------------------------
User-agent: *
Disallow: /wp-login.php
Disallow: /xmlrpc.php
Disallow: /cart/
Disallow: /checkout/
Disallow: /my-account/
# WordPress admin is blocked, but the Ajax endpoint stays reachable
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
# Wordfence Login Security (wfls) and WooCommerce Ajax (wc-ajax) query endpoints
Disallow: /?wfls-*
Disallow: /*?wc-ajax=*

# ----------------------------------------------------
# BLOCK BAD / AGGRESSIVE / LOW-QUALITY CRAWLERS
# ----------------------------------------------------
User-agent: AhrefsBot
Disallow: /

User-agent: AhrefsSiteAudit
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: Sogou
Disallow: /

User-agent: Yandex
Disallow: /

User-agent: YandexBot
Disallow: /

# Unwanted AI scrapers (massive data extraction)
User-agent: omgili
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: cohere-training-data-crawler
Disallow: /

# ----------------------------------------------------
# ALLOW GOOD AI INDEXERS (for visibility in LLMs)
# ----------------------------------------------------
# One shared group: public content is open to these agents, while the
# same private paths as in the global rules stay off-limits. The longest
# matching rule wins, so the specific Disallow lines override "Allow: /".
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: Claude-SearchBot
User-agent: anthropic-ai
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: Google-Extended
User-agent: Gemini-Deep-Research
User-agent: Meta-ExternalFetcher
User-agent: meta-externalagent
User-agent: Applebot-Extended
User-agent: Amazonbot
User-agent: YouBot
User-agent: MistralAI-User
User-agent: cohere-ai
User-agent: Webzio
User-agent: Webzio-Extended
Allow: /
Disallow: /wp-login.php
Disallow: /xmlrpc.php
Disallow: /cart/
Disallow: /checkout/
Disallow: /my-account/
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /?wfls-*
Disallow: /*?wc-ajax=*

# ----------------------------------------------------
# SITEMAPS
# ----------------------------------------------------
Sitemap: https://come2indonesia.com/sitemap_index.xml
Sitemap: https://come2indonesia.com/es/sitemap_index.xml

# ----------------------------------------------------
# LLM RULES (separate file — optional)
# ----------------------------------------------------
# Large AI training bots should read:
# https://come2indonesia.com/llms.txt

# AI policy
# See https://come2indonesia.com/llms-policy.txt