# CJX Studios — robots.txt
# Last updated: April 2026
# Canonical domain: https://www.cjxstudio.co.za
#
# All legitimate crawlers are welcome.
# Non-www redirects to www at server level (Netlify).

# ── ALL CRAWLERS ────────────────────────────────────────────────────────────
User-agent: *
# Default rules for every crawler that has no more specific group in this file.
# NOTE: a crawler that matches a named User-agent group uses ONLY that group
# and does not inherit the rules below.
#
# Block access to build artifacts and config files.
# The group is kept free of blank lines: parsers that follow the original
# 1994 convention treat a blank line as the end of the record and would drop
# every rule after it.
Disallow: /dist/
Disallow: /node_modules/
Disallow: /scripts/
Disallow: /*.toml$
Disallow: /*.json$
Disallow: /package.json$
# Everything else is crawlable. Listed last so that first-match parsers reach
# the Disallow rules before this catch-all.
Allow: /

# ── GOOGLE ───────────────────────────────────────────────────────────────────
User-agent: Googlebot
# Googlebot obeys only this group (the most specific match), not the `*`
# group, so the build-artifact and config-file rules are repeated here.
# No Crawl-delay line: Google does not support that directive and ignores it.
Disallow: /dist/
Disallow: /node_modules/
Disallow: /scripts/
Disallow: /*.toml$
Disallow: /*.json$
Allow: /

User-agent: Googlebot-Image
# Googlebot-Image obeys only this group, so the build-artifact and
# config-file rules from the `*` group are repeated here.
Disallow: /dist/
Disallow: /node_modules/
Disallow: /scripts/
Disallow: /*.toml$
Disallow: /*.json$
# Photo gallery and Open Graph preview images are explicitly welcomed.
# Paths not matched by any rule remain crawlable by default.
Allow: /photos/
Allow: /og-*.jpg
Allow: /og-*.png

# ── BING ─────────────────────────────────────────────────────────────────────
User-agent: Bingbot
# Bingbot obeys only this group, not the `*` group, so the build-artifact
# and config-file rules are repeated here.
Disallow: /dist/
Disallow: /node_modules/
Disallow: /scripts/
Disallow: /*.toml$
Disallow: /*.json$
Allow: /
# Ask Bingbot to pace its requests (value as originally configured).
Crawl-delay: 1

# ── AI CRAWLERS — explicitly welcomed ───────────────────────────────────────
# These crawlers index content for AI language models.
# CJX Studios permits this use. See /llms.txt for structured AI-readable data.
#
# A crawler obeys only the most specific group that names it and ignores the
# `*` group, so the build-artifact and config-file rules are repeated here.
# Several User-agent lines may share one rule set.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: PerplexityBot
User-agent: Omgilibot
User-agent: FacebookBot
Disallow: /dist/
Disallow: /node_modules/
Disallow: /scripts/
Disallow: /*.toml$
Disallow: /*.json$
Allow: /

# ── SOCIAL LINK-PREVIEW CRAWLERS — explicitly welcomed ──────────────────────
# These fetch pages to build link previews (cards) when a URL is shared.
User-agent: Twitterbot
User-agent: LinkedInBot
Disallow: /dist/
Disallow: /node_modules/
Disallow: /scripts/
Disallow: /*.toml$
Disallow: /*.json$
Allow: /

# ── SITEMAPS ─────────────────────────────────────────────────────────────────
# Sitemap lines are not tied to any User-agent group; every crawler may use them.
# Master index (submit this one to Google Search Console)
Sitemap: https://www.cjxstudio.co.za/sitemap.xml

# Sub-sitemaps (may also be submitted individually)
Sitemap: https://www.cjxstudio.co.za/sitemap-pages.xml
Sitemap: https://www.cjxstudio.co.za/sitemap-blog.xml
Sitemap: https://www.cjxstudio.co.za/sitemap-locations.xml

# ── AI PLAIN-TEXT INDEX ──────────────────────────────────────────────────────
# Structured plain-text summary of this site for AI language models:
# https://www.cjxstudio.co.za/llms.txt