# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content
# NOTE(review): the BEGIN/END markers suggest this section is generated by
# Cloudflare; presumably manual edits here are overwritten. Confirm before
# changing anything between the markers.

# Default group: content may be collected for search, but not for AI training.
# No ai-input signal is given, so that use is neither granted nor restricted
# here. Crawling itself is allowed site-wide by this group.
User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

# Each group below denies the entire site to the named crawler.
# NOTE(review): Applebot-Extended, CCBot, Google-Extended and GPTBot are also
# given `Allow: /` in groups later in this file. RFC 9309 parsers merge groups
# for the same agent and prefer Allow when Allow and Disallow paths are
# equivalent, so the later `Allow: /` would win for those four. Confirm which
# policy is intended.
User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# Default rules for crawlers that have no dedicated user-agent group.
# Everything is crawlable unless disallowed below. The catch-all `Allow: /`
# sits at the END of the group: RFC 9309 parsers choose the longest matching
# path regardless of order, but order-sensitive (first-match) parsers would
# stop at a leading `Allow: /` and never apply the Disallow lines.
# The group is kept free of blank lines because the original robots.txt
# convention treats a blank line as the end of a record.
User-agent: *
#
# Disallow access to admin and sensitive areas
Disallow: /admin/
Disallow: /api/private/
Disallow: /config/
Disallow: /data/
Disallow: /tools/
Disallow: /tracker/
#
# Disallow access to development and build files
Disallow: /src/
Disallow: /node_modules/
Disallow: /scripts/
#
# Disallow rotating machine-data cache directories. These are autoindex JSON
# listings whose files rotate constantly: Google crawls them as thin/duplicate
# content ("crawled - currently not indexed") and 404s when files age out,
# wasting crawl budget on non-page URLs. The rendered pages get their content
# from prerendered HTML + the rmAPI, not these raw caches. Crawlers that match
# a dedicated user-agent group elsewhere in this file are not bound by this
# rule, because a named group replaces the `*` group for that crawler.
Disallow: /*_cache/
#
# Allow access to public content
Allow: /public/
Allow: /dist/
Allow: /Research_Notes/
Allow: /Calendar_Data/
#
# Allow public API endpoints for crawlers
Allow: /api/v1/research/
Allow: /api/v1/calendar/
#
# Catch-all: anything not matched above is allowed (this is also the default).
Allow: /
#
# Crawl delay (seconds) to be respectful to server resources. Crawl-delay is
# not part of RFC 9309, and some crawlers (e.g. Googlebot) ignore it.
Crawl-delay: 1

# Sitemap location. Sitemap is a file-wide record: it is not tied to the
# user-agent group it happens to follow and applies to all crawlers.
Sitemap: https://robomacro.com/sitemap.xml

# The entries below are plain comments kept for human readers; robots.txt
# parsers ignore them, so they do not advertise anything to crawlers.

# RSS/Atom feeds
# RSS Feed: https://robomacro.com/feed.xml
# Newsletter RSS: https://robomacro.com/api/newsletters/rss

# AI chatbot and LLM discovery file
# See https://llmstxt.org for the llms.txt standard
# LLMs.txt: https://robomacro.com/llms.txt

# Allow AI crawlers to index our content for better discoverability.
#
# A crawler that matches a named group does NOT also apply the `User-agent: *`
# rules, so the private-area exclusions must be repeated here. All AI agents
# share one group so every agent gets the same exclusions (previously only
# GPTBot was kept out of /api/private/ and /admin/).
#
# NOTE(review): GPTBot, Google-Extended, Applebot-Extended and CCBot are also
# listed with `Disallow: /` in the Cloudflare-managed section of this file.
# RFC 9309 parsers merge same-agent groups and prefer Allow when the Allow and
# Disallow paths are equivalent, so the `Allow: /` below is what keeps those
# agents unblocked. Confirm that this is the intended policy.
# NOTE(review): Claude-Web and Anthropic-AI look like legacy tokens; verify
# against Anthropic's current published crawler user-agents.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: Anthropic-AI
User-agent: PerplexityBot
User-agent: Google-Extended
User-agent: Applebot-Extended
User-agent: CCBot
# Specific exclusions come first so order-sensitive parsers honor them too.
Disallow: /api/private/
Disallow: /admin/
Allow: /