# This virtual robots.txt file was created by the Virtual Robots.txt WordPress plugin: https://www.wordpress.org/plugins/pc-robotstxt/
#
# robots.txt
#
# This file is to prevent the crawling and indexing of certain parts
# of your site by web crawlers and spiders run by sites like Yahoo!
# and Google. By telling these "robots" where not to go on your site,
# you save bandwidth and server resources.
#
# This file will be ignored unless it is at the root of your host:
# Used:    http://example.com/robots.txt
# Ignored: http://example.com/site/robots.txt
#
# For more information about the robots.txt standard, see:
# https://www.robotstxt.org/robotstxt.html
#
# HOW GROUPS ARE MATCHED (read before editing):
# A crawler obeys exactly one group: the one whose User-agent line names it.
# Only crawlers that are not named anywhere fall back to "User-agent: *".
# Rules are NOT inherited between groups, so giving a crawler its own
# "Allow: /" group would exempt it from every Disallow listed under "*".
# For that reason the crawlers we explicitly welcome are listed in the SAME
# group as "*" below, so that everyone shares one set of path rules.
#
# Formatting rules for this file:
# - One directive per line.
# - No blank lines inside a group (some parsers end the group at a blank
#   line); use "#" lines as visual separators instead.
# - User-agent values are bare product tokens (letters, "-" and "_" only),
#   never versioned strings such as "Name/2.0".

# ---------------------------------------------------------------------------
# Default rules: all crawlers, plus the crawlers we explicitly welcome
# ---------------------------------------------------------------------------
User-agent: *
#
# OPENAI
# Search (shows pages as links inside ChatGPT search). Not used for model training.
User-agent: OAI-SearchBot
# User-driven browsing from ChatGPT and Custom GPTs. Acts after a human click.
# The bare token matches every version of this agent.
User-agent: ChatGPT-User
# Model-training crawler
User-agent: GPTBot
#
# ANTHROPIC (Claude)
User-agent: anthropic-ai # bulk model training
User-agent: ClaudeBot # chat citation fetch
User-agent: claude-web # web-focused crawl
#
# PERPLEXITY
User-agent: PerplexityBot # index builder
User-agent: Perplexity-User # human-triggered visit
#
# GOOGLE (Gemini)
User-agent: Google-Extended
#
# MICROSOFT (Bing / Copilot)
User-agent: BingBot
#
# AMAZON
User-agent: Amazonbot # supports Alexa
#
# APPLE
User-agent: Applebot
User-agent: Applebot-Extended
#
# META
User-agent: FacebookBot
User-agent: meta-externalagent
#
# LINKEDIN
User-agent: LinkedInBot
#
# BYTEDANCE
User-agent: Bytespider # feeds TikTok
#
# DUCKDUCKGO
User-agent: DuckAssistBot
#
# ALLEN INSTITUTE / COMMON CRAWL / OTHER RESEARCH
User-agent: AI2Bot # Semantic Scholar
User-agent: CCBot
User-agent: SemanticScholarBot
#
# Everything not matched by a Disallow below is crawlable by default, so no
# blanket "Allow: /" is needed. The Allow lines carve exceptions out of the
# Disallow that precedes them (the longest matching path wins).
Disallow: /wp-login.php
Disallow: /wp-register.php
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Allow: /wp-content/uploads/
Disallow: /wp-content/plugins/
Disallow: /wp-content/themes/
Disallow: /wp-includes/
Allow: /wp-includes/js/
Allow: /wp-includes/images/
Disallow: /README.md
Disallow: /taxonomy/
Disallow: /trackback/
# Don't crawl search results
Disallow: /search/
Disallow: /search/refine/
Disallow: /?s=
Disallow: /page/
Disallow: /-/

# ---------------------------------------------------------------------------
# Block unwanted bots (these crawlers are denied the whole site)
# ---------------------------------------------------------------------------
User-agent: Baiduspider
User-agent: barkrowler
User-agent: Brightbot
User-agent: DataForSeoBot
User-agent: DorkBot
User-agent: Drupal
User-agent: Go-http-client
User-agent: ImagesiftBot
User-agent: Scrapy
User-agent: SeekportBot
User-agent: SemrushBot
User-agent: YandexBot
Disallow: /

# Sitemap
Sitemap: https://students.ubc.ca/sitemap.xml