[extensions] Add new bots: Blueno, BufferLinkPreviewBot, Claude-SearchBot, Claude-User, Coveobot, CriteoBot

This commit is contained in:
Faisal Salman 2025-07-22 12:29:01 +07:00
parent a19977ce4c
commit 0bb6e24837
4 changed files with 56 additions and 9 deletions

View File

@@ -44,6 +44,8 @@ const Crawlers = Object.freeze({
// Amazonbot - https://developer.amazon.com/amazonbot // Amazonbot - https://developer.amazon.com/amazonbot
// Bingbot / AdIdxBot - https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0 // Bingbot / AdIdxBot - https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0
// CCBot - https://commoncrawl.org/faq // CCBot - https://commoncrawl.org/faq
// Coveobot - https://connect.coveo.com/s/article/19648
// CriteoBot - https://www.criteo.com/criteo-crawler/
// Dotbot - https://moz.com/help/moz-procedures/crawlers/dotbot // Dotbot - https://moz.com/help/moz-procedures/crawlers/dotbot
// DuckDuckBot - http://duckduckgo.com/duckduckbot.html // DuckDuckBot - http://duckduckgo.com/duckduckbot.html
// FacebookBot - https://developers.facebook.com/docs/sharing/bot/ // FacebookBot - https://developers.facebook.com/docs/sharing/bot/
@@ -56,7 +58,7 @@ const Crawlers = Object.freeze({
// OpenAI's SearchGPT - https://platform.openai.com/docs/bots // OpenAI's SearchGPT - https://platform.openai.com/docs/bots
// PerplexityBot - https://perplexity.ai/perplexitybot // PerplexityBot - https://perplexity.ai/perplexitybot
// SeznamBot - http://napoveda.seznam.cz/seznambot-intro // SeznamBot - http://napoveda.seznam.cz/seznambot-intro
/((?:adidx|ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|iask|linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i, /((?:adidx|ahrefs|amazon|bing|cc|coveo|criteo|dot|duckduck|exa|facebook|gpt|iask|linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i,
// Applebot - http://apple.com/go/applebot // Applebot - http://apple.com/go/applebot
/(applebot(?:-extended)?)\/?([\w\.]*)/i, /(applebot(?:-extended)?)\/?([\w\.]*)/i,
@@ -65,7 +67,7 @@ const Crawlers = Object.freeze({
/(baiduspider[-imagevdonwsfcpr]{0,7})\/?([\w\.]*)/i, /(baiduspider[-imagevdonwsfcpr]{0,7})\/?([\w\.]*)/i,
// ClaudeBot (Anthropic) // ClaudeBot (Anthropic)
/(claude(?:bot|-web)|anthropic-ai)\/?([\w\.]*)/i, /(claude(?:bot|-searchbot|-web)|anthropic-ai)\/?([\w\.]*)/i,
// Coc Coc Bot - https://help.coccoc.com/en/search-engine // Coc Coc Bot - https://help.coccoc.com/en/search-engine
/(coccocbot-(?:image|web))\/([\w\.]+)/i, /(coccocbot-(?:image|web))\/([\w\.]+)/i,
@@ -235,15 +237,16 @@ const Fetchers = Object.freeze({
browser : [ browser : [
[ [
// AhrefsSiteAudit - https://ahrefs.com/robot/site-audit // AhrefsSiteAudit - https://ahrefs.com/robot/site-audit
// Buffer Link Preview Bot - https://scraper.buffer.com/about/bots/link-preview-bot
// ChatGPT-User - https://platform.openai.com/docs/plugins/bot // ChatGPT-User - https://platform.openai.com/docs/plugins/bot
// DuckAssistBot - https://duckduckgo.com/duckassistbot/ // DuckAssistBot - https://duckduckgo.com/duckassistbot/
// Better Uptime / BingPreview / Mastodon / MicrosoftPreview / Pinterestbot / Redditbot / Rogerbot / SiteAuditBot / Telegrambot / Twitterbot / UptimeRobot // Better Uptime / BingPreview / Blueno / Mastodon / MicrosoftPreview / Pinterestbot / Redditbot / Rogerbot / SiteAuditBot / Telegrambot / Twitterbot / UptimeRobot
// Google Site Verifier / Meta / Yahoo! Japan // Google Site Verifier / Meta / Yahoo! Japan
// Iframely - https://iframely.com/docs/about // Iframely - https://iframely.com/docs/about
// Perplexity-User - https://docs.perplexity.ai/guides/bots // Perplexity-User - https://docs.perplexity.ai/guides/bots
// MistralAI-User - https://docs.mistral.ai/robots/ // MistralAI-User - https://docs.mistral.ai/robots/
// Yandex Bots - https://yandex.com/bots // Yandex Bots - https://yandex.com/bots
/(ahrefssiteaudit|(?:bing|microsoft)preview|(?:chatgpt|mistralai|perplexity)-user|mastodon|(?:discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero)bot|google-site-verification|iframely|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i, /(ahrefssiteaudit|(?:bing|microsoft)preview|blueno|(?:chatgpt|claude|mistralai|perplexity)-user|mastodon|(?:bufferlinkpreview|discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero)bot|google-site-verification|iframely|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i,
// Bluesky // Bluesky
/(bluesky) cardyb\/([\w\.]+)/i, /(bluesky) cardyb\/([\w\.]+)/i,
@@ -404,8 +407,8 @@ const Vehicles = Object.freeze({
const Bots = Object.freeze({ const Bots = Object.freeze({
browser : [ browser : [
...CLIs.browser, ...CLIs.browser,
...Crawlers.browser,
...Fetchers.browser, ...Fetchers.browser,
...Crawlers.browser,
...Libraries.browser ...Libraries.browser
], ],
os : [ os : [

View File

@@ -52,6 +52,7 @@ const isAIBot = (resultOrUA) => [
// Anthropic // Anthropic
'anthropic-ai', 'anthropic-ai',
'claude-web', 'claude-web',
'claude-searchbot',
'claudebot', 'claudebot',
// Apple // Apple
@@ -63,6 +64,9 @@ const isAIBot = (resultOrUA) => [
// Common Crawl // Common Crawl
'ccbot', 'ccbot',
// Coveo
'coveobot',
// DataForSeo // DataForSeo
'dataforseobot', 'dataforseobot',

View File

@@ -269,6 +269,16 @@
"type" : "crawler" "type" : "crawler"
} }
}, },
{
"desc" : "ClaudeWeb",
"ua" : "Claude-Web/1.0 (web crawler; +https://www.anthropic.com/; bots@anthropic.com)",
"expect" :
{
"name" : "Claude-Web",
"version" : "1.0",
"type" : "crawler"
}
},
{ {
"desc" : "Coc Coc Bot (web)", "desc" : "Coc Coc Bot (web)",
"ua" : "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)", "ua" : "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)",
@@ -290,12 +300,22 @@
} }
}, },
{ {
"desc" : "ClaudeWeb", "desc" : "Coveobot",
"ua" : "Claude-Web/1.0 (web crawler; +https://www.anthropic.com/; bots@anthropic.com)", "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) (compatible; Coveobot/2.0;+http://www.coveo.com/bot.html)",
"expect" : "expect" :
{ {
"name" : "Claude-Web", "name" : "Coveobot",
"version" : "1.0", "version" : "2.0",
"type" : "crawler"
}
},
{
"desc" : "CriteoBot",
"ua" : "CriteoBot/0.1 (+https://www.criteo.com/criteo-crawler/)",
"expect" :
{
"name" : "CriteoBot",
"version" : "0.1",
"type" : "crawler" "type" : "crawler"
} }
}, },

View File

@@ -29,6 +29,16 @@
"type" : "fetcher" "type" : "fetcher"
} }
}, },
{
"desc" : "Blueno",
"ua" : "facebookexternalhit/1.1 (compatible; Blueno/1.0; +http://naver.me/scrap)",
"expect" :
{
"name" : "Blueno",
"version" : "1.0",
"type" : "fetcher"
}
},
{ {
"desc" : "Bluesky", "desc" : "Bluesky",
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Bluesky Cardyb/1.1; +mailto:support@bsky.app) Chrome/100.0.0.0 Safari/537.36", "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Bluesky Cardyb/1.1; +mailto:support@bsky.app) Chrome/100.0.0.0 Safari/537.36",
@@ -39,6 +49,16 @@
"type" : "fetcher" "type" : "fetcher"
} }
}, },
{
"desc" : "BufferLinkPreviewBot",
"ua" : "BufferLinkPreviewBot/1.0 (+https://scraper.buffer.com/about/bots/link-preview-bot)",
"expect" :
{
"name" : "BufferLinkPreviewBot",
"version" : "1.0",
"type" : "fetcher"
}
},
{ {
"desc" : "ChatGPT-User", "desc" : "ChatGPT-User",
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot", "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot",