[extensions][enums] Improve detection for Yandex bots

This commit is contained in:
Faisal Salman 2025-08-30 17:01:05 +07:00
parent 2078b1ec92
commit ce242a362f
4 changed files with 415 additions and 3 deletions

View File

@ -553,7 +553,35 @@ const Extension = Object.freeze({
VERCEL_V0BOT: 'v0bot', VERCEL_V0BOT: 'v0bot',
YAHOO_JAPAN: 'Y!J-BRW', YAHOO_JAPAN: 'Y!J-BRW',
YAHOO_SLURP: 'Yahoo! Slurp', YAHOO_SLURP: 'Yahoo! Slurp',
YANDEX_ACCESSIBILITY_BOT: 'YandexAccessibilityBot',
YANDEX_ADDITIONAL_BOT: 'YandexAdditionalBot',
YANDEX_ADNET: 'YandexAdNet',
YANDEX_BLOGS: 'YandexBlogs',
YANDEX_BOT: 'YandexBot', YANDEX_BOT: 'YandexBot',
YANDEX_BOT_MIRRORDETECTOR: 'YandexBot MirrorDetector',
YANDEX_COMBOT: 'YandexComBot',
YANDEX_FAVICONS: 'YandexFavicons',
YANDEX_IMAGE_RESIZER: 'YandexImageResizer',
YANDEX_IMAGES: 'YandexImages',
YANDEX_MARKET: 'YandexMarket',
YANDEX_MEDIA: 'YandexMedia',
YANDEX_METRIKA: 'YandexMetrika',
YANDEX_MOBILE_BOT: 'YandexMobileBot',
YANDEX_MOBILE_SCREENSHOT_BOT: 'YandexMobileScreenShotBot',
YANDEX_NEWS: 'YandexNews',
YANDEX_ONTODB: 'YandexOntoDB',
YANDEX_ONTODB_API: 'YandexOntoDBAPI',
YANDEX_PARTNER: 'YandexPartner',
YANDEX_RCA: 'YandexRCA',
YANDEX_RENDERRESOURCES_BOT: 'YandexRenderResourcesBot',
YANDEX_SCREENSHOT_BOT: 'YandexScreenshotBot',
YANDEX_SPRAV_BOT: 'YandexSpravBot',
YANDEX_TRACKER: 'YandexTracker',
YANDEX_VERTICALS: 'YandexVerticals',
YANDEX_VERTIS: 'YandexVertis',
YANDEX_VIDEO: 'YandexVideo',
YANDEX_VIDEO_PARSER: 'YandexVideoParser',
YANDEX_WEBMASTER: 'YandexWebmaster',
YEP_BOT: 'YepBot', YEP_BOT: 'YepBot',
YETI: 'Yeti', YETI: 'Yeti',
YISOU_SPIDER: 'YisouSpider', YISOU_SPIDER: 'YisouSpider',
@ -624,6 +652,15 @@ const Extension = Object.freeze({
VERCEL_BOT: 'Vercelbot', VERCEL_BOT: 'Vercelbot',
VERCEL_FLAGS: 'vercelflags', VERCEL_FLAGS: 'vercelflags',
VERCEL_TRACING: 'verceltracing', VERCEL_TRACING: 'verceltracing',
YANDEX_CALENDAR: 'YandexCalendar',
YANDEX_DIRECT: 'YandexDirect',
YANDEX_DIRECTDYN: 'YandexDirectDyn',
YANDEX_DIRECTFETCHER: 'YaDirectFetcher',
YANDEX_FORDOMAIN: 'YandexForDomain',
YANDEX_PAGECHECKER: 'YandexPagechecker',
YANDEX_SEARCHSHOP: 'YandexSearchShop',
YANDEX_SITELINKS: 'YandexSitelinks',
YANDEX_USERPROXY: 'YandexUserproxy',
WHATSAPP: 'WhatsApp', WHATSAPP: 'WhatsApp',
ZOOMINFO_BOT: 'Zoombot' ZOOMINFO_BOT: 'Zoombot'
}, },

View File

@ -109,7 +109,7 @@ const Crawlers = Object.freeze({
/(y!?j-(?:asr|br[uw]|dscv|mmp|vsidx|wsc))\/([\w\.]+)/i, /(y!?j-(?:asr|br[uw]|dscv|mmp|vsidx|wsc))\/([\w\.]+)/i,
// Yandex Bots - https://yandex.com/bots // Yandex Bots - https://yandex.com/bots
/(yandex(?:(?:mobile)?(?:accessibility|additional|renderresources|screenshot|sprav)?bot|image(?:s|resizer)|video(?:parser)?|blogs|adnet|favicons|fordomain|market|media|metrika|news|ontodb(?:api)?|pagechecker|partner|rca|tracker|turbo|vertis|webmaster|antivirus))\/([\w\.]+)/i, /(yandex(?:(?:mobile)?(?:accessibility|additional|com|renderresources|screenshot|sprav)?bot(?!.+mirror)|image(?:s|resizer)|adnet|blogs|favicons|market|media|metrika|news|ontodb(?:api)?|partner|rca|tracker|turbo|verti(?:cal)?s|webmaster|video(?:parser)?))\/([\w\.]+)/i,
// Yeti (Naver) // Yeti (Naver)
/(yeti)\/([\w\.]+)/i, /(yeti)\/([\w\.]+)/i,
@ -119,9 +119,14 @@ const Crawlers = Object.freeze({
// Freespoke - https://docs.freespoke.com/search/bot/ // Freespoke - https://docs.freespoke.com/search/bot/
/((?:aihit|blex|diff|huggingface-|msn|pangu|replicate-|runpod-|timpi|together-|xai-|you|zum)bot|(?:magpie-|velenpublicweb)crawler|(?:chatglm-|line|screaming frog seo |yisou)spider|cotoyogi|firecrawlagent|freespoke|omgili(?:bot)?|openai image downloader|startpageprivateimageproxy|twinagent|webzio-extended)\/?([\w\.]*)/i /((?:aihit|blex|diff|huggingface-|msn|pangu|replicate-|runpod-|timpi|together-|xai-|you|zum)bot|(?:magpie-|velenpublicweb)crawler|(?:chatglm-|line|screaming frog seo |yisou)spider|cotoyogi|firecrawlagent|freespoke|omgili(?:bot)?|openai image downloader|startpageprivateimageproxy|twinagent|webzio-extended)\/?([\w\.]*)/i
], ],
[NAME, VERSION, [TYPE, CRAWLER]], [NAME, VERSION, [TYPE, CRAWLER]],
[
// YandexBot MirrorDetector
/(yandexbot\/([\w\.]+); mirrordetector)/i
],
[[NAME, /\/.+;/ig, ''], VERSION, [TYPE, CRAWLER]],
[ [
// Google Bots // Google Bots
/((?:adsbot|apis|mediapartners)-google(?:-mobile)?|google-?(?:other|cloudvertexbot|extended|safety))/i, /((?:adsbot|apis|mediapartners)-google(?:-mobile)?|google-?(?:other|cloudvertexbot|extended|safety))/i,
@ -260,7 +265,7 @@ const Fetchers = Object.freeze({
// Perplexity-User - https://docs.perplexity.ai/guides/bots // Perplexity-User - https://docs.perplexity.ai/guides/bots
// MistralAI-User - https://docs.mistral.ai/robots/ // MistralAI-User - https://docs.mistral.ai/robots/
// Yandex Bots - https://yandex.com/bots // Yandex Bots - https://yandex.com/bots
/(asana|ahrefssiteaudit|(?:bing|microsoft)preview|blueno|(?:chatgpt|claude|mistralai|perplexity)-user|cohere-ai|hubspot page fetcher|mastodon|(?:bitly|bufferlinkpreview|discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero|zoom)bot|google-site-verification|iframely|kakaotalk-scrap|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i, /(asana|ahrefssiteaudit|(?:bing|microsoft)preview|blueno|(?:chatgpt|claude|mistralai|perplexity)-user|cohere-ai|hubspot page fetcher|mastodon|(?:bitly|bufferlinkpreview|discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero|zoom)bot|google-site-verification|iframely|kakaotalk-scrap|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|fordomain|pagechecker|searchshop)|yadirectfetcher)\/([\w\.]+)/i,
// Bluesky // Bluesky
/(bluesky) cardyb\/([\w\.]+)/i, /(bluesky) cardyb\/([\w\.]+)/i,

View File

@ -1170,6 +1170,46 @@
"type" : "crawler" "type" : "crawler"
} }
}, },
{
"desc" : "YandexAccessibilityBot",
"ua" : "Mozilla/5.0 (compatible; YandexAccessibilityBot/3.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexAccessibilityBot",
"version" : "3.0",
"type" : "crawler"
}
},
{
"desc" : "YandexAdditionalBot",
"ua" : "Mozilla/5.0 (compatible; YandexAdditionalBot/3.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexAdditionalBot",
"version" : "3.0",
"type" : "crawler"
}
},
{
"desc" : "YandexAdNet",
"ua" : "Mozilla/5.0 (compatible; YandexAdNet/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexAdNet",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "YandexBlogs",
"ua" : "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexBlogs",
"version" : "0.99",
"type" : "crawler"
}
},
{ {
"desc" : "YandexBot", "desc" : "YandexBot",
"ua" : "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)", "ua" : "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
@ -1180,6 +1220,246 @@
"type" : "crawler" "type" : "crawler"
} }
}, },
{
"desc" : "YandexBot MirrorDetector",
"ua" : "Mozilla/5.0 (compatible; YandexBot/3.0; MirrorDetector; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexBot MirrorDetector",
"version" : "3.0",
"type" : "crawler"
}
},
{
"desc" : "YandexComBot",
"ua" : "Mozilla/5.0 (compatible; YandexComBot/3.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexComBot",
"version" : "3.0",
"type" : "crawler"
}
},
{
"desc" : "YandexFavicons",
"ua" : "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexFavicons",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "YandexImageResizer",
"ua" : "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexImageResizer",
"version" : "2.0",
"type" : "crawler"
}
},
{
"desc" : "YandexImages",
"ua" : "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexImages",
"version" : "3.0",
"type" : "crawler"
}
},
{
"desc" : "YandexMarket",
"ua" : "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexMarket",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "YandexMetrika",
"ua" : "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexMetrika",
"version" : "2.0",
"type" : "crawler"
}
},
{
"desc" : "YandexMedia",
"ua" : "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexMedia",
"version" : "3.0",
"type" : "crawler"
}
},
{
"desc" : "YandexMobileBot",
"ua" : "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexMobileBot",
"version" : "3.0",
"type" : "crawler"
}
},
{
"desc" : "YandexMobileScreenShotBot",
"ua" : "Mozilla/5.0 (compatible; YandexMobileScreenShotBot/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexMobileScreenShotBot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "YandexNews",
"ua" : "Mozilla/5.0 (compatible; YandexNews/4.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexNews",
"version" : "4.0",
"type" : "crawler"
}
},
{
"desc" : "YandexOntoDB",
"ua" : "Mozilla/5.0 (compatible; YandexOntoDB/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexOntoDB",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "YandexOntoDBAPI",
"ua" : "Mozilla/5.0 (compatible; YandexOntoDBAPI/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexOntoDBAPI",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "YandexPartner",
"ua" : "Mozilla/5.0 (compatible; YandexPartner/3.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexPartner",
"version" : "3.0",
"type" : "crawler"
}
},
{
"desc" : "YandexRCA",
"ua" : "Mozilla/5.0 (compatible; YandexRCA/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexRCA",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "YandexRenderResourcesBot",
"ua" : "Mozilla/5.0 (compatible; YandexRenderResourcesBot/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexRenderResourcesBot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "YandexScreenshotBot",
"ua" : "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexScreenshotBot",
"version" : "3.0",
"type" : "crawler"
}
},
{
"desc" : "YandexSpravBot",
"ua" : "Mozilla/5.0 (compatible; YandexSpravBot/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexSpravBot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "YandexTracker",
"ua" : "Mozilla/5.0 (compatible; YandexTracker/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexTracker",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "YandexVertis",
"ua" : "Mozilla/5.0 (compatible; YandexVertis/3.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexVertis",
"version" : "3.0",
"type" : "crawler"
}
},
{
"desc" : "YandexVerticals",
"ua" : "Mozilla/5.0 (compatible; YandexVerticals/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexVerticals",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "YandexVideo",
"ua" : "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexVideo",
"version" : "3.0",
"type" : "crawler"
}
},
{
"desc" : "YandexVideoParser",
"ua" : "Mozilla/5.0 (compatible; YandexVideoParser/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexVideoParser",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "YandexWebmaster",
"ua" : "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexWebmaster",
"version" : "2.0",
"type" : "crawler"
}
},
{ {
"desc" : "YepBot", "desc" : "YepBot",
"ua" : "Mozilla/5.0 (compatible; YepBot/1.0; +http://yep.com/yepbot/)", "ua" : "Mozilla/5.0 (compatible; YepBot/1.0; +http://yep.com/yepbot/)",

View File

@ -419,6 +419,96 @@
"type" : "fetcher" "type" : "fetcher"
} }
}, },
{
"desc" : "YaDirectFetcher",
"ua" : "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; Dyatel; +http://yandex.com/bots)",
"expect" :
{
"name" : "YaDirectFetcher",
"version" : "1.0",
"type" : "fetcher"
}
},
{
"desc" : "YandexCalendar",
"ua" : "Mozilla/5.0 (compatible; YandexCalendar/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexCalendar",
"version" : "1.0",
"type" : "fetcher"
}
},
{
"desc" : "YandexDirect",
"ua" : "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexDirect",
"version" : "3.0",
"type" : "fetcher"
}
},
{
"desc" : "YandexDirectDyn",
"ua" : "Mozilla/5.0 (compatible; YandexDirectDyn/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexDirectDyn",
"version" : "1.0",
"type" : "fetcher"
}
},
{
"desc" : "YandexForDomain",
"ua" : "Mozilla/5.0 (compatible; YandexForDomain/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexForDomain",
"version" : "1.0",
"type" : "fetcher"
}
},
{
"desc" : "YandexPagechecker",
"ua" : "Mozilla/5.0 (compatible; YandexPagechecker/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexPagechecker",
"version" : "1.0",
"type" : "fetcher"
}
},
{
"desc" : "YandexSearchShop",
"ua" : "Mozilla/5.0 (compatible; YandexSearchShop/1.0; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexSearchShop",
"version" : "1.0",
"type" : "fetcher"
}
},
{
"desc" : "YandexSitelinks",
"ua" : "Mozilla/5.0 (compatible; YandexSitelinks; Dyatel; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexSitelinks",
"version" : "undefined",
"type" : "fetcher"
}
},
{
"desc" : "YandexUserproxy",
"ua" : "Mozilla/5.0 (compatible; YandexUserproxy; robot; +http://yandex.com/bots)",
"expect" :
{
"name" : "YandexUserproxy",
"version" : "undefined",
"type" : "fetcher"
}
},
{ {
"desc" : "Zoombot", "desc" : "Zoombot",
"ua" : "Mozilla/5.0 (compatible; Zoombot/1.0; +https://zoom.us; crawler@domain.com)", "ua" : "Mozilla/5.0 (compatible; Zoombot/1.0; +https://zoom.us; crawler@domain.com)",