From ce242a362fcf5b6a4fd01ddec39174e055c1bec2 Mon Sep 17 00:00:00 2001 From: Faisal Salman Date: Sat, 30 Aug 2025 17:01:05 +0700 Subject: [PATCH] [extensions][enums] Improve detection for Yandex bots --- src/enums/ua-parser-enums.js | 37 ++++ src/extensions/ua-parser-extensions.js | 11 +- test/data/ua/extension/crawler.json | 280 +++++++++++++++++++++++++ test/data/ua/extension/fetcher.json | 90 ++++++++ 4 files changed, 415 insertions(+), 3 deletions(-) diff --git a/src/enums/ua-parser-enums.js b/src/enums/ua-parser-enums.js index bb3b83c..d06e1db 100644 --- a/src/enums/ua-parser-enums.js +++ b/src/enums/ua-parser-enums.js @@ -553,7 +553,35 @@ const Extension = Object.freeze({ VERCEL_V0BOT: 'v0bot', YAHOO_JAPAN: 'Y!J-BRW', YAHOO_SLURP: 'Yahoo! Slurp', + YANDEX_ACCESSIBILITY_BOT: 'YandexAccessibilityBot', + YANDEX_ADDITIONAL_BOT: 'YandexAdditionalBot', + YANDEX_ADNET: 'YandexAdNet', + YANDEX_BLOGS: 'YandexBlogs', YANDEX_BOT: 'YandexBot', + YANDEX_BOT_MIRRORDETECTOR: 'YandexBot MirrorDetector', + YANDEX_COMBOT: 'YandexComBot', + YANDEX_FAVICONS: 'YandexFavicons', + YANDEX_IMAGE_RESIZER: 'YandexImageResizer', + YANDEX_IMAGES: 'YandexImages', + YANDEX_MARKET: 'YandexMarket', + YANDEX_MEDIA: 'YandexMedia', + YANDEX_METRIKA: 'YandexMetrika', + YANDEX_MOBILE_BOT: 'YandexMobileBot', + YANDEX_MOBILE_SCREENSHOT_BOT: 'YandexMobileScreenShotBot', + YANDEX_NEWS: 'YandexNews', + YANDEX_ONTODB: 'YandexOntoDB', + YANDEX_ONTODB_API: 'YandexOntoDBAPI', + YANDEX_PARTNER: 'YandexPartner', + YANDEX_RCA: 'YandexRCA', + YANDEX_RENDERRESOURCES_BOT: 'YandexRenderResourcesBot', + YANDEX_SCREENSHOT_BOT: 'YandexScreenshotBot', + YANDEX_SPRAV_BOT: 'YandexSpravBot', + YANDEX_TRACKER: 'YandexTracker', + YANDEX_VERTICALS: 'YandexVerticals', + YANDEX_VERTIS: 'YandexVertis', + YANDEX_VIDEO: 'YandexVideo', + YANDEX_VIDEO_PARSER: 'YandexVideoParser', + YANDEX_WEBMASTER: 'YandexWebmaster', YEP_BOT: 'YepBot', YETI: 'Yeti', YISOU_SPIDER: 'YisouSpider', @@ -624,6 +652,15 @@ const Extension = 
Object.freeze({ VERCEL_BOT: 'Vercelbot', VERCEL_FLAGS: 'vercelflags', VERCEL_TRACING: 'verceltracing', + YANDEX_CALENDAR: 'YandexCalendar', + YANDEX_DIRECT: 'YandexDirect', + YANDEX_DIRECTDYN: 'YandexDirectDyn', + YANDEX_DIRECTFETCHER: 'YaDirectFetcher', + YANDEX_FORDOMAIN: 'YandexForDomain', + YANDEX_PAGECHECKER: 'YandexPagechecker', + YANDEX_SEARCHSHOP: 'YandexSearchShop', + YANDEX_SITELINKS: 'YandexSitelinks', + YANDEX_USERPROXY: 'YandexUserproxy', WHATSAPP: 'WhatsApp', ZOOMINFO_BOT: 'Zoombot' }, diff --git a/src/extensions/ua-parser-extensions.js b/src/extensions/ua-parser-extensions.js index 3d6c9fc..5f345e1 100644 --- a/src/extensions/ua-parser-extensions.js +++ b/src/extensions/ua-parser-extensions.js @@ -109,7 +109,7 @@ const Crawlers = Object.freeze({ /(y!?j-(?:asr|br[uw]|dscv|mmp|vsidx|wsc))\/([\w\.]+)/i, // Yandex Bots - https://yandex.com/bots - /(yandex(?:(?:mobile)?(?:accessibility|additional|renderresources|screenshot|sprav)?bot|image(?:s|resizer)|video(?:parser)?|blogs|adnet|favicons|fordomain|market|media|metrika|news|ontodb(?:api)?|pagechecker|partner|rca|tracker|turbo|vertis|webmaster|antivirus))\/([\w\.]+)/i, + /(yandex(?:(?:mobile)?(?:accessibility|additional|com|renderresources|screenshot|sprav)?bot(?!.+mirror)|image(?:s|resizer)|adnet|blogs|favicons|market|media|metrika|news|ontodb(?:api)?|partner|rca|tracker|turbo|verti(?:cal)?s|webmaster|video(?:parser)?))\/([\w\.]+)/i, // Yeti (Naver) /(yeti)\/([\w\.]+)/i, @@ -119,9 +119,14 @@ const Crawlers = Object.freeze({ // Freespoke - https://docs.freespoke.com/search/bot/ /((?:aihit|blex|diff|huggingface-|msn|pangu|replicate-|runpod-|timpi|together-|xai-|you|zum)bot|(?:magpie-|velenpublicweb)crawler|(?:chatglm-|line|screaming frog seo |yisou)spider|cotoyogi|firecrawlagent|freespoke|omgili(?:bot)?|openai image downloader|startpageprivateimageproxy|twinagent|webzio-extended)\/?([\w\.]*)/i ], - [NAME, VERSION, [TYPE, CRAWLER]], + [ + // YandexBot MirrorDetector + /(yandexbot\/([\w\.]+); 
mirrordetector)/i + ], + [[NAME, /\/.+;/ig, ''], VERSION, [TYPE, CRAWLER]], + [ // Google Bots /((?:adsbot|apis|mediapartners)-google(?:-mobile)?|google-?(?:other|cloudvertexbot|extended|safety))/i, @@ -260,7 +265,7 @@ const Fetchers = Object.freeze({ // Perplexity-User - https://docs.perplexity.ai/guides/bots // MistralAI-User - https://docs.mistral.ai/robots/ // Yandex Bots - https://yandex.com/bots - /(asana|ahrefssiteaudit|(?:bing|microsoft)preview|blueno|(?:chatgpt|claude|mistralai|perplexity)-user|cohere-ai|hubspot page fetcher|mastodon|(?:bitly|bufferlinkpreview|discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero|zoom)bot|google-site-verification|iframely|kakaotalk-scrap|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i, + /(asana|ahrefssiteaudit|(?:bing|microsoft)preview|blueno|(?:chatgpt|claude|mistralai|perplexity)-user|cohere-ai|hubspot page fetcher|mastodon|(?:bitly|bufferlinkpreview|discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero|zoom)bot|google-site-verification|iframely|kakaotalk-scrap|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|fordomain|pagechecker|searchshop)|yadirectfetcher)\/([\w\.]+)/i, // Bluesky /(bluesky) cardyb\/([\w\.]+)/i, diff --git a/test/data/ua/extension/crawler.json b/test/data/ua/extension/crawler.json index 61d6a21..312831e 100644 --- a/test/data/ua/extension/crawler.json +++ b/test/data/ua/extension/crawler.json @@ -1170,6 +1170,46 @@ "type" : "crawler" } }, + { + "desc" : "YandexAccessibilityBot", + "ua" : "Mozilla/5.0 (compatible; YandexAccessibilityBot/3.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexAccessibilityBot", + "version" : "3.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexAdditionalBot", + "ua" : "Mozilla/5.0 (compatible; YandexAdditionalBot/3.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexAdditionalBot", + "version" : "3.0", + "type" : "crawler" 
+ } + }, + { + "desc" : "YandexAdNet", + "ua" : "Mozilla/5.0 (compatible; YandexAdNet/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexAdNet", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexBlogs", + "ua" : "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexBlogs", + "version" : "0.99", + "type" : "crawler" + } + }, { "desc" : "YandexBot", "ua" : "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)", @@ -1180,6 +1220,246 @@ "type" : "crawler" } }, + { + "desc" : "YandexBot MirrorDetector", + "ua" : "Mozilla/5.0 (compatible; YandexBot/3.0; MirrorDetector; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexBot MirrorDetector", + "version" : "3.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexComBot", + "ua" : "Mozilla/5.0 (compatible; YandexComBot/3.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexComBot", + "version" : "3.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexFavicons", + "ua" : "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexFavicons", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexImageResizer", + "ua" : "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexImageResizer", + "version" : "2.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexImages", + "ua" : "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexImages", + "version" : "3.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexMarket", + "ua" : "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexMarket", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexMetrika", + "ua" : "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots)", + "expect" : + { + 
"name" : "YandexMetrika", + "version" : "2.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexMedia", + "ua" : "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexMedia", + "version" : "3.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexMobileBot", + "ua" : "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexMobileBot", + "version" : "3.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexMobileScreenShotBot", + "ua" : "Mozilla/5.0 (compatible; YandexMobileScreenShotBot/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexMobileScreenShotBot", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexNews", + "ua" : "Mozilla/5.0 (compatible; YandexNews/4.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexNews", + "version" : "4.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexOntoDB", + "ua" : "Mozilla/5.0 (compatible; YandexOntoDB/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexOntoDB", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexOntoDBAPI", + "ua" : "Mozilla/5.0 (compatible; YandexOntoDBAPI/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexOntoDBAPI", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexPartner", + "ua" : "Mozilla/5.0 (compatible; YandexPartner/3.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexPartner", + "version" : "3.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexRCA", + "ua" : "Mozilla/5.0 (compatible; YandexRCA/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexRCA", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexRenderResourcesBot", + "ua" : "Mozilla/5.0 (compatible; YandexRenderResourcesBot/1.0; 
+http://yandex.com/bots)", + "expect" : + { + "name" : "YandexRenderResourcesBot", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexScreenshotBot", + "ua" : "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexScreenshotBot", + "version" : "3.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexSpravBot", + "ua" : "Mozilla/5.0 (compatible; YandexSpravBot/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexSpravBot", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexTracker", + "ua" : "Mozilla/5.0 (compatible; YandexTracker/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexTracker", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexVertis", + "ua" : "Mozilla/5.0 (compatible; YandexVertis/3.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexVertis", + "version" : "3.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexVerticals", + "ua" : "Mozilla/5.0 (compatible; YandexVerticals/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexVerticals", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexVideo", + "ua" : "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexVideo", + "version" : "3.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexVideoParser", + "ua" : "Mozilla/5.0 (compatible; YandexVideoParser/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexVideoParser", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "YandexWebmaster", + "ua" : "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexWebmaster", + "version" : "2.0", + "type" : "crawler" + } + }, { "desc" : "YepBot", "ua" : "Mozilla/5.0 (compatible; 
YepBot/1.0; +http://yep.com/yepbot/)", diff --git a/test/data/ua/extension/fetcher.json b/test/data/ua/extension/fetcher.json index 33df886..7b0bd4e 100644 --- a/test/data/ua/extension/fetcher.json +++ b/test/data/ua/extension/fetcher.json @@ -419,6 +419,96 @@ "type" : "fetcher" } }, + { + "desc" : "YaDirectFetcher", + "ua" : "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; Dyatel; +http://yandex.com/bots)", + "expect" : + { + "name" : "YaDirectFetcher", + "version" : "1.0", + "type" : "fetcher" + } + }, + { + "desc" : "YandexCalendar", + "ua" : "Mozilla/5.0 (compatible; YandexCalendar/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexCalendar", + "version" : "1.0", + "type" : "fetcher" + } + }, + { + "desc" : "YandexDirect", + "ua" : "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexDirect", + "version" : "3.0", + "type" : "fetcher" + } + }, + { + "desc" : "YandexDirectDyn", + "ua" : "Mozilla/5.0 (compatible; YandexDirectDyn/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexDirectDyn", + "version" : "1.0", + "type" : "fetcher" + } + }, + { + "desc" : "YandexForDomain", + "ua" : "Mozilla/5.0 (compatible; YandexForDomain/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexForDomain", + "version" : "1.0", + "type" : "fetcher" + } + }, + { + "desc" : "YandexPagechecker", + "ua" : "Mozilla/5.0 (compatible; YandexPagechecker/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexPagechecker", + "version" : "1.0", + "type" : "fetcher" + } + }, + { + "desc" : "YandexSearchShop", + "ua" : "Mozilla/5.0 (compatible; YandexSearchShop/1.0; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexSearchShop", + "version" : "1.0", + "type" : "fetcher" + } + }, + { + "desc" : "YandexSitelinks", + "ua" : "Mozilla/5.0 (compatible; YandexSitelinks; Dyatel; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexSitelinks", + "version" : "undefined", + "type" : 
"fetcher" + } + }, + { + "desc" : "YandexUserproxy", + "ua" : "Mozilla/5.0 (compatible; YandexUserproxy; robot; +http://yandex.com/bots)", + "expect" : + { + "name" : "YandexUserproxy", + "version" : "undefined", + "type" : "fetcher" + } + }, { "desc" : "Zoombot", "ua" : "Mozilla/5.0 (compatible; Zoombot/1.0; +https://zoom.us; crawler@domain.com)",