From 146f182533cad0f2f2303c6e9aa98db923e653f3 Mon Sep 17 00:00:00 2001 From: Faisal Salman Date: Sun, 31 Aug 2025 20:04:49 +0700 Subject: [PATCH] [extensions] Improve bot detection for ByteDance, Google, SB Intuitions, Webzio --- src/enums/ua-parser-enums.js | 20 ++++++--- src/extensions/ua-parser-extensions.js | 7 +-- src/helpers/ua-parser-helpers.js | 4 ++ test/data/ua/extension/crawler.json | 60 ++++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 10 deletions(-) diff --git a/src/enums/ua-parser-enums.js b/src/enums/ua-parser-enums.js index d06e1db..7d5eb6d 100644 --- a/src/enums/ua-parser-enums.js +++ b/src/enums/ua-parser-enums.js @@ -441,7 +441,6 @@ const Extension = Object.freeze({ WGET: 'wget' }, Crawlers: { - '360_SPIDER': '360Spider', AHREFS_BOT: 'AhrefsBot', AI2_BOT: 'AI2Bot', AIHIT_BOT: 'aiHitBot', @@ -468,9 +467,9 @@ const Extension = Object.freeze({ BLEX_BOT: 'BLEXBot', BOTIFY: 'botify', BRAVE_BOT: 'Bravebot', - BYTEDANCE_SPIDER: 'Bytespider', + BYTEDANCE_BYTESPIDER: 'Bytespider', + BYTEDANCE_TIKTOKSPIDER: 'TikTokSpider', CC_BOT: 'CCBot', - CHATGLM_SPIDER: 'ChatGLM-Spider', COCCOC_BOT_WEB: 'coccocbot-web', COCCOC_BOT_IMAGE: 'coccocbot-image', COHERE_TRAINING_DATA_CRAWLER: 'cohere-training-data-crawler', @@ -492,10 +491,12 @@ const Extension = Object.freeze({ GOOGLE_ADSBOT: 'AdsBot-Google', GOOGLE_ADSBOT_MOBILE: 'Adsbot-Google-Mobile', GOOGLE_ADSENSE: 'AdSense', + GOOGLE_APIS: 'APIs-Google', GOOGLE_BOT: 'Googlebot', GOOGLE_BOT_IMAGE: 'Googlebot-Image', GOOGLE_BOT_NEWS: 'Googlebot-News', GOOGLE_BOT_VIDEO: 'Googlebot-Video', + GOOGLE_CLOUDVERTEXBOT: 'Google-CloudVertexBot', GOOGLE_INSPECTIONTOOL: 'Google-InspectionTool', GOOGLE_OTHER: 'GoogleOther', GOOGLE_OTHER_IMAGE: 'GoogleOther-Image', @@ -525,16 +526,16 @@ const Extension = Object.freeze({ MICROSOFT_ADIDXBOT: 'adidxbot', MOJEEK_BOT: 'MojeekBot', MOZ_DOTBOT: 'DotBot', - OMGILI: 'omgili', - OMGILI_BOT: 'omgilibot', ONCRAWL: 'OnCrawl', ONESPOT_SCRAPERBOT: 'Onespot-ScraperBot', OPENAI_GPTBOT: 'GPTBot', OPENAI_SEARCH: 'OAI-SearchBot', PERPLEXITY_BOT: 'PerplexityBot', + QIHOO_360_SPIDER: '360Spider', QWANT_BOT: 'Qwantbot', REPLICATE_BOT: 'Replicate-Bot', RUNPOD_BOT: 'RunPod-Bot', + SB_INTUITIONS_BOT: 'SBIntuitionsBot', SEEKPORT_BOT: 'SeekportBot', SEMRUSH_BOT: 'SemrushBot', SEMRUSH_BOT_BACKLINK: 'SemrushBot-BA', @@ -549,8 +550,12 @@ const Extension = Object.freeze({ TOGETHER_BOT: 'Together-Bot', TURNITIN_BOT: 'TurnitinBot', TWIN_AGENT: 'TwinAgent', - XAI_BOT: 'xAI-Bot', VERCEL_V0BOT: 'v0bot', + WEBZIO: 'webzio', + WEBZIO_EXTENDED: 'Webzio-Extended', + WEBZIO_OMGILI: 'omgili', + WEBZIO_OMGILI_BOT: 'omgilibot', + XAI_BOT: 'xAI-Bot', YAHOO_JAPAN: 'Y!J-BRW', YAHOO_SLURP: 'Yahoo! Slurp', YANDEX_ACCESSIBILITY_BOT: 'YandexAccessibilityBot', @@ -586,6 +591,7 @@ const Extension = Object.freeze({ YETI: 'Yeti', YISOU_SPIDER: 'YisouSpider', YOU_BOT: 'YouBot', + ZHIPU_CHATGLM_SPIDER: 'ChatGLM-Spider', ZUM_BOT: 'ZumBot' }, Emails: { @@ -624,7 +630,7 @@ const Extension = Object.freeze({ GOOGLE_CHROME_LIGHTHOUSE: 'Chrome-Lighthouse', GOOGLE_FEEDFETCHER: 'FeedFetcher-Google', GOOGLE_GEMINI_DEEP_RESEARCH: 'Gemini-Deep-Research', - GOOGLE_IMAGE_PROXY: 'GoogleImageProxy', + GOOGLE_IMAGEPROXY: 'GoogleImageProxy', GOOGLE_PAGERENDERER: 'Google-PageRenderer', GOOGLE_READ_ALOUD: 'Google-Read-Aloud', GOOGLE_PRODUCER: 'GoogleProducer', diff --git a/src/extensions/ua-parser-extensions.js b/src/extensions/ua-parser-extensions.js index 5f345e1..cc6f8d0 100644 --- a/src/extensions/ua-parser-extensions.js +++ b/src/extensions/ua-parser-extensions.js @@ -61,9 +61,10 @@ const Crawlers = Object.freeze({ // Onespot - https://www.onespot.com/identifying-traffic.html // OpenAI's SearchGPT - https://platform.openai.com/docs/bots // PerplexityBot - https://perplexity.ai/perplexitybot + // SBIntuitionsBot - https://www.sbintuitions.co.jp/bot/ // SeznamBot - http://napoveda.seznam.cz/seznambot-intro // YepBot - https://yep.com/yepbot/ - /((?:adidx|ahrefs|amazon|bing|brave|cc|contx|coveo|criteo|dot|duckduck(?:go-favicons-)?|exa|facebook|gpt|iask|kagi|kangaroo |linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam|yep)bot)\/([\w\.-]+)/i, + /((?:adidx|ahrefs|amazon|bing|brave|cc|contx|coveo|criteo|dot|duckduck(?:go-favicons-)?|exa|facebook|gpt|iask|kagi|kangaroo |linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|sbintuitions|semrush|seznam|yep)bot)\/([\w\.-]+)/i, // Algolia Crawler /(algolia crawler(?: renderscript)?)\/?([\w\.]*)/i, @@ -139,8 +140,8 @@ const Crawlers = Object.freeze({ // TurnitinBot - https://www.turnitin.com/robot/crawlerinfo.html // v0bot - https://vercel.com/docs/bot-management // Yahoo! Slurp - http://help.yahoo.com/help/us/ysearch/slurp - // Botify / Bytespider / DeepSeekBot / Qihoo 360Spider / SeekportBot - /\b((?:ai2|aspiegel|dataforseo|deepseek|imagesift|petal|seekport|turnitin|v0)bot|360spider-?(?:image|video)?|baidu-ads|botify|bytespider|cohere-training-data-crawler|elastic(?=\/s)|marginalia|siteimprove(?=bot|\.com)|teoma|yahoo! slurp)/i + // Botify / Bytespider / DeepSeekBot / Qihoo 360Spider / SeekportBot / TikTokSpider + /\b((ai2|aspiegel|dataforseo|deepseek|imagesift|petal|seekport|turnitin|v0)bot|360spider-?(image|video)?|baidu-ads|botify|(byte|tiktok)spider|cohere-training-data-crawler|elastic(?=\/s)|marginalia|siteimprove(?=bot|\.com)|teoma|webzio|yahoo! slurp)/i ], [NAME, [TYPE, CRAWLER]] ] diff --git a/src/helpers/ua-parser-helpers.js b/src/helpers/ua-parser-helpers.js index 6626ad5..e8af693 100644 --- a/src/helpers/ua-parser-helpers.js +++ b/src/helpers/ua-parser-helpers.js @@ -88,6 +88,7 @@ const isAIBot = (resultOrUA) => [ 'googleother', 'googleother-image', 'googleother-video', + 'google-cloudvertexbot', 'google-extended', // Hive AI @@ -123,6 +124,9 @@ const isAIBot = (resultOrUA) => [ // Runpod 'runpod-bot', + // SB Intuitions + 'sbintuitionsbot', + // Semrush 'semrushbot-ocob', diff --git a/test/data/ua/extension/crawler.json b/test/data/ua/extension/crawler.json index 312831e..d00a108 100644 --- a/test/data/ua/extension/crawler.json +++ b/test/data/ua/extension/crawler.json @@ -579,6 +579,16 @@ "type" : "crawler" } }, + { + "desc" : "APIs-Google", + "ua" : "APIs-Google (+https://developers.google.com/webmasters/APIs-Google.html)", + "expect" : + { + "name" : "APIs-Google", + "version" : "undefined", + "type" : "crawler" + } + }, { "desc" : "Googlebot-Video", "ua" : "Googlebot-Video/1.0", @@ -679,6 +689,16 @@ "type" : "crawler" } }, + { + "desc" : "Google-CloudVertexBot", + "ua" : "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.7204.183 Mobile Safari/537.36 (compatible; Google-CloudVertexBot; +https://cloud.google.com/enterprise-search)", + "expect" : + { + "name" : "Google-CloudVertexBot", + "version" : "undefined", + "type" : "crawler" + } + }, { "desc" : "Google-Safety", "ua" : "Google-Safety", @@ -970,6 +990,16 @@ "type" : "crawler" } }, + { + "desc" : "SBIntuitionsBot", + "ua" : "Mozilla/5.0 (compatible; SBIntuitionsBot/0.1;+https://www.sbintuitions.co.jp/bot/)", + "expect" : + { + "name" : "SBIntuitionsBot", + "version" : "0.1", + "type" : "crawler" + } + }, { "desc" : "SeekportBot", "ua" : "Mozilla/5.0 (compatible; SeekportBot; +https://bot.seekport.com)", @@ -1080,6 +1110,16 @@ "type" : "crawler" } }, + { + "desc" : "TikTokSpider", + "ua" : "Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; TikTokSpider; ttspider-feedback@tiktok.com)", + "expect" : + { + "name" : "TikTokSpider", + "version" : "undefined", + "type" : "crawler" + } + }, { "desc" : "Timpibot", "ua" : "Timpibot/0.8 (+http://www.timpi.io)", @@ -1150,6 +1190,26 @@ "type" : "crawler" } }, + { + "desc" : "webzio", + "ua" : "webzio (+https://webz.io/bot.html)", + "expect" : + { + "name" : "webzio", + "version" : "undefined", + "type" : "crawler" + } + }, + { + "desc" : "Webzio-Extended", + "ua" : "Mozilla/5.0 (compatible; Webzio-Extended/1.0; +https://www.webzio.com/bot.html)", + "expect" : + { + "name" : "Webzio-Extended", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "Yahoo! Japan", "ua" : "Y!J-BRW/1.0 (https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716)",