diff --git a/src/extensions/ua-parser-extensions.js b/src/extensions/ua-parser-extensions.js index 06ef90f..1f611a7 100644 --- a/src/extensions/ua-parser-extensions.js +++ b/src/extensions/ua-parser-extensions.js @@ -58,7 +58,7 @@ const Crawlers = Object.freeze({ // OpenAI's SearchGPT - https://platform.openai.com/docs/bots // PerplexityBot - https://perplexity.ai/perplexitybot // SeznamBot - http://napoveda.seznam.cz/seznambot-intro - /((?:adidx|ahrefs|amazon|bing|cc|coveo|criteo|dot|duckduck|exa|facebook|gpt|iask|linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i, + /((?:adidx|ahrefs|amazon|bing|cc|coveo|criteo|dot|duckduck(?:go-favicons-)?|exa|facebook|gpt|iask|linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i, // Applebot - http://apple.com/go/applebot /(applebot(?:-extended)?)\/?([\w\.]*)/i, @@ -116,12 +116,13 @@ const Crawlers = Object.freeze({ // AI2Bot - https://allenai.org/crawler // Bytespider // DataForSeoBot - https://dataforseo.com/dataforseo-bot + // DeepSeekBot // Huawei AspiegelBot / PetalBot https://aspiegel.com/petalbot // ImagesiftBot - https://imagesift.com/about // Qihoo 360Spider // TurnitinBot - https://www.turnitin.com/robot/crawlerinfo.html // Yahoo! Slurp - http://help.yahoo.com/help/us/ysearch/slurp - /\b(360spider-?(?:image|video)?|bytespider|(?:ai2|aspiegel|dataforseo|imagesift|petal|turnitin)bot|teoma|yahoo! slurp)/i + /\b(360spider-?(?:image|video)?|bytespider|cohere-training-data-crawler|elastic(?=\/s)|(?:ai2|aspiegel|dataforseo|deepseek|imagesift|petal|turnitin)bot|teoma|yahoo! slurp)/i ], [NAME, [TYPE, CRAWLER]] ] @@ -236,17 +237,17 @@ const Emails = Object.freeze({ const Fetchers = Object.freeze({ browser : [ [ + // Asana / Bitlybot / Better Uptime / BingPreview / Blueno / kakaotalk-scrap / Mastodon / MicrosoftPreview / Pinterestbot / Redditbot / Rogerbot / SiteAuditBot / Telegrambot / Twitterbot / UptimeRobot // AhrefsSiteAudit - https://ahrefs.com/robot/site-audit // Buffer Link Preview Bot - https://scraper.buffer.com/about/bots/link-preview-bot // ChatGPT-User - https://platform.openai.com/docs/plugins/bot // DuckAssistBot - https://duckduckgo.com/duckassistbot/ - // Better Uptime / BingPreview / Blueno / Mastodon / MicrosoftPreview / Pinterestbot / Redditbot / Rogerbot / SiteAuditBot / Telegrambot / Twitterbot / UptimeRobot // Google Site Verifier / Meta / Yahoo! Japan // Iframely - https://iframely.com/docs/about // Perplexity-User - https://docs.perplexity.ai/guides/bots // MistralAI-User - https://docs.mistral.ai/robots/ // Yandex Bots - https://yandex.com/bots - /(ahrefssiteaudit|(?:bing|microsoft)preview|blueno|(?:chatgpt|claude|mistralai|perplexity)-user|mastodon|(?:bufferlinkpreview|discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero)bot|google-site-verification|iframely|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i, + /(asana|ahrefssiteaudit|(?:bing|microsoft)preview|blueno|(?:chatgpt|claude|mistralai|perplexity)-user|mastodon|(?:bitly|bufferlinkpreview|discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero|zoom)bot|google-site-verification|iframely|kakaotalk-scrap|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i, // Bluesky /(bluesky) cardyb\/([\w\.]+)/i, @@ -263,8 +264,8 @@ const Fetchers = Object.freeze({ [NAME, VERSION, [TYPE, FETCHER]], [ - // Google Bots / Cohere / Snapchat / Vercelbot / Yandex Bots - /((?:better uptime |telegram|vercel)bot|cohere-ai|feedfetcher-google|google(?:imageproxy|-read-aloud|-pagerenderer|producer)|snap url preview|yandex(?:sitelinks|userproxy))/i + // Google Bots / Chrome-Lighthouse / Cohere / Gemini-Deep-Research / Snapchat / TikTokSpider / Vercelbot / Yandex Bots + /((?:better uptime |telegram|vercel)bot|chrome-lighthouse|cohere-ai|feedfetcher-google|gemini-deep-research|google(?:imageproxy|-read-aloud|-pagerenderer|producer)|snap url preview|tiktokspider|yandex(?:sitelinks|userproxy))/i ], [NAME, [TYPE, FETCHER]], ], diff --git a/src/helpers/ua-parser-helpers.js b/src/helpers/ua-parser-helpers.js index c62a541..94b0482 100644 --- a/src/helpers/ua-parser-helpers.js +++ b/src/helpers/ua-parser-helpers.js @@ -62,6 +62,9 @@ const isAIBot = (resultOrUA) => [ // ByteDance 'bytespider', + // Cohere + 'cohere-training-data-crawler', + // Common Crawl 'ccbot', diff --git a/test/data/ua/extension/crawler.json b/test/data/ua/extension/crawler.json index 69743a0..73de476 100644 --- a/test/data/ua/extension/crawler.json +++ b/test/data/ua/extension/crawler.json @@ -299,6 +299,16 @@ "type" : "crawler" } }, + { + "desc" : "cohere-training-data-crawler", + "ua" : "cohere-training-data-crawler (+crawler@cohere.ai)", + "expect" : + { + "name" : "cohere-training-data-crawler", + "version" : "undefined", + "type" : "crawler" + } + }, { "desc" : "Coveobot", "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) (compatible; Coveobot/2.0;+http://www.coveo.com/bot.html)", diff --git a/test/data/ua/extension/fetcher.json b/test/data/ua/extension/fetcher.json index e1f3951..088b76c 100644 --- a/test/data/ua/extension/fetcher.json +++ b/test/data/ua/extension/fetcher.json @@ -109,6 +109,16 @@ "type" : "fetcher" } }, + { + "desc" : "Gemini-Deep-Research", + "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Gemini-Deep-Research; +https://gemini.google/overview/deep-research/) Chrome/135.0.0.0 Safari/537.36", + "expect" : + { + "name" : "Gemini-Deep-Research", + "version" : "undefined", + "type" : "fetcher" + } + }, { "desc" : "Google FeedFetcher", "ua" : "FeedFetcher-Google; (+http://www.google.com/feedfetcher.html)", @@ -189,6 +199,16 @@ "type" : "fetcher" } }, + { + "desc" : "kakaotalk-scrap", + "ua" : "facebookexternalhit/1.1; kakaotalk-scrap/1.0; +https://devtalk.kakao.com/t/scrap/33984", + "expect" : + { + "name" : "kakaotalk-scrap", + "version" : "1.0", + "type" : "fetcher" + } + }, { "desc" : "Meta-ExternalFetcher", "ua" : "meta-externalfetcher/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)", @@ -289,6 +309,16 @@ "type" : "fetcher" } }, + { + "desc" : "TikTokSpider", + "ua" : "Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; TikTokSpider; ttspider-feedback@tiktok.com)", + "expect" : + { + "name" : "TikTokSpider", + "version" : "undefined", + "type" : "fetcher" + } + }, { "desc" : "UptimeRobot", "ua" : "Mozilla/5.0 (compatible; UptimeRobot/2.0; http://www.uptimerobot.com/)",