diff --git a/src/extensions/ua-parser-extensions.js b/src/extensions/ua-parser-extensions.js index d20dab0..4e93b00 100644 --- a/src/extensions/ua-parser-extensions.js +++ b/src/extensions/ua-parser-extensions.js @@ -43,6 +43,7 @@ const Crawlers = Object.freeze({ // AhrefsBot - https://ahrefs.com/robot // Amazonbot - https://developer.amazon.com/amazonbot // Bingbot / AdIdxBot - https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0 + // Bravebot - https://search.brave.com/help/brave-search-crawler // CCBot - https://commoncrawl.org/faq // Coveobot - https://connect.coveo.com/s/article/19648 // CriteoBot - https://www.criteo.com/criteo-crawler/ @@ -51,6 +52,7 @@ const Crawlers = Object.freeze({ // FacebookBot - https://developers.facebook.com/docs/sharing/bot/ // GPTBot - https://platform.openai.com/docs/gptbot // iAskBot - https://iask.ai + // Kangaroo Bot - https://kangaroollm.com.au/kangaroo-bot/ // LinkedInBot - http://www.linkedin.com // MJ12bot - https://mj12bot.com/ // MojeekBot - https://www.mojeek.com/bot.html @@ -58,7 +60,7 @@ const Crawlers = Object.freeze({ // OpenAI's SearchGPT - https://platform.openai.com/docs/bots // PerplexityBot - https://perplexity.ai/perplexitybot // SeznamBot - http://napoveda.seznam.cz/seznambot-intro - /((?:adidx|ahrefs|amazon|bing|cc|coveo|criteo|dot|duckduck(?:go-favicons-)?|exa|facebook|gpt|iask|linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i, + /((?:adidx|ahrefs|amazon|bing|brave|cc|coveo|criteo|dot|duckduck(?:go-favicons-)?|exa|facebook|gpt|iask|kangaroo |linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i, // Applebot - http://apple.com/go/applebot /(applebot(?:-extended)?)\/?([\w\.]*)/i, @@ -103,8 +105,9 @@ const Crawlers = Object.freeze({ // Yeti (Naver) /(yeti)\/([\w\.]+)/i, - // aiHitBot / Diffbot / Linespider / Magpie-Crawler / Omgilibot / OpenAI Image Downloader / Webzio-Extended / Screaming Frog SEO Spider / Startpage / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot - /((?:aihit|diff|timpi|you)bot|omgili(?:bot)?|openai image downloader|(?:magpie-|velenpublicweb)crawler|startpageprivateimageproxy|webzio-extended|(?:chatglm-|line|screaming frog seo |yisou)spider)\/?([\w\.]*)/i + // aiHitBot / Diffbot / FirecrawlAgent / HuggingFace-Bot / Linespider / Magpie-Crawler / Omgilibot / OpenAI Image Downloader / PanguBot / Replicate-Bot / RunPod-Bot / Webzio-Extended / Screaming Frog SEO Spider / Startpage / Timpibot / Together-Bot / VelenPublicWebCrawler / xAI-Bot / YisouSpider / YouBot + // Cotoyogi - https://ds.rois.ac.jp/en_center8/en_crawler/ + /((?:aihit|diff|huggingface-|pangu|replicate-|runpod-|timpi|together-|xai-|you)bot|omgili(?:bot)?|cotoyogi|firecrawlagent|openai image downloader|(?:magpie-|velenpublicweb)crawler|startpageprivateimageproxy|webzio-extended|(?:chatglm-|line|screaming frog seo |yisou)spider)\/?([\w\.]*)/i ], [NAME, VERSION, [TYPE, CRAWLER]], diff --git a/src/helpers/ua-parser-helpers.js b/src/helpers/ua-parser-helpers.js index b7b88d2..912ccdb 100644 --- a/src/helpers/ua-parser-helpers.js +++ b/src/helpers/ua-parser-helpers.js @@ -59,6 +59,9 @@ const isAIBot = (resultOrUA) => [ 'applebot', 'applebot-extended', + // Brave + 'bravebot', + // ByteDance 'bytespider', 'tiktokspider', @@ -92,6 +95,16 @@ const isAIBot = (resultOrUA) => [ // Huawei 'petalbot', + 'pangubot', + + // Hugging Face + 'huggingface-bot', + + // Kangaroo + 'kangaroo bot', + + // Mendable.ai + 'firecrawlagent', // Meta 'facebookbot', @@ -104,12 +117,21 @@ const isAIBot = (resultOrUA) => [ // Perplexity 'perplexitybot', + // Replicate + 'replicate-bot', + + // Runpod + 'runpod-bot', + // Semrush 'semrushbot-ocob', // Timpi 'timpibot', + // Together AI + 'together-bot', + // Velen.io 'velenpublicwebcrawler', @@ -121,6 +143,9 @@ const isAIBot = (resultOrUA) => [ 'omgilibot', 'webzio-extended', + // X + 'xai-bot', + // You.com 'youbot', diff --git a/test/data/ua/extension/crawler.json b/test/data/ua/extension/crawler.json index 8887a45..f754b66 100644 --- a/test/data/ua/extension/crawler.json +++ b/test/data/ua/extension/crawler.json @@ -229,6 +229,16 @@ "type" : "crawler" } }, + { + "desc" : "Bravebot", + "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Bravebot/1.0; +https://search.brave.com/help/brave-search-crawler) Chrome/W.X.Y.Z Safari/537.36", + "expect" : + { + "name" : "Bravebot", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "Bytespider", "ua" : "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.1511.1269 Mobile Safari/537.36; Bytespider", @@ -309,6 +319,16 @@ "type" : "crawler" } }, + { + "desc" : "Cotoyogi", + "ua" : "Mozilla/5.0 (compatible; Cotoyogi/4.0; +https://ds.rois.ac.jp/center8/crawler/)", + "expect" : + { + "name" : "Cotoyogi", + "version" : "4.0", + "type" : "crawler" + } + }, { "desc" : "Coveobot", "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) (compatible; Coveobot/2.0;+http://www.coveo.com/bot.html)", @@ -469,6 +489,16 @@ "type" : "crawler" } }, + { + "desc" : "FirecrawlAgent", + "ua" : "Mozilla/5.0 (compatible; FirecrawlAgent/1.0)", + "expect" : + { + "name" : "FirecrawlAgent", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "Googlebot-Video", "ua" : "Googlebot-Video/1.0", @@ -589,6 +619,16 @@ "type" : "crawler" } }, + { + "desc" : "HuggingFace-Bot", + "ua" : "Mozilla/5.0 (compatible; HuggingFace-Bot/1.0; +https://huggingface.co/)", + "expect" : + { + "name" : "HuggingFace-Bot", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "iAskBot", "ua" : "Mozilla/5.0 AppleWebKit/605.1.15 (KHTML, like Gecko; compatible; iAskBot/1.0; +https://iask.ai/) Chrome/120.0.6099.119 Safari/605.1.15", @@ -609,6 +649,16 @@ "type" : "crawler" } }, + { + "desc" : "Kangaroo Bot", + "ua" : "Mozilla/5.0 (compatible; Kangaroo Bot/1.0)", + "expect" : + { + "name" : "Kangaroo Bot", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "Linespider", "ua" : "Mozilla/5.0 (compatible; Linespider/1.1; +https://lin.ee/4dwXkTH)", @@ -710,6 +760,16 @@ "type" : "crawler" } }, + { + "desc" : "PanguBot", + "ua" : "Mozilla/5.0 (compatible; PanguBot/1.0)", + "expect" : + { + "name" : "PanguBot", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "PerplexityBot", "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; PerplexityBot/1.0; +https://perplexity.ai/perplexitybot)", @@ -770,6 +830,26 @@ "type" : "crawler" } }, + { + "desc" : "Replicate-Bot", + "ua" : "Mozilla/5.0 (compatible; Replicate-Bot/1.0; +https://replicate.com/)", + "expect" : + { + "name" : "Replicate-Bot", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "RunPod-Bot", + "ua" : "Mozilla/5.0 (compatible; RunPod-Bot/1.0; +https://runpod.io/)", + "expect" : + { + "name" : "RunPod-Bot", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "SemrushBot", "ua" : "Mozilla/5.0 (compatible; SemrushBot/7~bl; +http://www.semrush.com/bot.html)", @@ -860,6 +940,16 @@ "type" : "crawler" } }, + { + "desc" : "Together-Bot", + "ua" : "Mozilla/5.0 (compatible; Together-Bot/1.0; +https://together.ai/)", + "expect" : + { + "name" : "Together-Bot", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "TurnitinBot", "ua" : "TurnitinBot (https://turnitin.com/robot/crawlerinfo.html)", @@ -870,6 +960,16 @@ "type" : "crawler" } }, + { + "desc" : "xAI-Bot", + "ua" : "Mozilla/5.0 (compatible; xAI-Bot/1.0; +https://x.ai/)", + "expect" : + { + "name" : "xAI-Bot", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "VelenPublicWebCrawler", "ua" : "Mozilla/5.0 (compatible; VelenPublicWebCrawler/1.0; +https://velen.io)",