diff --git a/src/extensions/ua-parser-extensions.js b/src/extensions/ua-parser-extensions.js index 1ba574f..d80e48c 100644 --- a/src/extensions/ua-parser-extensions.js +++ b/src/extensions/ua-parser-extensions.js @@ -44,26 +44,36 @@ const Crawlers = Object.freeze({ // Amazonbot - https://developer.amazon.com/amazonbot // Applebot - http://apple.com/go/applebot // Bingbot - http://www.bing.com/bingbot.htm + // CCBot - https://commoncrawl.org/faq // Dotbot - https://moz.com/help/moz-procedures/crawlers/dotbot // DuckDuckBot - http://duckduckgo.com/duckduckbot.html // FacebookBot - https://developers.facebook.com/docs/sharing/bot/ // GPTBot - https://platform.openai.com/docs/gptbot // MJ12bot - https://mj12bot.com/ - // OpenAI Search - https://platform.openai.com/docs/bots + // MojeekBot - https://www.mojeek.com/bot.html + // OpenAI's SearchGPT - https://platform.openai.com/docs/bots + // PerplexityBot - https://perplexity.ai/perplexitybot // SemrushBot - http://www.semrush.com/bot.html - /((?:ahrefs|amazon|apple|bing|dot|duckduck|facebook|gpt|mj12|oai-search|semrush)bot)\/([\w\.]+)/i, + /((?:ahrefs|amazon|apple|bing|cc|dot|duckduck|exa|facebook|gpt|mj12|mojeek|oai-search|perplexity|semrush)bot)\/([\w\.]+)/i, // Baiduspider https://help.baidu.com/question?prod_id=99&class=0&id=3001 /(baiduspider)[-imagevdonsfcpr]{0,6}\/([\w\.]+)/i, - // ClaudeBot + // ClaudeBot (Anthropic) /(claude(?:bot|-web))\/([\w\.]+)/i, // Coc Coc Bot - https://help.coccoc.com/en/search-engine /(coccocbot-(?:image|web))\/([\w\.]+)/i, + // Facebook / Meta + // https://developers.facebook.com/docs/sharing/webmasters/web-crawlers + /(facebook(?:externalhit|catalog)|meta-externalagent)\/([\w\.]+)/i, + // Googlebot - http://www.google.com/bot.html - /(google(?:bot|other)(?:-image|-video|-news|-extended)?|(?:storebot-)?google(?:-inspectiontool)?)\/?([\w\.]*)/i, + /(google(?:bot|other|-inspectiontool)(?:-image|-video|-news)?|storebot-google)\/?([\w\.]*)/i, + + // Internet Archive (archive.org) + 
/(ia_archiver|archive\.org_bot)\/?([\w\.]*)/i, // Sogou Spider /(sogou (?:pic|head|web|orion|news) spider)\/([\w\.]+)/i, @@ -72,14 +82,29 @@ const Crawlers = Object.freeze({ /(y!?j-(?:asr|br[uw]|dscv|mmp|vsidx|wsc))\/([\w\.]+)/i, // Yandex Bots - https://yandex.com/bots - /(yandex(?:(?:mobile)?(?:accessibility|additional|renderresources|screenshot|sprav)?bot|image(?:s|resizer)|video(?:parser)?|blogs|adnet|favicons|fordomain|market|media|metrika|news|ontodb(?:api)?|pagechecker|partner|rca|tracker|turbo|vertis|webmaster|antivirus))\/([\w\.]+)/i + /(yandex(?:(?:mobile)?(?:accessibility|additional|renderresources|screenshot|sprav)?bot|image(?:s|resizer)|video(?:parser)?|blogs|adnet|favicons|fordomain|market|media|metrika|news|ontodb(?:api)?|pagechecker|partner|rca|tracker|turbo|vertis|webmaster|antivirus))\/([\w\.]+)/i, + + // Yeti (Naver) + /(yeti)\/([\w\.]+)/i, + + // YisouSpider + /(yisouspider)\/?([\w\.]*)/i ], [NAME, VERSION, [TYPE, CRAWLER]], - // Bytespider - // Yahoo! Slurp - http://help.yahoo.com/help/us/ysearch/slurp - [/((?:bytespider|(?=yahoo! )slurp))/i], + [ + // Google Bots + /((?:adsbot|apis|mediapartners)-google(?:-mobile)?|google-?(?:other|cloudvertexbot|extended|safety))/i, + + // Bytespider + // DataForSeoBot - https://dataforseo.com/dataforseo-bot + // Huawei AspiegelBot / PetalBot https://aspiegel.com/petalbot + // Qihoo 360Spider + // TurnitinBot - https://www.turnitin.com/robot/crawlerinfo.html + // Yahoo! Slurp - http://help.yahoo.com/help/us/ysearch/slurp + /(360spider-?(?:image|video)?|bytespider|(?:aspiegel|dataforseo|petal|turnitin)bot|(?=yahoo! 
)slurp)/i + ], [NAME, [TYPE, CRAWLER]] ] }); @@ -184,8 +209,15 @@ const Fetchers = Object.freeze({ [ // AhrefsSiteAudit - https://ahrefs.com/robot/site-audit // ChatGPT-User - https://platform.openai.com/docs/plugins/bot + // DuckAssistBot - https://duckduckgo.com/duckassistbot/ // BingPreview / Mastodon / Pinterestbot / Redditbot / Rogerbot / Telegrambot / Twitterbot / UptimeRobot - /(ahrefssiteaudit|bingpreview|chatgpt-user|mastodon|(?:discord|linkedin|pinterest|reddit|roger|telegram|twitter|uptimero)bot)\/([\w\.]+)/i, + /(ahrefssiteaudit|bingpreview|chatgpt-user|mastodon|(?:discord|duckassist|linkedin|pinterest|reddit|roger|telegram|twitter|uptimero)bot)\/([\w\.]+)/i, + + // Google Site Verifier + /(google-site-verification)\/([\w\.]+)/i, + + // Meta + /(meta-externalfetcher)\/([\w\.]+)/i, // Slackbot - https://api.slack.com/robots /(slack(?:bot)?(?:-imgproxy|-linkexpanding)?) ([\w\.]+)/i, @@ -203,7 +235,7 @@ const Fetchers = Object.freeze({ [NAME, VERSION, [TYPE, FETCHER]], // Google Bots / Snapchat - [/(feedfetcher-google|google-read-aloud|(?=bot; )snapchat)/i], + [/(feedfetcher-google|google(?:-read-aloud|producer)|(?=bot; )snapchat)/i], [NAME, [TYPE, FETCHER]], ] }); @@ -252,8 +284,8 @@ const MediaPlayers = Object.freeze({ /(flrp)\/([\w\.-]+)/i // Flip Player ], [[NAME, 'Flip Player'], VERSION, [TYPE, MEDIAPLAYER]], [ - /(fstream|nativehost|queryseekspider|ia-archiver|facebookexternalhit)/i - // FStream/NativeHost/QuerySeekSpider/IA Archiver/facebookexternalhit + /(fstream|nativehost|queryseekspider)/i + // FStream/NativeHost/QuerySeekSpider ], [NAME, [TYPE, MEDIAPLAYER]], [ /(gstreamer) souphttpsrc.+libsoup\/([\w\.-]+)/i diff --git a/test/specs/browser-crawlers.json b/test/specs/browser-crawlers.json index 45e25ad..e527740 100644 --- a/test/specs/browser-crawlers.json +++ b/test/specs/browser-crawlers.json @@ -1,4 +1,44 @@ [ + { + "desc" : "360Spider", + "ua" : "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0); 360Spider", + "expect" : + { + 
"name" : "360Spider", + "version" : "undefined", + "type" : "crawler" + } + }, + { + "desc" : "AdsBot", + "ua" : "AdsBot-Google (+http://www.google.com/adsbot.html)", + "expect" : + { + "name" : "AdsBot-Google", + "version" : "undefined", + "type" : "crawler" + } + }, + { + "desc" : "AdsBot Mobile Web", + "ua" : "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)", + "expect" : + { + "name" : "AdsBot-Google-Mobile", + "version" : "undefined", + "type" : "crawler" + } + }, + { + "desc" : "AdSense", + "ua" : "Mediapartners-Google", + "expect" : + { + "name" : "Mediapartners-Google", + "version" : "undefined", + "type" : "crawler" + } + }, { "desc" : "AhrefsBot", "ua" : "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)", @@ -49,6 +89,16 @@ "type" : "crawler" } }, + { + "desc" : "CCBot", + "ua" : "CCBot/1.0 (+https://commoncrawl.org/bot.html)", + "expect" : + { + "name" : "CCBot", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "Coc Coc Bot (web)", "ua" : "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)", @@ -79,6 +129,16 @@ "type" : "crawler" } }, + { + "desc" : "DataForSEO", + "ua" : "Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot) ", + "expect" : + { + "name" : "DataForSeoBot", + "version" : "undefined", + "type" : "crawler" + } + }, { "desc" : "Dotbot", "ua" : "Mozilla/5.0 (compatible; DotBot/1.2; +https://opensiteexplorer.org/dotbot; help@moz.com)", @@ -89,6 +149,16 @@ "type" : "crawler" } }, + { + "desc" : "Exabot", + "ua" : "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)", + "expect" : + { + "name" : "Exabot", + "version" : "3.0", + "type" : "crawler" + } + }, { "desc" : "FacebookBot", "ua" : "Mozilla/5.0 (compatible; FacebookBot/1.0; 
+https://developers.facebook.com/docs/sharing/webmasters/facebookbot/", @@ -99,6 +169,26 @@ "type" : "crawler" } }, + { + "desc" : "FacebookExternalHit", + "ua" : "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)", + "expect" : + { + "name" : "facebookexternalhit", + "version" : "1.1", + "type" : "crawler" + } + }, + { + "desc" : "FacebookCatalog", + "ua" : "facebookcatalog/1.0", + "expect" : + { + "name" : "facebookcatalog", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "Googlebot-Video", "ua" : "Googlebot-Video/1.0", @@ -109,6 +199,106 @@ "type" : "crawler" } }, + { + "desc" : "Googlebot", + "ua" : "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", + "expect" : + { + "name" : "Googlebot", + "version" : "2.1", + "type" : "crawler" + } + }, + { + "desc" : "Googlebot Image", + "ua" : "Googlebot-Image/1.0", + "expect" : + { + "name" : "Googlebot-Image", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "Googlebot Video", + "ua" : "Googlebot-Video/1.0", + "expect" : + { + "name" : "Googlebot-Video", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "Googlebot News", + "ua" : "Googlebot-News/1.0", + "expect" : + { + "name" : "Googlebot-News", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "Google Storebot", + "ua" : "Storebot-Google/1.0", + "expect" : + { + "name" : "Storebot-Google", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "Google InspectionTool", + "ua" : "Mozilla/5.0 (compatible; Google-InspectionTool/1.0;)", + "expect" : + { + "name" : "Google-InspectionTool", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "GoogleOther", + "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GoogleOther) Chrome/41.0.2272.96 Safari/537.36", + "expect" : + { + "name" : "GoogleOther", + "version" : "undefined", + "type" : "crawler" + } + }, + { + "desc" : "GoogleOther-Image", + "ua" : 
"GoogleOther-Image/1.0", + "expect" : + { + "name" : "GoogleOther-Image", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "GoogleOther-Video", + "ua" : "GoogleOther-Video/1.0", + "expect" : + { + "name" : "GoogleOther-Video", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "Google-Safety", + "ua" : "Google-Safety", + "expect" : + { + "name" : "Google-Safety", + "version" : "undefined", + "type" : "crawler" + } + }, { "desc" : "GPTBot", "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)", @@ -119,6 +309,36 @@ "type" : "crawler" } }, + { + "desc" : "Archive.org Bot", + "ua" : "ia_archiver/8.1 (Windows 2000 1.9; en-US;)", + "expect" : + { + "name" : "ia_archiver", + "version" : "8.1", + "type" : "crawler" + } + }, + { + "desc" : "Archive.org Bot", + "ua" : "Mozilla/5.0 (compatible; archive.org_bot/3.3.0 +https://archive.org/details/archive.org_bot)", + "expect" : + { + "name" : "archive.org_bot", + "version" : "3.3.0", + "type" : "crawler" + } + }, + { + "desc" : "Meta-ExternalAgent", + "ua" : "meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)", + "expect" : + { + "name" : "meta-externalagent", + "version" : "1.1", + "type" : "crawler" + } + }, { "desc" : "MJ12bot", "ua" : "Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)", @@ -126,6 +346,17 @@ { "name" : "MJ12bot", "version" : "v1.4.8", + "major" : "1", + "type" : "crawler" + } + }, + { + "desc" : "MojeekBot", + "ua" : "Mozilla/5.0 (compatible; MojeekBot/0.11; +https://www.mojeek.com/bot.html)", + "expect" : + { + "name" : "MojeekBot", + "version" : "0.11", "type" : "crawler" } }, @@ -139,6 +370,36 @@ "type" : "crawler" } }, + { + "desc" : "PerplexityBot", + "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; PerplexityBot/1.0; +https://perplexity.ai/perplexitybot)", + "expect" : + { + "name" : "PerplexityBot", + "version" : "1.0", + "type" : "crawler" 
+ } + }, + { + "desc" : "PetalBot", + "ua" : "Mozilla/5.0 (compatible;PetalBot; +https://webmaster.petalsearch.com/site/petalbot) ", + "expect" : + { + "name" : "PetalBot", + "version" : "undefined", + "type" : "crawler" + } + }, + { + "desc" : "PetalBot", + "ua" : "Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot) ", + "expect" : + { + "name" : "PetalBot", + "version" : "undefined", + "type" : "crawler" + } + }, { "desc" : "SemrushBot", "ua" : "Mozilla/5.0 (compatible; SemrushBot/7~bl; +http://www.semrush.com/bot.html)", @@ -149,6 +410,16 @@ "type" : "crawler" } }, + { + "desc" : "TurnitinBot", + "ua" : "TurnitinBot (https://turnitin.com/robot/crawlerinfo.html)", + "expect" : + { + "name" : "TurnitinBot", + "version" : "undefined", + "type" : "crawler" + } + }, { "desc" : "Yahoo! Japan", "ua" : "Y!J-BRW/1.0 (https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716)", @@ -168,5 +439,35 @@ "version" : "3.0", "type" : "crawler" } + }, + { + "desc" : "Yeti", + "ua" : "Mozilla/5.0 (compatible; Yeti/1.1; +http://naver.me/spd)", + "expect" : + { + "name" : "Yeti", + "version" : "1.1", + "type" : "crawler" + } + }, + { + "desc" : "YisouSpider", + "ua" : "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 YisouSpider/5.0 Safari/537.36", + "expect" : + { + "name" : "YisouSpider", + "version" : "5.0", + "type" : "crawler" + } + }, + { + "desc" : "YisouSpider", + "ua" : "YisouSpider", + "expect" : + { + "name" : "YisouSpider", + "version" : "undefined", + "type" : "crawler" + } } ] diff --git a/test/specs/browser-fetchers.json b/test/specs/browser-fetchers.json index c508bd2..94bada0 100644 --- a/test/specs/browser-fetchers.json +++ b/test/specs/browser-fetchers.json @@ -29,6 +29,76 @@ "type" : "fetcher" } }, + { + "desc" : "DuckAssistBot", + "ua" : "DuckAssistBot/1.2; 
(+http://duckduckgo.com/duckassistbot.html)", + "expect" : + { + "name" : "DuckAssistBot", + "version" : "1.2", + "type" : "fetcher" + } + }, + { + "desc" : "Google FeedFetcher", + "ua" : "FeedFetcher-Google; (+http://www.google.com/feedfetcher.html)", + "expect" : + { + "name" : "FeedFetcher-Google", + "version" : "undefined", + "type" : "fetcher" + } + }, + { + "desc" : "Google Read Aloud - Mobile agent", + "ua" : "Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.36 (compatible; Google-Read-Aloud; +https://support.google.com/webmasters/answer/1061943)", + "expect" : + { + "name" : "Google-Read-Aloud", + "version" : "undefined", + "type" : "fetcher" + } + }, + { + "desc" : "Google Read Aloud - Desktop agent", + "ua" : "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36 (compatible; Google-Read-Aloud; +https://support.google.com/webmasters/answer/1061943)", + "expect" : + { + "name" : "Google-Read-Aloud", + "version" : "undefined", + "type" : "fetcher" + } + }, + { + "desc" : "Google Publisher Center", + "ua" : "GoogleProducer; (+https://developers.google.com/search/docs/crawling-indexing/google-producer)", + "expect" : + { + "name" : "GoogleProducer", + "version" : "undefined", + "type" : "fetcher" + } + }, + { + "desc" : "Google Site Verifier", + "ua" : "Mozilla/5.0 (compatible; Google-Site-Verification/1.0)", + "expect" : + { + "name" : "Google-Site-Verification", + "version" : "1.0", + "type" : "fetcher" + } + }, + { + "desc" : "Meta-ExternalFetcher", + "ua" : "meta-externalfetcher/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)", + "expect" : + { + "name" : "meta-externalfetcher", + "version" : "1.1", + "type" : "fetcher" + } + }, { "desc" : "Rogerbot", "ua" : "Mozilla/5.0 (compatible; rogerBot/1.0; UrlCrawler; http://www.seomoz.org/dp/rogerbot)",