From 837d31963033d937f72708e07e3e34673e7be884 Mon Sep 17 00:00:00 2001 From: Faisal Salman Date: Thu, 20 Feb 2025 20:34:45 +0700 Subject: [PATCH] [extensions][helpers] Update Semrush bot variants --- package-lock.json | 10 +++------- src/extensions/ua-parser-extensions.js | 8 +++++--- src/helpers/ua-parser-helpers.js | 3 +++ test/data/ua/extension/crawler.json | 10 ++++++++++ test/data/ua/extension/fetcher.json | 10 ++++++++++ test/unit/helpers.js | 2 ++ 6 files changed, 33 insertions(+), 10 deletions(-) diff --git a/package-lock.json b/package-lock.json index e577985..187bf02 100644 --- a/package-lock.json +++ b/package-lock.json @@ -26,6 +26,7 @@ "@types/node-fetch": "^2.6.12", "detect-europe-js": "^0.1.2", "is-standalone-pwa": "^0.1.1", + "node-fetch": "^2.7.0", "ua-is-frozen": "^0.1.2" }, "bin": { @@ -38,7 +39,6 @@ "@playwright/test": "^1.49.0", "jshint": "~2.13.6", "mocha": "~8.2.0", - "node-fetch": "^2.7.0", "requirejs": "2.3.2", "safe-regex": "^2.1.1", "tsd": "^0.29.0", @@ -3144,7 +3144,6 @@ "version": "2.7.0", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", - "dev": true, "dependencies": { "whatwg-url": "^5.0.0" }, @@ -4179,8 +4178,7 @@ "node_modules/tr46": { "version": "0.0.3", "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", - "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", - "dev": true + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" }, "node_modules/trim-newlines": { "version": "3.0.1", @@ -4336,14 +4334,12 @@ "node_modules/webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", - "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", - "dev": true + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" }, "node_modules/whatwg-url": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", - "dev": true, "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" diff --git a/src/extensions/ua-parser-extensions.js b/src/extensions/ua-parser-extensions.js index 9cd884c..1d528c9 100644 --- a/src/extensions/ua-parser-extensions.js +++ b/src/extensions/ua-parser-extensions.js @@ -52,7 +52,6 @@ const Crawlers = Object.freeze({ // MojeekBot - https://www.mojeek.com/bot.html // OpenAI's SearchGPT - https://platform.openai.com/docs/bots // PerplexityBot - https://perplexity.ai/perplexitybot - // SemrushBot - http://www.semrush.com/bot.html // SeznamBot - http://napoveda.seznam.cz/seznambot-intro /((?:ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|mj12|mojeek|oai-search|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i, @@ -78,6 +77,9 @@ const Crawlers = Object.freeze({ // Internet Archive (archive.org) /(ia_archiver|archive\.org_bot)\/?([\w\.]*)/i, + // SemrushBot - http://www.semrush.com/bot.html + /((?:semrush|splitsignal)bot[-abcfimostw]*)\/([\w\.-]+)/i, + // Sogou Spider /(sogou (?:pic|head|web|orion|news) spider)\/([\w\.]+)/i, @@ -217,10 +219,10 @@ const Fetchers = Object.freeze({ // AhrefsSiteAudit - https://ahrefs.com/robot/site-audit // ChatGPT-User - https://platform.openai.com/docs/plugins/bot // DuckAssistBot - https://duckduckgo.com/duckassistbot/ - // BingPreview / Mastodon / Pinterestbot / Redditbot / Rogerbot / Telegrambot / Twitterbot / UptimeRobot + // BingPreview / Mastodon / Pinterestbot / Redditbot / Rogerbot / SiteAuditBot / Telegrambot / Twitterbot / UptimeRobot // Google Site Verifier / Meta / Yahoo! Japan // Yandex Bots - https://yandex.com/bots - /(ahrefssiteaudit|bingpreview|chatgpt-user|mastodon|(?:discord|duckassist|linkedin|pinterest|reddit|roger|telegram|twitter|uptimero)bot|google-site-verification|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i, + /(ahrefssiteaudit|bingpreview|chatgpt-user|mastodon|(?:discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|telegram|twitter|uptimero)bot|google-site-verification|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i, // Bluesky /(bluesky) cardyb\/([\w\.]+)/i, diff --git a/src/helpers/ua-parser-helpers.js b/src/helpers/ua-parser-helpers.js index 3cd3aac..daaa897 100644 --- a/src/helpers/ua-parser-helpers.js +++ b/src/helpers/ua-parser-helpers.js @@ -93,6 +93,9 @@ const isAIBot = (resultOrUA) => [ // Perplexity 'perplexitybot', + // Semrush + 'semrushbot-ocob', + // Timpi 'timpibot', diff --git a/test/data/ua/extension/crawler.json b/test/data/ua/extension/crawler.json index 04a1a35..48c5865 100644 --- a/test/data/ua/extension/crawler.json +++ b/test/data/ua/extension/crawler.json @@ -480,6 +480,16 @@ "type" : "crawler" } }, + { + "desc" : "SemrushBot for ContentShake AI tool", + "ua" : "Mozilla/5.0 (compatible; SemrushBot-OCOB/1; +https://www.semrush.com/bot/)", + "expect" : + { + "name" : "SemrushBot-OCOB", + "version" : "1", + "type" : "crawler" + } + }, { "desc" : "SeznamBot", "ua" : "Mozilla/5.0 (compatible; SeznamBot/4.0-RC1; +http://napoveda.seznam.cz/seznambot-intro/)", diff --git a/test/data/ua/extension/fetcher.json b/test/data/ua/extension/fetcher.json index a04e4f6..075281e 100644 --- a/test/data/ua/extension/fetcher.json +++ b/test/data/ua/extension/fetcher.json @@ -119,6 +119,16 @@ "type" : "fetcher" } }, + { + "desc" : "SiteAuditBot", + "ua" : "Mozilla/5.0 (compatible; SiteAuditBot/0.97; +http://www.semrush.com/bot.html)", + "expect" : + { + "name" : "SiteAuditBot", + "version" : "0.97", + "type" : "fetcher" + } + }, { "desc" : "UptimeRobot", "ua" : "Mozilla/5.0 (compatible; UptimeRobot/2.0; http://www.uptimerobot.com/)", diff --git a/test/unit/helpers.js b/test/unit/helpers.js index fcd6678..95ce871 100644 --- a/test/unit/helpers.js +++ b/test/unit/helpers.js @@ -40,11 +40,13 @@ describe('isAIBot', () => { const claudeBot = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)'; const firefox = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/111.0'; const searchGPT = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot'; + const semrushAI = 'Mozilla/5.0 (compatible; SemrushBot-OCOB/1; +https://www.semrush.com/bot/)'; assert.equal(isAIBot(UAParser(claudeBot, Bots)), true); assert.equal(isAIBot(claudeBot), true); assert.equal(isAIBot(firefox), false); assert.equal(isAIBot(searchGPT), true); + assert.equal(isAIBot(semrushAI), true); }); });