From 173325faa182777e212c8f5a9ae087a7df346f86 Mon Sep 17 00:00:00 2001 From: Faisal Salman Date: Thu, 6 Jun 2024 22:36:15 +0700 Subject: [PATCH] Add some well-known bot user-agents: Applebot, Amazonbot, Bytespider, Claudebot, Yandexbot --- src/extensions/ua-parser-extensions.js | 28 ++++++++++++++++++-------- test/mocha-test-extension.js | 16 +++++++++++++++ 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/extensions/ua-parser-extensions.js b/src/extensions/ua-parser-extensions.js index 9d73878..8e416c1 100644 --- a/src/extensions/ua-parser-extensions.js +++ b/src/extensions/ua-parser-extensions.js @@ -14,24 +14,36 @@ const VENDOR = 'vendor'; const VERSION = 'version'; const MOBILE = 'mobile'; const TABLET = 'tablet'; +const BOT = 'bot'; +const CLI = 'cli'; +const EMAIL = 'email'; +const INAPP = 'inapp'; +const MODULE = 'module'; const Bots = Object.freeze({ browser : [ // Googlebot / BingBot / MSNBot / FacebookBot - [/((?:google|bing|msn|facebook)bot(?:[\-imagevdo]{0,6})|bingpreview)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, 'bot']], - // GPTBot - https://platform.openai.com/docs/gptbot - [/(gptbot)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, 'bot']], + // YandexBot - https://yandex.com/bots + // Applebot - http://apple.com/go/applebot + // Amazonbot - https://developer.amazon.com/amazonbot + [/((?:google|bing|msn|facebook|gpt|yandex|apple|amazon)bot(?:[\-imagevdo]{0,6})|bingpreview)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, BOT]], // Slackbot - https://api.slack.com/robots - [/(slack(?:bot)?(?:-imgproxy|-linkexpanding)?) ([\w\.]+)/i], [NAME, VERSION, [TYPE, 'bot']] + [/(slack(?:bot)?(?:-imgproxy|-linkexpanding)?) ([\w\.]+)/i], [NAME, VERSION, [TYPE, BOT]], + + // ClaudeBot / Bytespider + [/(claude(?:bot|-web)|bytespider)\/?([\w\.]*)/i], [NAME, VERSION, [TYPE, BOT]], + + // Yandex Bots - https://yandex.com/bots + [/http:\/\/(yandex).com\/(bot)s/i], [NAME, TYPE] ] }); const CLIs = Object.freeze({ browser : [ // wget / curl / lynx - [/(wget|curl|lynx)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, 'cli']] + [/(wget|curl|lynx)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, CLI]] ] }); @@ -114,13 +126,13 @@ const ExtraDevices = Object.freeze({ const Emails = Object.freeze({ browser : [ // Microsoft Outlook / Thunderbird - [/(microsoft outlook|thunderbird)[\s\/]([\w\.]+)/i], [NAME, VERSION, [TYPE, 'email']] + [/(microsoft outlook|thunderbird)[\s\/]([\w\.]+)/i], [NAME, VERSION, [TYPE, EMAIL]] ] }); const InApps = Object.freeze({ browser : [ - [/chatlyio\/([\d\.]+)/i], [VERSION, 'Slack', [TYPE, 'inapp']] + [/chatlyio\/([\d\.]+)/i], [VERSION, 'Slack', [TYPE, INAPP]] ] }); @@ -233,7 +245,7 @@ const MediaPlayers = Object.freeze({ const Modules = Object.freeze({ browser : [ // Axios/jsdom/Scrapy - [/\b(axios|jsdom|scrapy)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, 'module']] + [/\b(axios|jsdom|scrapy)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, MODULE]] ] }); diff --git a/test/mocha-test-extension.js b/test/mocha-test-extension.js index 6c66502..ca2251b 100644 --- a/test/mocha-test-extension.js +++ b/test/mocha-test-extension.js @@ -15,6 +15,14 @@ describe('Bots', () => { const opera = 'Opera/8.5 (Macintosh; PPC Mac OS X; U; en)'; const wget = 'Wget/1.21.1'; const facebookBot = 'Mozilla/5.0 (compatible; FacebookBot/1.0; +https://developers.facebook.com/docs/sharing/webmasters/facebookbot/)'; + const yandexBot = 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)'; + const yandexMobileScreenShotBot ='Mozilla/5.0 (compatible; YandexMobileScreenShotBot/1.0; +http://yandex.com/bots)'; + const appleBot = 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1;+http://www.apple.com/go/applebot)'; + const amazonBot = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)'; + const claudeBot = 'ClaudeBot'; + const claudeBot2 = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)'; + const claudeWeb = 'Claude-Web/1.0 (web crawler; +https://www.anthropic.com/; bots@anthropic.com)'; + const bytespider = 'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.1511.1269 Mobile Safari/537.36; Bytespider'; const outlook = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; Microsoft Outlook 16.0.9126; Microsoft Outlook 16.0.9126; ms-office; MSOffice 16)'; const thunderbird = 'Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.13.0'; const axios = 'axios/1.3.5'; @@ -25,6 +33,14 @@ describe('Bots', () => { assert.deepEqual(botParser.setUA(googleBot).getBrowser(), {name: "Googlebot-Video", version: "1.0", major: "1", type: "bot"}); assert.deepEqual(botParser.setUA(gptBot).getBrowser(), {name: "GPTBot", version: "1.0", major: "1", type: "bot"}); assert.deepEqual(botParser.setUA(msnBot).getBrowser(), {name: "msnbot-media", version: "1.1", major: "1", type: "bot"}); + assert.deepEqual(botParser.setUA(yandexBot).getBrowser(), {name: "YandexBot", version: "3.0", major: "3", type: "bot"}); + assert.deepEqual(botParser.setUA(yandexMobileScreenShotBot).getBrowser(), {name: "yandex", version: undefined, major: undefined, type: "bot"}); + assert.deepEqual(botParser.setUA(appleBot).getBrowser(), {name: "Applebot", version: "0.1", major: "0", type: "bot"}); + assert.deepEqual(botParser.setUA(amazonBot).getBrowser(), {name: "Amazonbot", version: "0.1", major: "0", type: "bot"}); + assert.deepEqual(botParser.setUA(claudeBot).getBrowser(), {name: "ClaudeBot", version: undefined, major: undefined, type: "bot"}); + assert.deepEqual(botParser.setUA(claudeBot2).getBrowser(), {name: "ClaudeBot", version: "1.0", major: "1", type: "bot"}); + assert.deepEqual(botParser.setUA(claudeWeb).getBrowser(), {name: "Claude-Web", version: "1.0", major: "1", type: "bot"}); + assert.deepEqual(botParser.setUA(bytespider).getBrowser(), {name: "Bytespider", version: undefined, major: undefined, type: "bot"}); assert.deepEqual(botParser.setUA(bingPreview).getBrowser(), {name: "BingPreview", version: "1.0b", major: "1", type: "bot"}); assert.deepEqual(botParser.setUA(opera).getBrowser(), {name: "Opera", version: "8.5", major: "8", type: undefined});