Add some well-known bot user-agents: Applebot, Amazonbot, Bytespider, Claudebot, Yandexbot

This commit is contained in:
Faisal Salman 2024-06-06 22:36:15 +07:00
parent 5190905df8
commit 173325faa1
2 changed files with 36 additions and 8 deletions

View File

@ -14,24 +14,36 @@ const VENDOR = 'vendor';
// String tags used by the rule tables in this file.
// VERSION appears in the tables' property lists as the target of a captured version.
const VERSION = 'version';
// NOTE(review): MOBILE and TABLET are not referenced in the visible part of
// this file; presumably device-type values — confirm against the rest of the file.
const MOBILE = 'mobile';
const TABLET = 'tablet';
// Values paired with TYPE in the Bots, CLIs, Emails, InApps and Modules
// tables respectively, so each table tags its matches with one category.
const BOT = 'bot';
const CLI = 'cli';
const EMAIL = 'email';
const INAPP = 'inapp';
const MODULE = 'module';
// Rules that recognise well-known crawlers and tag them with TYPE = BOT.
// Each rule is a pair: a list of regexes, followed by the list of properties
// that receive the capture groups in order (a nested [PROP, value] pair
// assigns a fixed value instead of a capture).
// Order matters for Yandex: the named-bot rule comes first so that
// "YandexBot/3.0" keeps its name and version, while other Yandex crawlers
// fall through to the generic URL rule at the end.
const Bots = Object.freeze({
    browser : [
        // Googlebot / BingBot / MSNBot / FacebookBot
        // GPTBot - https://platform.openai.com/docs/gptbot
        // YandexBot - https://yandex.com/bots
        // Applebot - http://apple.com/go/applebot
        // Amazonbot - https://developer.amazon.com/amazonbot
        // The optional `[\-imagevdo]{0,6}` suffix admits variants such as
        // "Googlebot-Video" and "msnbot-media".
        [/((?:google|bing|msn|facebook|gpt|yandex|apple|amazon)bot(?:[\-imagevdo]{0,6})|bingpreview)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, BOT]],

        // Slackbot - https://api.slack.com/robots
        [/(slack(?:bot)?(?:-imgproxy|-linkexpanding)?) ([\w\.]+)/i], [NAME, VERSION, [TYPE, BOT]],

        // ClaudeBot / Bytespider
        // The "/version" part is optional because these names also appear bare.
        [/(claude(?:bot|-web)|bytespider)\/?([\w\.]*)/i], [NAME, VERSION, [TYPE, BOT]],

        // Yandex Bots - https://yandex.com/bots
        // Generic fallback keyed on the bots URL; the two captures supply
        // the name ("yandex") and the type ("bot") directly.
        // The dot in "yandex.com" is escaped so it only matches a literal ".".
        [/http:\/\/(yandex)\.com\/(bot)s/i], [NAME, TYPE]
    ]
});
// Rules that recognise command-line HTTP clients and tag them with TYPE = CLI.
// A single regex list is followed by the properties its captures populate.
const CLIs = Object.freeze({
    browser : [
        // wget / curl / lynx
        // Capture 1 is the client name, capture 2 its version.
        [/(wget|curl|lynx)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, CLI]]
    ]
});
@ -114,13 +126,13 @@ const ExtraDevices = Object.freeze({
// Rules that recognise e-mail clients and tag them with TYPE = EMAIL.
const Emails = Object.freeze({
    browser : [
        // Microsoft Outlook / Thunderbird
        // Name and version may be separated by whitespace ("Microsoft Outlook 16.0")
        // or a slash ("Thunderbird/78.13.0").
        [/(microsoft outlook|thunderbird)[\s\/]([\w\.]+)/i], [NAME, VERSION, [TYPE, EMAIL]]
    ]
});
// Rules that recognise in-app browsers and tag them with TYPE = INAPP.
const InApps = Object.freeze({
    browser : [
        // Slack ("chatlyio/<version>")
        // The regex captures only the version, so the name is supplied as a
        // fixed [NAME, value] pair, in the same form as the [TYPE, INAPP] pair.
        // A bare 'Slack' string here would occupy a property-name slot
        // instead of setting the name.
        [/chatlyio\/([\d\.]+)/i], [VERSION, [NAME, 'Slack'], [TYPE, INAPP]]
    ]
});
@ -233,7 +245,7 @@ const MediaPlayers = Object.freeze({
// Rules that recognise HTTP libraries / scraping modules and tag them with
// TYPE = MODULE.
const Modules = Object.freeze({
    browser : [
        // Axios/jsdom/Scrapy
        // `\b` keeps the name from matching as the tail of a longer token.
        [/\b(axios|jsdom|scrapy)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, MODULE]]
    ]
});

View File

@ -15,6 +15,14 @@ describe('Bots', () => {
const opera = 'Opera/8.5 (Macintosh; PPC Mac OS X; U; en)';
const wget = 'Wget/1.21.1';
const facebookBot = 'Mozilla/5.0 (compatible; FacebookBot/1.0; +https://developers.facebook.com/docs/sharing/webmasters/facebookbot/)';
const yandexBot = 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)';
const yandexMobileScreenShotBot ='Mozilla/5.0 (compatible; YandexMobileScreenShotBot/1.0; +http://yandex.com/bots)';
const appleBot = 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1;+http://www.apple.com/go/applebot)';
const amazonBot = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)';
const claudeBot = 'ClaudeBot';
const claudeBot2 = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)';
const claudeWeb = 'Claude-Web/1.0 (web crawler; +https://www.anthropic.com/; bots@anthropic.com)';
const bytespider = 'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.1511.1269 Mobile Safari/537.36; Bytespider';
const outlook = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; Microsoft Outlook 16.0.9126; Microsoft Outlook 16.0.9126; ms-office; MSOffice 16)';
const thunderbird = 'Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.13.0';
const axios = 'axios/1.3.5';
@ -25,6 +33,14 @@ describe('Bots', () => {
assert.deepEqual(botParser.setUA(googleBot).getBrowser(), {name: "Googlebot-Video", version: "1.0", major: "1", type: "bot"});
assert.deepEqual(botParser.setUA(gptBot).getBrowser(), {name: "GPTBot", version: "1.0", major: "1", type: "bot"});
assert.deepEqual(botParser.setUA(msnBot).getBrowser(), {name: "msnbot-media", version: "1.1", major: "1", type: "bot"});
assert.deepEqual(botParser.setUA(yandexBot).getBrowser(), {name: "YandexBot", version: "3.0", major: "3", type: "bot"});
assert.deepEqual(botParser.setUA(yandexMobileScreenShotBot).getBrowser(), {name: "yandex", version: undefined, major: undefined, type: "bot"});
assert.deepEqual(botParser.setUA(appleBot).getBrowser(), {name: "Applebot", version: "0.1", major: "0", type: "bot"});
assert.deepEqual(botParser.setUA(amazonBot).getBrowser(), {name: "Amazonbot", version: "0.1", major: "0", type: "bot"});
assert.deepEqual(botParser.setUA(claudeBot).getBrowser(), {name: "ClaudeBot", version: undefined, major: undefined, type: "bot"});
assert.deepEqual(botParser.setUA(claudeBot2).getBrowser(), {name: "ClaudeBot", version: "1.0", major: "1", type: "bot"});
assert.deepEqual(botParser.setUA(claudeWeb).getBrowser(), {name: "Claude-Web", version: "1.0", major: "1", type: "bot"});
assert.deepEqual(botParser.setUA(bytespider).getBrowser(), {name: "Bytespider", version: undefined, major: undefined, type: "bot"});
assert.deepEqual(botParser.setUA(bingPreview).getBrowser(), {name: "BingPreview", version: "1.0b", major: "1", type: "bot"});
assert.deepEqual(botParser.setUA(opera).getBrowser(), {name: "Opera", version: "8.5", major: "8", type: undefined});