BREAKING - Remove the `bot` type; split it into `crawler` and `fetcher`

Add new crawlers: Baiduspider, DuckDuckBot, and Sogou Web Spider
Add new fetchers: Mastodon, Pinterestbot, Redditbot, LinkedInBot, Discordbot, Telegrambot, Twitterbot, Snapchat Bot, WhatsApp
This commit is contained in:
Faisal Salman 2024-06-07 23:59:24 +07:00
parent 173325faa1
commit db3423a76c
5 changed files with 81 additions and 27 deletions

View File

@ -147,9 +147,10 @@ const Browser = Object.freeze({
}); });
const BrowserType = Object.freeze({ const BrowserType = Object.freeze({
BOT: 'bot', CRAWLER: 'crawler',
CLI: 'cli', CLI: 'cli',
EMAIL: 'email', EMAIL: 'email',
FETCHER: 'fetcher',
INAPP: 'inapp', INAPP: 'inapp',
MODULE: 'module' MODULE: 'module'
}); });

View File

@ -4,10 +4,11 @@
import type { UAParserExt } from "../main/ua-parser"; import type { UAParserExt } from "../main/ua-parser";
export const Bots: UAParserExt;
export const CLIs: UAParserExt; export const CLIs: UAParserExt;
export const Crawlers: UAParserExt;
export const ExtraDevices: UAParserExt; export const ExtraDevices: UAParserExt;
export const Emails: UAParserExt; export const Emails: UAParserExt;
export const Fetchers: UAParserExt;
export const InApps: UAParserExt; export const InApps: UAParserExt;
export const MediaPlayers: UAParserExt; export const MediaPlayers: UAParserExt;
export const Modules: UAParserExt; export const Modules: UAParserExt;

View File

@ -14,36 +14,59 @@ const VENDOR = 'vendor';
const VERSION = 'version'; const VERSION = 'version';
const MOBILE = 'mobile'; const MOBILE = 'mobile';
const TABLET = 'tablet'; const TABLET = 'tablet';
const BOT = 'bot'; const CRAWLER = 'crawler';
const CLI = 'cli'; const CLI = 'cli';
const EMAIL = 'email'; const EMAIL = 'email';
const FETCHER = 'fetcher';
const INAPP = 'inapp'; const INAPP = 'inapp';
const MODULE = 'module'; const MODULE = 'module';
const Bots = Object.freeze({ const CLIs = Object.freeze({
browser : [ browser : [
// Googlebot / BingBot / MSNBot / FacebookBot // wget / curl / lynx
// GPTBot - https://platform.openai.com/docs/gptbot [/(wget|curl|lynx)[\/ ]([\w\.]+)/i], [NAME, VERSION, [TYPE, CLI]]
// YandexBot - https://yandex.com/bots
// Applebot - http://apple.com/go/applebot
// Amazonbot - https://developer.amazon.com/amazonbot
[/((?:google|bing|msn|facebook|gpt|yandex|apple|amazon)bot(?:[\-imagevdo]{0,6})|bingpreview)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, BOT]],
// Slackbot - https://api.slack.com/robots
[/(slack(?:bot)?(?:-imgproxy|-linkexpanding)?) ([\w\.]+)/i], [NAME, VERSION, [TYPE, BOT]],
// ClaudeBot / Bytespider
[/(claude(?:bot|-web)|bytespider)\/?([\w\.]*)/i], [NAME, VERSION, [TYPE, BOT]],
// Yandex Bots - https://yandex.com/bots
[/http:\/\/(yandex).com\/(bot)s/i], [NAME, TYPE]
] ]
}); });
const CLIs = Object.freeze({ const Crawlers = Object.freeze({
browser : [ browser : [
// wget / curl / lynx // Amazonbot - https://developer.amazon.com/amazonbot
[/(wget|curl|lynx)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, CLI]] // Applebot - http://apple.com/go/applebot
// Bingbot - http://www.bing.com/bingbot.htm
// DuckDuckBot - http://duckduckgo.com/duckduckbot.html
// FacebookBot - https://developers.facebook.com/docs/sharing/bot/
// GPTBot - https://platform.openai.com/docs/gptbot
[/((?:amazon|apple|bing|duckduck|facebook|gpt)bot)\/([\w\.]+)/i],
[NAME, VERSION, [TYPE, CRAWLER]],
// Baiduspider https://help.baidu.com/question?prod_id=99&class=0&id=3001
[/(baiduspider)[-imagevdonsfcpr]{0,6}\/([\w\.]+)/i],
[NAME, VERSION, [TYPE, CRAWLER]],
// Bytespider
// Yahoo! Slurp - http://help.yahoo.com/help/us/ysearch/slurp
[/((?:bytespider|(?=yahoo! )slurp))/i],
[NAME, [TYPE, CRAWLER]],
// ClaudeBot
[/(claude(?:bot|-web))\/([\w\.]+)/i],
[NAME, VERSION, [TYPE, CRAWLER]],
// Googlebot - http://www.google.com/bot.html
[
/(google(?:bot|other)(?:-image|-video|-news|-extended)?|(?:storebot-)?google(?:-inspectiontool)?)\/?([\w\.]*)/i
],
[NAME, VERSION, [TYPE, CRAWLER]],
// Sogou Spider
[/(sogou (?:pic|head|web|orion|news) spider)\/([\w\.]+)/i],
[NAME, VERSION, [TYPE, CRAWLER]],
// Yandex Bots - https://yandex.com/bots
[
/(yandex(?:(?:mobile)?(?:accessibility|additional|renderresources|screenshot|sprav)?bot|image(?:s|resizer)|video(?:parser)?|blogs|adnet|favicons|fordomain|market|media|metrika|news|ontodb(?:api)?|pagechecker|partner|rca|tracker|turbo|vertis|webmaster|antivirus))\/([\w\.]+)/i
],
[NAME, VERSION, [TYPE, CRAWLER]]
] ]
}); });
@ -125,11 +148,39 @@ const ExtraDevices = Object.freeze({
const Emails = Object.freeze({ const Emails = Object.freeze({
browser : [ browser : [
// Microsoft Outlook / Thunderbird // Microsoft Outlook / Thunderbird
[/(microsoft outlook|thunderbird)[\s\/]([\w\.]+)/i], [NAME, VERSION, [TYPE, EMAIL]] [/(microsoft outlook|thunderbird)[\s\/]([\w\.]+)/i], [NAME, VERSION, [TYPE, EMAIL]]
] ]
}); });
/**
 * Extension rules that tag matching user agents with the FETCHER browser type.
 *
 * `browser` is a flat list of pairs: an array of one or more regexes followed
 * by the array of properties to fill from a match. Regexes that share a pair
 * share the same property list.
 * NOTE(review): capture groups are presumably assigned to the listed
 * properties positionally by the parser's mapper (not visible here) - confirm.
 */
const Fetchers = Object.freeze({
    browser : [
        // BingPreview / Discordbot / LinkedInBot / Mastodon / Pinterestbot / Redditbot / Telegrambot / Twitterbot
        [/(bingpreview|mastodon|(?:discord|linkedin|pinterest|reddit|telegram|twitter)bot)\/([\w\.]+)/i],
        [NAME, VERSION, [TYPE, FETCHER]],

        // Google Bots / Snapchat
        // "snapchat" is only accepted when it directly follows "bot; ". The prefix
        // sits outside the capture group so only "snapchat" is captured as the name.
        // A lookahead such as `(?=bot; )snapchat` cannot express this: it requires
        // the same position to start with both "bot; " and "snapchat", so it never
        // matches. A lookbehind is avoided for engines that do not support it.
        [
            /(feedfetcher-google|google-read-aloud)/i,
            /bot; (snapchat)/i
        ],
        [NAME, [TYPE, FETCHER]],

        // Slackbot - https://api.slack.com/robots
        [/(slack(?:bot)?(?:-imgproxy|-linkexpanding)?) ([\w\.]+)/i],
        [NAME, VERSION, [TYPE, FETCHER]],

        // WhatsApp
        // The version must be followed by "/" or a space and one of the letters i, a, n, w.
        [/(whatsapp)\/([\w\.]+)[\/ ][ianw]/i],
        [NAME, VERSION, [TYPE, FETCHER]],

        // Yandex Bots - https://yandex.com/bots
        [
            /(yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i,
            // Only one capture group: this pattern provides a name but no version.
            /(yandex(?:sitelinks|userproxy))/i
        ],
        [NAME, VERSION, [TYPE, FETCHER]]
    ]
});
const InApps = Object.freeze({ const InApps = Object.freeze({
browser : [ browser : [
[/chatlyio\/([\d\.]+)/i], [VERSION, 'Slack', [TYPE, INAPP]] [/chatlyio\/([\d\.]+)/i], [VERSION, 'Slack', [TYPE, INAPP]]
@ -244,16 +295,17 @@ const MediaPlayers = Object.freeze({
const Modules = Object.freeze({ const Modules = Object.freeze({
browser : [ browser : [
// Axios/jsdom/Scrapy // Axios/jsdom/Scrapy
[/\b(axios|jsdom|scrapy)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, MODULE]] [/\b(axios|jsdom|scrapy)\/([\w\.]+)/i], [NAME, VERSION, [TYPE, MODULE]]
] ]
}); });
module.exports = { module.exports = {
Bots,
CLIs, CLIs,
Crawlers,
ExtraDevices, ExtraDevices,
Emails, Emails,
Fetchers,
InApps, InApps,
MediaPlayers, MediaPlayers,
Modules Modules

View File

@ -15,7 +15,7 @@ declare namespace UAParser {
name?: string; name?: string;
version?: string; version?: string;
major?: string; major?: string;
type?: 'bot' | 'cli' | 'email' | 'inapp' | 'module'; type?: 'crawler' | 'cli' | 'email' | 'fetcher' | 'inapp' | 'module';
} }
interface ICPU extends IData<ICPU> { interface ICPU extends IData<ICPU> {

View File

@ -28,7 +28,7 @@ expectType<IBrowser>(browser);
expectType<string | undefined>(browser.name); expectType<string | undefined>(browser.name);
expectType<string | undefined>(browser.version); expectType<string | undefined>(browser.version);
expectType<string | undefined>(browser.major); expectType<string | undefined>(browser.major);
expectType<'bot' | 'cli' | 'email' | 'inapp' | 'module' | undefined>(browser.type); expectType<'crawler' | 'cli' | 'email' | 'fetcher' | 'inapp' | 'module' | undefined>(browser.type);
expectType<boolean>(browser.is('')); expectType<boolean>(browser.is(''));
expectType<string>(browser.toString()); expectType<string>(browser.toString());
expectType<IBrowser | PromiseLike<IBrowser>>(browser.withClientHints()); expectType<IBrowser | PromiseLike<IBrowser>>(browser.withClientHints());