[submodule:helpers] Add new method isAIBot(): detect AI bots

This commit is contained in:
Faisal Salman 2024-11-16 22:14:14 +07:00
parent 5b375b90d5
commit 70b3003344
5 changed files with 102 additions and 6 deletions

View File

@ -99,7 +99,15 @@ see what's new & breaking.
<td></td> <td></td>
</tr> </tr>
<tr> <tr>
<td>Extras (Apps, Libs, Emails, Media Players, etc)</td> <td>AI Bot detection</td>
<td></td>
<td></td>
<td></td>
<td></td>
<td></td>
</tr>
<tr>
<td>Extras (Apps, Libs, Emails, Media Players, etc) detection</td>
<td></td> <td></td>
<td></td> <td></td>
<td></td> <td></td>

View File

@ -90,8 +90,8 @@ const Crawlers = Object.freeze({
// Yeti (Naver) // Yeti (Naver)
/(yeti)\/([\w\.]+)/i, /(yeti)\/([\w\.]+)/i,
// aiHitBot / Cohere-AI / Diffbot / Magpie-Crawler / Omgilibot / Webzio-Extended / Screaming Frog SEO Spider / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot // aiHitBot / Diffbot / Magpie-Crawler / Omgilibot / Webzio-Extended / Screaming Frog SEO Spider / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot
/((?:aihit|diff|timpi|you)bot|cohere-ai|omgili(?:bot)?|(?:magpie-|velenpublicweb)crawler|webzio-extended|(?:screaming frog seo |yisou)spider)\/?([\w\.]*)/i /((?:aihit|diff|timpi|you)bot|omgili(?:bot)?|(?:magpie-|velenpublicweb)crawler|webzio-extended|(?:screaming frog seo |yisou)spider)\/?([\w\.]*)/i
], ],
[NAME, VERSION, [TYPE, CRAWLER]], [NAME, VERSION, [TYPE, CRAWLER]],
@ -241,8 +241,8 @@ const Fetchers = Object.freeze({
], ],
[NAME, VERSION, [TYPE, FETCHER]], [NAME, VERSION, [TYPE, FETCHER]],
// Google Bots / Snapchat / Vercelbot // Google Bots / Cohere / Snapchat / Vercelbot
[/(vercelbot|feedfetcher-google|google(?:-read-aloud|producer)|(?=bot; )snapchat)/i], [/(cohere-ai|vercelbot|feedfetcher-google|google(?:-read-aloud|producer)|(?=bot; )snapchat)/i],
[NAME, [TYPE, FETCHER]], [NAME, [TYPE, FETCHER]],
] ]
}); });

View File

@ -6,6 +6,7 @@ import { IResult } from "../main/ua-parser";
declare function getDeviceVendor(model: string): string | undefined; declare function getDeviceVendor(model: string): string | undefined;
declare function isAppleSilicon(resultOrUA: IResult | string): boolean; declare function isAppleSilicon(resultOrUA: IResult | string): boolean;
declare function isAIBot(resultOrUA: IResult | string): boolean;
declare function isBot(resultOrUA: IResult | string): boolean; declare function isBot(resultOrUA: IResult | string): boolean;
declare function isChromeFamily(resultOrUA: IResult | string): boolean; declare function isChromeFamily(resultOrUA: IResult | string): boolean;
declare function isElectron(): boolean; declare function isElectron(): boolean;
@ -16,6 +17,7 @@ declare function isStandalonePWA(): boolean;
export { export {
getDeviceVendor, getDeviceVendor,
isAppleSilicon, isAppleSilicon,
isAIBot,
isBot, isBot,
isChromeFamily, isChromeFamily,
isElectron, isElectron,

View File

@ -41,6 +41,77 @@ const isAppleSilicon = (resultOrUA) => {
return false; return false;
} }
/**
 * Tell whether a user agent is one of the AI bots listed below.
 *
 * The input is handed to `toResult` together with the `Bots` extension, and
 * the resulting `browser.name` is compared, in lower case, against the list.
 * A missing name stringifies to a value that is not in the list, so the
 * function returns false in that case.
 *
 * @param resultOrUA - the value forwarded to `toResult`; the typings and the
 *   tests use either a parsed result or a raw user-agent string
 * @returns true when the lower-cased browser name is a listed AI bot
 */
const isAIBot = (resultOrUA) => {
    // Lower-cased so the comparison against the all-lowercase list is case-insensitive.
    const detectedName = String(toResult(resultOrUA, Bots).browser.name).toLowerCase();

    // Known AI bot names, one group per operator.
    const aiBotGroups = [
        /* AI2 */          ['ai2bot'],
        /* Amazon */       ['amazonbot'],
        /* Anthropic */    ['anthropic-ai', 'claude-web', 'claudebot'],
        /* Apple */        ['applebot', 'applebot-extended'],
        /* ByteDance */    ['bytespider'],
        /* Common Crawl */ ['ccbot'],
        /* DataForSeo */   ['dataforseobot'],
        /* Diffbot */      ['diffbot'],
        /* Google */       ['googleother', 'googleother-image', 'googleother-video', 'google-extended'],
        /* Hive AI */      ['imagesiftbot'],
        /* Huawei */       ['petalbot'],
        /* Meta */         ['facebookbot', 'meta-externalagent'],
        /* OpenAI */       ['gptbot', 'oai-searchbot'],
        /* Perplexity */   ['perplexitybot'],
        /* Timpi */        ['timpibot'],
        /* Velen.io */     ['velenpublicwebcrawler'],
        /* Webz.io */      ['omgili', 'omgilibot', 'webzio-extended'],
        /* You.com */      ['youbot'],
        /* Zyte */         ['scrapy']
    ];

    return aiBotGroups.some((group) => group.includes(detectedName));
};
const isBot = (resultOrUA) => [ const isBot = (resultOrUA) => [
'cli', 'cli',
'crawler', 'crawler',
@ -56,6 +127,7 @@ const isElectron = () => !!(process?.versions?.hasOwnProperty('electron') ||
module.exports = { module.exports = {
getDeviceVendor, getDeviceVendor,
isAppleSilicon, isAppleSilicon,
isAIBot,
isBot, isBot,
isChromeFamily, isChromeFamily,
isElectron, isElectron,

View File

@ -1,6 +1,6 @@
const assert = require('assert'); const assert = require('assert');
const { UAParser } = require('../src/main/ua-parser'); const { UAParser } = require('../src/main/ua-parser');
const { getDeviceVendor, isAppleSilicon, isBot, isChromeFamily } = require('../src/helpers/ua-parser-helpers'); const { getDeviceVendor, isAppleSilicon, isAIBot, isBot, isChromeFamily } = require('../src/helpers/ua-parser-helpers');
const { Bots, Emails } = require('../src/extensions/ua-parser-extensions'); const { Bots, Emails } = require('../src/extensions/ua-parser-extensions');
describe('getDeviceVendor', () => { describe('getDeviceVendor', () => {
@ -34,6 +34,20 @@ describe('isAppleSilicon', () => {
}); });
}); });
describe('isAIBot', () => {
    it('Can detect AI Bots', () => {
        // Sample user agents: two AI crawlers and one ordinary browser.
        const userAgents = {
            claudeBot: 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)',
            firefox: 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/111.0',
            searchGPT: 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot'
        };

        // An already-parsed result must be accepted as well as a raw string.
        const parsedClaudeBot = UAParser(userAgents.claudeBot, Bots);
        assert.equal(isAIBot(parsedClaudeBot), true);

        // Raw user-agent strings paired with the expected verdict.
        const rawCases = [
            [userAgents.claudeBot, true],
            [userAgents.firefox, false],
            [userAgents.searchGPT, true]
        ];
        for (const [ua, expected] of rawCases) {
            assert.equal(isAIBot(ua), expected);
        }
    });
});
describe('isBot', () => { describe('isBot', () => {
it('Can detect Bots', () => { it('Can detect Bots', () => {