[bot-detection] Add new method: isAIAssistant() to check whether user-agent is an AI assistant

This commit is contained in:
Faisal Salman
2025-10-25 16:39:59 +07:00
parent 232fb321f1
commit 042c57cc10
3 changed files with 167 additions and 46 deletions

View File

@@ -2,5 +2,6 @@
// Project: https://github.com/faisalman/ua-parser-js // Project: https://github.com/faisalman/ua-parser-js
// Definitions by: Faisal Salman <https://github.com/faisalman> // Definitions by: Faisal Salman <https://github.com/faisalman>
export function isAIAssistant(ua: string): boolean;
export function isAICrawler(ua: string): boolean; export function isAICrawler(ua: string): boolean;
export function isBot(ua: string): boolean; export function isBot(ua: string): boolean;

View File

@@ -8,118 +8,178 @@
/*jshint esversion: 6 */ /*jshint esversion: 6 */
const { UAParser } = require('../main/ua-parser'); const { UAParser } = require('../main/ua-parser');
const { Extension, BrowserType } = require('../enums/ua-parser-enums'); const { Bots, Crawlers, Fetchers } = require('../extensions/ua-parser-extensions');
const { Bots, Crawlers } = require('../extensions/ua-parser-extensions'); const { BrowserType, Extension } = require('../enums/ua-parser-enums');
const { Crawler } = Extension.BrowserName; const { Crawler, Fetcher } = Extension.BrowserName;
const BotTypesList = [ class BotList {
// BotList pairs a lower-cased list of expected values with the parser
// extension and browser property needed to test a user-agent against it.
//
// ext  - handed to `new UAParser(ua, ext)` as its second argument whenever a
//        raw user-agent string has to be parsed
// prop - key read from the browser result (the lists in this file use
//        'type' and 'name')
// list - values to match; each entry must be a string, and is lower-cased
//        once here so that lookups are case-insensitive
constructor(ext, prop, list) {
this.ext = ext;
this.prop = prop;
this.list = list.map(x => x.toLowerCase());
}
// Returns true when the browser's `prop` value, lower-cased, is in the list.
//
// ua - either a user-agent string, which is parsed with this list's
//      extension, or an object whose `browser` property is used directly
//      (presumably an already-parsed UAParser result - confirm with callers)
//
// The `?.` only guards the `toLowerCase()` call: a missing `prop` value yields
// undefined, which is never in the list, so the result is false. A non-string
// `ua` that has no `browser` property is not guarded and will throw.
includes(ua) {
return this.list.includes(
(typeof ua === 'string' ?
new UAParser(ua, this.ext).getBrowser() :
ua.browser
)[this.prop]?.toLowerCase());
}
}
const BotTypes = new BotList(Bots, 'type', [
BrowserType.CLI, BrowserType.CLI,
BrowserType.CRAWLER, BrowserType.CRAWLER,
BrowserType.FETCHER, BrowserType.FETCHER,
BrowserType.LIBRARY BrowserType.LIBRARY
]; ]);
const AIAssistants = new BotList(Fetchers, 'name', [
// Anthropic
Fetcher.ANTHROPIC_CLAUDE_USER,
// Cohere
Fetcher.COHERE_AI,
// DuckDuckGo
Fetcher.DUCKDUCKGO_ASSISTBOT,
// Google
Fetcher.GOOGLE_GEMINI_DEEP_RESEARCH,
// Mistral AI
Fetcher.MISTRALAI_USER,
// OpenAI
Fetcher.OPENAI_CHATGPT_USER,
// Perplexity
Fetcher.PERPLEXITY_USER
]);
const AICrawlers = new BotList(Crawlers, 'name', [
const AICrawlersList = [
// AI2 // AI2
Crawler.AI2_BOT, Crawler.AI2_BOT,
// Amazon // Amazon
Crawler.AMAZON_BOT, Crawler.AMAZON_BOT,
// Anthropic // Anthropic
Crawler.ANTHROPIC_AI, Crawler.ANTHROPIC_AI,
Crawler.ANTHROPIC_CLAUDE_BOT, Crawler.ANTHROPIC_CLAUDE_BOT,
Crawler.ANTHROPIC_CLAUDE_SEARCHBOT, Crawler.ANTHROPIC_CLAUDE_SEARCHBOT,
Crawler.ANTHROPIC_CLAUDE_WEB, Crawler.ANTHROPIC_CLAUDE_WEB,
// Apple // Apple
Crawler.APPLE_BOT, Crawler.APPLE_BOT,
Crawler.APPLE_BOT_EXTENDED, Crawler.APPLE_BOT_EXTENDED,
// Brave // Brave
Crawler.BRAVE_BOT, Crawler.BRAVE_BOT,
// ByteDance // ByteDance
Crawler.BYTEDANCE_BYTESPIDER, Crawler.BYTEDANCE_BYTESPIDER,
Crawler.BYTEDANCE_TIKTOKSPIDER, Crawler.BYTEDANCE_TIKTOKSPIDER,
// Cohere // Cohere
Crawler.COHERE_TRAINING_DATA_CRAWLER, Crawler.COHERE_TRAINING_DATA_CRAWLER,
// Common Crawl // Common Crawl
Crawler.COMMON_CRAWL_CCBOT, Crawler.COMMON_CRAWL_CCBOT,
// Coveo // Coveo
Crawler.COVEO_BOT, Crawler.COVEO_BOT,
// DataForSeo // DataForSeo
Crawler.DATAFORSEO_BOT, Crawler.DATAFORSEO_BOT,
// DeepSeek // DeepSeek
Crawler.DEEPSEEK_BOT, Crawler.DEEPSEEK_BOT,
// Diffbot // Diffbot
Crawler.DIFFBOT, Crawler.DIFFBOT,
// Google // Google
Crawler.GOOGLE_EXTENDED, Crawler.GOOGLE_EXTENDED,
Crawler.GOOGLE_OTHER, Crawler.GOOGLE_OTHER,
Crawler.GOOGLE_OTHER_IMAGE, Crawler.GOOGLE_OTHER_IMAGE,
Crawler.GOOGLE_OTHER_VIDEO, Crawler.GOOGLE_OTHER_VIDEO,
Crawler.GOOGLE_CLOUDVERTEXBOT, Crawler.GOOGLE_CLOUDVERTEXBOT,
// Hive AI // Hive AI
Crawler.HIVE_IMAGESIFTBOT, Crawler.HIVE_IMAGESIFTBOT,
// Huawei // Huawei
Crawler.HUAWEI_PETALBOT, Crawler.HUAWEI_PETALBOT,
Crawler.HUAWEI_PANGUBOT, Crawler.HUAWEI_PANGUBOT,
// Hugging Face // Hugging Face
Crawler.HUGGINGFACE_BOT, Crawler.HUGGINGFACE_BOT,
// Kangaroo // Kangaroo
Crawler.KANGAROO_BOT, Crawler.KANGAROO_BOT,
// Mendable.ai // Mendable.ai
Crawler.FIRECRAWL_AGENT, Crawler.FIRECRAWL_AGENT,
// Meta // Meta
Crawler.META_FACEBOOKBOT, Crawler.META_FACEBOOKBOT,
Crawler.META_EXTERNALAGENT, Crawler.META_EXTERNALAGENT,
// OpenAI // OpenAI
Crawler.OPENAI_GPTBOT, Crawler.OPENAI_GPTBOT,
Crawler.OPENAI_SEARCH_BOT, Crawler.OPENAI_SEARCH_BOT,
// Perplexity // Perplexity
Crawler.PERPLEXITY_BOT, Crawler.PERPLEXITY_BOT,
// Replicate // Replicate
Crawler.REPLICATE_BOT, Crawler.REPLICATE_BOT,
// Runpod // Runpod
Crawler.RUNPOD_BOT, Crawler.RUNPOD_BOT,
// SB Intuitions // SB Intuitions
Crawler.SB_INTUITIONS_BOT, Crawler.SB_INTUITIONS_BOT,
// Semrush // Semrush
Crawler.SEMRUSH_BOT_CONTENTSHAKE, Crawler.SEMRUSH_BOT_CONTENTSHAKE,
// Timpi // Timpi
Crawler.TIMPI_BOT, Crawler.TIMPI_BOT,
// Together AI // Together AI
Crawler.TOGETHER_BOT, Crawler.TOGETHER_BOT,
// Velen.io // Velen.io
Crawler.HUNTER_VELENPUBLICWEBCRAWLER, Crawler.HUNTER_VELENPUBLICWEBCRAWLER,
// Vercel // Vercel
Crawler.VERCEL_V0BOT, Crawler.VERCEL_V0BOT,
// Webz.io // Webz.io
Crawler.WEBZIO_OMGILI, Crawler.WEBZIO_OMGILI,
Crawler.WEBZIO_OMGILI_BOT, Crawler.WEBZIO_OMGILI_BOT,
Crawler.WEBZIO_EXTENDED, Crawler.WEBZIO_EXTENDED,
// X // X
Crawler.XAI_BOT, Crawler.XAI_BOT,
// You.com // You.com
Crawler.YOU_BOT, Crawler.YOU_BOT,
// Zhipu AI // Zhipu AI
Crawler.ZHIPU_CHATGLM_SPIDER Crawler.ZHIPU_CHATGLM_SPIDER
]; ]);
const isAICrawler = ua => const isBot = ua => BotTypes.includes(ua);
AICrawlersList const isAIAssistant = ua => AIAssistants.includes(ua);
.map(s=>s.toLowerCase()) const isAICrawler = ua => AICrawlers.includes(ua);
.includes(
(typeof ua === 'string' ?
new UAParser(ua, Crawlers).getBrowser() :
ua.browser
).name?.toLowerCase());
const isBot = ua => module.exports = {
BotTypesList isAIAssistant,
.includes(
(typeof ua === 'string' ?
new UAParser(ua, Bots).getBrowser() :
ua.browser
).type);
module.exports = {
isAICrawler, isAICrawler,
isBot isBot
} }

View File

@@ -14,116 +14,176 @@
import { UAParser } from '../main/ua-parser.mjs'; import { UAParser } from '../main/ua-parser.mjs';
import { Extension, BrowserType } from '../enums/ua-parser-enums.mjs'; import { Extension, BrowserType } from '../enums/ua-parser-enums.mjs';
import { Bots, Crawlers, Fetchers } from '../extensions/ua-parser-extensions.mjs'; import { Bots, Crawlers, Fetchers } from '../extensions/ua-parser-extensions.mjs';
const { Crawler } = Extension.BrowserName; const { Crawler, Fetcher } = Extension.BrowserName;
const BotTypesList = [ class BotList {
// BotList pairs a lower-cased list of expected values with the parser
// extension and browser property needed to test a user-agent against it.
//
// ext  - handed to `new UAParser(ua, ext)` as its second argument whenever a
//        raw user-agent string has to be parsed
// prop - key read from the browser result (the lists in this file use
//        'type' and 'name')
// list - values to match; each entry must be a string, and is lower-cased
//        once here so that lookups are case-insensitive
constructor(ext, prop, list) {
this.ext = ext;
this.prop = prop;
this.list = list.map(x => x.toLowerCase());
}
// Returns true when the browser's `prop` value, lower-cased, is in the list.
//
// ua - either a user-agent string, which is parsed with this list's
//      extension, or an object whose `browser` property is used directly
//      (presumably an already-parsed UAParser result - confirm with callers)
//
// The `?.` only guards the `toLowerCase()` call: a missing `prop` value yields
// undefined, which is never in the list, so the result is false. A non-string
// `ua` that has no `browser` property is not guarded and will throw.
includes(ua) {
return this.list.includes(
(typeof ua === 'string' ?
new UAParser(ua, this.ext).getBrowser() :
ua.browser
)[this.prop]?.toLowerCase());
}
}
const BotTypes = new BotList(Bots, 'type', [
BrowserType.CLI, BrowserType.CLI,
BrowserType.CRAWLER, BrowserType.CRAWLER,
BrowserType.FETCHER, BrowserType.FETCHER,
BrowserType.LIBRARY BrowserType.LIBRARY
]; ]);
const AIAssistants = new BotList(Fetchers, 'name', [
// Anthropic
Fetcher.ANTHROPIC_CLAUDE_USER,
// Cohere
Fetcher.COHERE_AI,
// DuckDuckGo
Fetcher.DUCKDUCKGO_ASSISTBOT,
// Google
Fetcher.GOOGLE_GEMINI_DEEP_RESEARCH,
// Mistral AI
Fetcher.MISTRALAI_USER,
// OpenAI
Fetcher.OPENAI_CHATGPT_USER,
// Perplexity
Fetcher.PERPLEXITY_USER
]);
const AICrawlers = new BotList(Crawlers, 'name', [
const AICrawlersList = [
// AI2 // AI2
Crawler.AI2_BOT, Crawler.AI2_BOT,
// Amazon // Amazon
Crawler.AMAZON_BOT, Crawler.AMAZON_BOT,
// Anthropic // Anthropic
Crawler.ANTHROPIC_AI, Crawler.ANTHROPIC_AI,
Crawler.ANTHROPIC_CLAUDE_BOT, Crawler.ANTHROPIC_CLAUDE_BOT,
Crawler.ANTHROPIC_CLAUDE_SEARCHBOT, Crawler.ANTHROPIC_CLAUDE_SEARCHBOT,
Crawler.ANTHROPIC_CLAUDE_WEB, Crawler.ANTHROPIC_CLAUDE_WEB,
// Apple // Apple
Crawler.APPLE_BOT, Crawler.APPLE_BOT,
Crawler.APPLE_BOT_EXTENDED, Crawler.APPLE_BOT_EXTENDED,
// Brave // Brave
Crawler.BRAVE_BOT, Crawler.BRAVE_BOT,
// ByteDance // ByteDance
Crawler.BYTEDANCE_BYTESPIDER, Crawler.BYTEDANCE_BYTESPIDER,
Crawler.BYTEDANCE_TIKTOKSPIDER, Crawler.BYTEDANCE_TIKTOKSPIDER,
// Cohere // Cohere
Crawler.COHERE_TRAINING_DATA_CRAWLER, Crawler.COHERE_TRAINING_DATA_CRAWLER,
// Common Crawl // Common Crawl
Crawler.COMMON_CRAWL_CCBOT, Crawler.COMMON_CRAWL_CCBOT,
// Coveo // Coveo
Crawler.COVEO_BOT, Crawler.COVEO_BOT,
// DataForSeo // DataForSeo
Crawler.DATAFORSEO_BOT, Crawler.DATAFORSEO_BOT,
// DeepSeek // DeepSeek
Crawler.DEEPSEEK_BOT, Crawler.DEEPSEEK_BOT,
// Diffbot // Diffbot
Crawler.DIFFBOT, Crawler.DIFFBOT,
// Google // Google
Crawler.GOOGLE_EXTENDED, Crawler.GOOGLE_EXTENDED,
Crawler.GOOGLE_OTHER, Crawler.GOOGLE_OTHER,
Crawler.GOOGLE_OTHER_IMAGE, Crawler.GOOGLE_OTHER_IMAGE,
Crawler.GOOGLE_OTHER_VIDEO, Crawler.GOOGLE_OTHER_VIDEO,
Crawler.GOOGLE_CLOUDVERTEXBOT, Crawler.GOOGLE_CLOUDVERTEXBOT,
// Hive AI // Hive AI
Crawler.HIVE_IMAGESIFTBOT, Crawler.HIVE_IMAGESIFTBOT,
// Huawei // Huawei
Crawler.HUAWEI_PETALBOT, Crawler.HUAWEI_PETALBOT,
Crawler.HUAWEI_PANGUBOT, Crawler.HUAWEI_PANGUBOT,
// Hugging Face // Hugging Face
Crawler.HUGGINGFACE_BOT, Crawler.HUGGINGFACE_BOT,
// Kangaroo // Kangaroo
Crawler.KANGAROO_BOT, Crawler.KANGAROO_BOT,
// Mendable.ai // Mendable.ai
Crawler.FIRECRAWL_AGENT, Crawler.FIRECRAWL_AGENT,
// Meta // Meta
Crawler.META_FACEBOOKBOT, Crawler.META_FACEBOOKBOT,
Crawler.META_EXTERNALAGENT, Crawler.META_EXTERNALAGENT,
// OpenAI // OpenAI
Crawler.OPENAI_GPTBOT, Crawler.OPENAI_GPTBOT,
Crawler.OPENAI_SEARCH_BOT, Crawler.OPENAI_SEARCH_BOT,
// Perplexity // Perplexity
Crawler.PERPLEXITY_BOT, Crawler.PERPLEXITY_BOT,
// Replicate // Replicate
Crawler.REPLICATE_BOT, Crawler.REPLICATE_BOT,
// Runpod // Runpod
Crawler.RUNPOD_BOT, Crawler.RUNPOD_BOT,
// SB Intuitions // SB Intuitions
Crawler.SB_INTUITIONS_BOT, Crawler.SB_INTUITIONS_BOT,
// Semrush // Semrush
Crawler.SEMRUSH_BOT_CONTENTSHAKE, Crawler.SEMRUSH_BOT_CONTENTSHAKE,
// Timpi // Timpi
Crawler.TIMPI_BOT, Crawler.TIMPI_BOT,
// Together AI // Together AI
Crawler.TOGETHER_BOT, Crawler.TOGETHER_BOT,
// Velen.io // Velen.io
Crawler.HUNTER_VELENPUBLICWEBCRAWLER, Crawler.HUNTER_VELENPUBLICWEBCRAWLER,
// Vercel // Vercel
Crawler.VERCEL_V0BOT, Crawler.VERCEL_V0BOT,
// Webz.io // Webz.io
Crawler.WEBZIO_OMGILI, Crawler.WEBZIO_OMGILI,
Crawler.WEBZIO_OMGILI_BOT, Crawler.WEBZIO_OMGILI_BOT,
Crawler.WEBZIO_EXTENDED, Crawler.WEBZIO_EXTENDED,
// X // X
Crawler.XAI_BOT, Crawler.XAI_BOT,
// You.com // You.com
Crawler.YOU_BOT, Crawler.YOU_BOT,
// Zhipu AI // Zhipu AI
Crawler.ZHIPU_CHATGLM_SPIDER Crawler.ZHIPU_CHATGLM_SPIDER
]; ]);
const isAICrawler = ua => const isBot = ua => BotTypes.includes(ua);
AICrawlersList const isAIAssistant = ua => AIAssistants.includes(ua);
.map(s=>s.toLowerCase()) const isAICrawler = ua => AICrawlers.includes(ua);
.includes(
(typeof ua === 'string' ?
new UAParser(ua, Crawlers).getBrowser() :
ua.browser
).name?.toLowerCase());
const isBot = ua => export {
BotTypesList isAIAssistant,
.includes(
(typeof ua === 'string' ?
new UAParser(ua, Bots).getBrowser() :
ua.browser
).type);
export {
isAICrawler, isAICrawler,
isBot isBot
} }