mirror of
https://github.com/faisalman/ua-parser-js.git
synced 2025-09-27 07:58:45 +03:00
[extensions][helpers] Add new bots: cohere-training-data-crawler, Gemini-Deep-Research, kakaotalk-scrap, TikTokSpider
This commit is contained in:
parent
74ef71cf63
commit
95485f7b5d
@ -58,7 +58,7 @@ const Crawlers = Object.freeze({
|
|||||||
// OpenAI's SearchGPT - https://platform.openai.com/docs/bots
|
// OpenAI's SearchGPT - https://platform.openai.com/docs/bots
|
||||||
// PerplexityBot - https://perplexity.ai/perplexitybot
|
// PerplexityBot - https://perplexity.ai/perplexitybot
|
||||||
// SeznamBot - http://napoveda.seznam.cz/seznambot-intro
|
// SeznamBot - http://napoveda.seznam.cz/seznambot-intro
|
||||||
/((?:adidx|ahrefs|amazon|bing|cc|coveo|criteo|dot|duckduck|exa|facebook|gpt|iask|linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i,
|
/((?:adidx|ahrefs|amazon|bing|cc|coveo|criteo|dot|duckduck(?:go-favicons-)?|exa|facebook|gpt|iask|linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i,
|
||||||
|
|
||||||
// Applebot - http://apple.com/go/applebot
|
// Applebot - http://apple.com/go/applebot
|
||||||
/(applebot(?:-extended)?)\/?([\w\.]*)/i,
|
/(applebot(?:-extended)?)\/?([\w\.]*)/i,
|
||||||
@ -116,12 +116,13 @@ const Crawlers = Object.freeze({
|
|||||||
// AI2Bot - https://allenai.org/crawler
|
// AI2Bot - https://allenai.org/crawler
|
||||||
// Bytespider
|
// Bytespider
|
||||||
// DataForSeoBot - https://dataforseo.com/dataforseo-bot
|
// DataForSeoBot - https://dataforseo.com/dataforseo-bot
|
||||||
|
// DeepSeekBot
|
||||||
// Huawei AspiegelBot / PetalBot https://aspiegel.com/petalbot
|
// Huawei AspiegelBot / PetalBot https://aspiegel.com/petalbot
|
||||||
// ImagesiftBot - https://imagesift.com/about
|
// ImagesiftBot - https://imagesift.com/about
|
||||||
// Qihoo 360Spider
|
// Qihoo 360Spider
|
||||||
// TurnitinBot - https://www.turnitin.com/robot/crawlerinfo.html
|
// TurnitinBot - https://www.turnitin.com/robot/crawlerinfo.html
|
||||||
// Yahoo! Slurp - http://help.yahoo.com/help/us/ysearch/slurp
|
// Yahoo! Slurp - http://help.yahoo.com/help/us/ysearch/slurp
|
||||||
/\b(360spider-?(?:image|video)?|bytespider|(?:ai2|aspiegel|dataforseo|imagesift|petal|turnitin)bot|teoma|yahoo! slurp)/i
|
/\b(360spider-?(?:image|video)?|bytespider|cohere-training-data-crawler|elastic(?=\/s)|(?:ai2|aspiegel|dataforseo|deepseek|imagesift|petal|turnitin)bot|teoma|yahoo! slurp)/i
|
||||||
],
|
],
|
||||||
[NAME, [TYPE, CRAWLER]]
|
[NAME, [TYPE, CRAWLER]]
|
||||||
]
|
]
|
||||||
@ -236,17 +237,17 @@ const Emails = Object.freeze({
|
|||||||
const Fetchers = Object.freeze({
|
const Fetchers = Object.freeze({
|
||||||
browser : [
|
browser : [
|
||||||
[
|
[
|
||||||
|
// Asana / Bitlybot / Better Uptime / BingPreview / Blueno / kakaotalk-scrap / Mastodon / MicrosoftPreview / Pinterestbot / Redditbot / Rogerbot / SiteAuditBot / Telegrambot / Twitterbot / UptimeRobot / Zoombot
|
||||||
// AhrefsSiteAudit - https://ahrefs.com/robot/site-audit
|
// AhrefsSiteAudit - https://ahrefs.com/robot/site-audit
|
||||||
// Buffer Link Preview Bot - https://scraper.buffer.com/about/bots/link-preview-bot
|
// Buffer Link Preview Bot - https://scraper.buffer.com/about/bots/link-preview-bot
|
||||||
// ChatGPT-User - https://platform.openai.com/docs/plugins/bot
|
// ChatGPT-User - https://platform.openai.com/docs/plugins/bot
|
||||||
// DuckAssistBot - https://duckduckgo.com/duckassistbot/
|
// DuckAssistBot - https://duckduckgo.com/duckassistbot/
|
||||||
// Better Uptime / BingPreview / Blueno / Mastodon / MicrosoftPreview / Pinterestbot / Redditbot / Rogerbot / SiteAuditBot / Telegrambot / Twitterbot / UptimeRobot
|
|
||||||
// Google Site Verifier / Meta / Yahoo! Japan
|
// Google Site Verifier / Meta / Yahoo! Japan
|
||||||
// Iframely - https://iframely.com/docs/about
|
// Iframely - https://iframely.com/docs/about
|
||||||
// Perplexity-User - https://docs.perplexity.ai/guides/bots
|
// Perplexity-User - https://docs.perplexity.ai/guides/bots
|
||||||
// MistralAI-User - https://docs.mistral.ai/robots/
|
// MistralAI-User - https://docs.mistral.ai/robots/
|
||||||
// Yandex Bots - https://yandex.com/bots
|
// Yandex Bots - https://yandex.com/bots
|
||||||
/(ahrefssiteaudit|(?:bing|microsoft)preview|blueno|(?:chatgpt|claude|mistralai|perplexity)-user|mastodon|(?:bufferlinkpreview|discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero)bot|google-site-verification|iframely|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i,
|
/(asana|ahrefssiteaudit|(?:bing|microsoft)preview|blueno|(?:chatgpt|claude|mistralai|perplexity)-user|mastodon|(?:bitly|bufferlinkpreview|discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero|zoom)bot|google-site-verification|iframely|kakaotalk-scrap|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i,
|
||||||
|
|
||||||
// Bluesky
|
// Bluesky
|
||||||
/(bluesky) cardyb\/([\w\.]+)/i,
|
/(bluesky) cardyb\/([\w\.]+)/i,
|
||||||
@ -263,8 +264,8 @@ const Fetchers = Object.freeze({
|
|||||||
[NAME, VERSION, [TYPE, FETCHER]],
|
[NAME, VERSION, [TYPE, FETCHER]],
|
||||||
|
|
||||||
[
|
[
|
||||||
// Google Bots / Cohere / Snapchat / Vercelbot / Yandex Bots
|
// Google Bots / Chrome-Lighthouse / Cohere / Gemini-Deep-Research / Snapchat / TikTokSpider / Vercelbot / Yandex Bots
|
||||||
/((?:better uptime |telegram|vercel)bot|cohere-ai|feedfetcher-google|google(?:imageproxy|-read-aloud|-pagerenderer|producer)|snap url preview|yandex(?:sitelinks|userproxy))/i
|
/((?:better uptime |telegram|vercel)bot|chrome-lighthouse|cohere-ai|feedfetcher-google|gemini-deep-research|google(?:imageproxy|-read-aloud|-pagerenderer|producer)|snap url preview|tiktokspider|yandex(?:sitelinks|userproxy))/i
|
||||||
],
|
],
|
||||||
[NAME, [TYPE, FETCHER]],
|
[NAME, [TYPE, FETCHER]],
|
||||||
],
|
],
|
||||||
|
@ -62,6 +62,9 @@ const isAIBot = (resultOrUA) => [
|
|||||||
// ByteDance
|
// ByteDance
|
||||||
'bytespider',
|
'bytespider',
|
||||||
|
|
||||||
|
// Cohere
|
||||||
|
'cohere-training-data-crawler',
|
||||||
|
|
||||||
// Common Crawl
|
// Common Crawl
|
||||||
'ccbot',
|
'ccbot',
|
||||||
|
|
||||||
|
@ -299,6 +299,16 @@
|
|||||||
"type" : "crawler"
|
"type" : "crawler"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"desc" : "cohere-training-data-crawler",
|
||||||
|
"ua" : "cohere-training-data-crawler (+crawler@cohere.ai)",
|
||||||
|
"expect" :
|
||||||
|
{
|
||||||
|
"name" : "cohere-training-data-crawler",
|
||||||
|
"version" : "undefined",
|
||||||
|
"type" : "crawler"
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"desc" : "Coveobot",
|
"desc" : "Coveobot",
|
||||||
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) (compatible; Coveobot/2.0;+http://www.coveo.com/bot.html)",
|
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) (compatible; Coveobot/2.0;+http://www.coveo.com/bot.html)",
|
||||||
|
@ -109,6 +109,16 @@
|
|||||||
"type" : "fetcher"
|
"type" : "fetcher"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"desc" : "Gemini-Deep-Research",
|
||||||
|
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Gemini-Deep-Research; +https://gemini.google/overview/deep-research/) Chrome/135.0.0.0 Safari/537.36",
|
||||||
|
"expect" :
|
||||||
|
{
|
||||||
|
"name" : "Gemini-Deep-Research",
|
||||||
|
"version" : "undefined",
|
||||||
|
"type" : "fetcher"
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"desc" : "Google FeedFetcher",
|
"desc" : "Google FeedFetcher",
|
||||||
"ua" : "FeedFetcher-Google; (+http://www.google.com/feedfetcher.html)",
|
"ua" : "FeedFetcher-Google; (+http://www.google.com/feedfetcher.html)",
|
||||||
@ -189,6 +199,16 @@
|
|||||||
"type" : "fetcher"
|
"type" : "fetcher"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"desc" : "kakaotalk-scrap",
|
||||||
|
"ua" : "facebookexternalhit/1.1; kakaotalk-scrap/1.0; +https://devtalk.kakao.com/t/scrap/33984",
|
||||||
|
"expect" :
|
||||||
|
{
|
||||||
|
"name" : "kakaotalk-scrap",
|
||||||
|
"version" : "1.0",
|
||||||
|
"type" : "fetcher"
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"desc" : "Meta-ExternalFetcher",
|
"desc" : "Meta-ExternalFetcher",
|
||||||
"ua" : "meta-externalfetcher/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)",
|
"ua" : "meta-externalfetcher/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)",
|
||||||
@ -289,6 +309,16 @@
|
|||||||
"type" : "fetcher"
|
"type" : "fetcher"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"desc" : "TikTokSpider",
|
||||||
|
"ua" : "Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; TikTokSpider; ttspider-feedback@tiktok.com)",
|
||||||
|
"expect" :
|
||||||
|
{
|
||||||
|
"name" : "TikTokSpider",
|
||||||
|
"version" : "undefined",
|
||||||
|
"type" : "fetcher"
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"desc" : "UptimeRobot",
|
"desc" : "UptimeRobot",
|
||||||
"ua" : "Mozilla/5.0 (compatible; UptimeRobot/2.0; http://www.uptimerobot.com/)",
|
"ua" : "Mozilla/5.0 (compatible; UptimeRobot/2.0; http://www.uptimerobot.com/)",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user