[extensions] Add new crawler bots: ChatGLM, Onespot, Startpage

This commit is contained in:
Faisal Salman 2025-06-03 11:03:45 +07:00
parent a3549efc22
commit 72d0c2acb3
3 changed files with 37 additions and 3 deletions

View File

@ -52,10 +52,11 @@ const Crawlers = Object.freeze({
// LinkedInBot - http://www.linkedin.com // LinkedInBot - http://www.linkedin.com
// MJ12bot - https://mj12bot.com/ // MJ12bot - https://mj12bot.com/
// MojeekBot - https://www.mojeek.com/bot.html // MojeekBot - https://www.mojeek.com/bot.html
// Onespot - https://www.onespot.com/identifying-traffic.html
// OpenAI's SearchGPT - https://platform.openai.com/docs/bots // OpenAI's SearchGPT - https://platform.openai.com/docs/bots
// PerplexityBot - https://perplexity.ai/perplexitybot // PerplexityBot - https://perplexity.ai/perplexitybot
// SeznamBot - http://napoveda.seznam.cz/seznambot-intro // SeznamBot - http://napoveda.seznam.cz/seznambot-intro
/((?:adidx|ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|iask|linkedin|mj12|mojeek|oai-search|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i, /((?:adidx|ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|iask|linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i,
// Applebot - http://apple.com/go/applebot // Applebot - http://apple.com/go/applebot
/(applebot(?:-extended)?)\/?([\w\.]*)/i, /(applebot(?:-extended)?)\/?([\w\.]*)/i,
@ -100,8 +101,8 @@ const Crawlers = Object.freeze({
// Yeti (Naver) // Yeti (Naver)
/(yeti)\/([\w\.]+)/i, /(yeti)\/([\w\.]+)/i,
// aiHitBot / Diffbot / Linespider / Magpie-Crawler / Omgilibot / OpenAI Image Downloader / Webzio-Extended / Screaming Frog SEO Spider / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot // aiHitBot / ChatGLM-Spider / Diffbot / Linespider / Magpie-Crawler / Omgilibot / OpenAI Image Downloader / Webzio-Extended / Screaming Frog SEO Spider / Startpage / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot
/((?:aihit|diff|timpi|you)bot|omgili(?:bot)?|openai image downloader|(?:magpie-|velenpublicweb)crawler|webzio-extended|(?:screaming frog seo |line|yisou)spider)\/?([\w\.]*)/i /((?:aihit|diff|timpi|you)bot|omgili(?:bot)?|openai image downloader|(?:magpie-|velenpublicweb)crawler|startpageprivateimageproxy|webzio-extended|(?:chatglm-|line|screaming frog seo |yisou)spider)\/?([\w\.]*)/i
], ],
[NAME, VERSION, [TYPE, CRAWLER]], [NAME, VERSION, [TYPE, CRAWLER]],

View File

@ -110,6 +110,9 @@ const isAIBot = (resultOrUA) => [
// You.com // You.com
'youbot', 'youbot',
// Zhipu AI
'chatglm-spider',
// Zyte // Zyte
'scrapy' 'scrapy'

View File

@ -259,6 +259,16 @@
"type" : "crawler" "type" : "crawler"
} }
}, },
{
"desc" : "ChatGLM-Spider",
"ua" : "Mozilla/5.0 (compatible; ChatGLM-Spider/1.0; +https://chatglm.cn/)",
"expect" :
{
"name" : "ChatGLM-Spider",
"version" : "1.0",
"type" : "crawler"
}
},
{ {
"desc" : "Coc Coc Bot (web)", "desc" : "Coc Coc Bot (web)",
"ua" : "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)", "ua" : "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)",
@ -620,6 +630,16 @@
"type" : "crawler" "type" : "crawler"
} }
}, },
{
"desc" : "Onespot",
"ua" : "Mozilla/5.0 (compatible; Onespot-ScraperBot/1.0; +https://www.onespot.com/identifying-traffic.html)",
"expect" :
{
"name" : "Onespot-ScraperBot",
"version" : "1.0",
"type" : "crawler"
}
},
{ {
"desc" : "OpenAI Search", "desc" : "OpenAI Search",
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot", "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot",
@ -750,6 +770,16 @@
"type" : "crawler" "type" : "crawler"
} }
}, },
{
"desc" : "Startpage",
"ua" : "StartpagePrivateImageProxy/3.0 (https://www.startpage.com/; support@startpage.com) aiohttp.client/3.11.11",
"expect" :
{
"name" : "StartpagePrivateImageProxy",
"version" : "3.0",
"type" : "crawler"
}
},
{ {
"desc" : "Teoma", "desc" : "Teoma",
"ua" : "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)", "ua" : "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)",