[extensions] Add new crawler: Qwantbot-news, SurdotlyBot, Swiftbot

This commit is contained in:
Faisal Salman
2025-10-01 12:19:46 +07:00
parent bd6bb216bd
commit b3281b7c12
3 changed files with 28 additions and 3 deletions

View File

@@ -486,6 +486,7 @@ const Extension = Object.freeze({
DUCKDUCKGO_BOT: 'DuckDuckBot',
DUCKDUCKGO_FAVICONS_BOT: 'DuckDuckGo-Favicons-Bot',
ELASTIC: 'Elastic',
ELASTIC_SWIFTYPE_BOT: 'Swiftbot',
EXALEAD_EXABOT: 'Exabot',
FIRECRAWL_AGENT: 'FirecrawlAgent',
FREESPOKE: 'Freespoke',
@@ -535,6 +536,7 @@ const Extension = Object.freeze({
PERPLEXITY_BOT: 'PerplexityBot',
QIHOO_360_SPIDER: '360Spider',
QWANT_BOT: 'Qwantbot',
QWANT_BOT_NEWS: 'Qwantbot-news',
REPLICATE_BOT: 'Replicate-Bot',
RUNPOD_BOT: 'RunPod-Bot',
SB_INTUITIONS_BOT: 'SBIntuitionsBot',
@@ -548,6 +550,7 @@ const Extension = Object.freeze({
SOGOU_PIC_SPIDER: 'Sogou Pic Spider',
SOGOU_WEB_SPIDER: 'Sogou web spider',
STARTPAGE: 'Startpage',
SURLY_BOT: 'SurdotlyBot',
TIMPI_BOT: 'Timpibot',
TOGETHER_BOT: 'Together-Bot',
TURNITIN_BOT: 'TurnitinBot',

View File

@@ -63,8 +63,10 @@ const Crawlers = Object.freeze({
// PerplexityBot - https://perplexity.ai/perplexitybot
// SBIntuitionsBot - https://www.sbintuitions.co.jp/bot/
// SeznamBot - http://napoveda.seznam.cz/seznambot-intro
// SurdotlyBot - http://sur.ly/bot.html
// Swiftbot - https://swiftype.com/swiftbot
// YepBot - https://yep.com/yepbot/
/((?:adidx|ahrefs|amazon|bing|brave|cc|contx|coveo|criteo|dot|duckduck(?:go-favicons-)?|exa|facebook|gpt|iask|kagi|kangaroo |linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|sbintuitions|semrush|seznam|yep)bot)\/([\w\.-]+)/i,
/((?:adidx|ahrefs|amazon|bing|brave|cc|contx|coveo|criteo|dot|duckduck(?:go-favicons-)?|exa|facebook|gpt|iask|kagi|kangaroo |linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|sbintuitions|semrush|seznam|surdotly|swift|yep)bot)\/([\w\.-]+)/i,
// Algolia Crawler
/(algolia crawler(?: renderscript)?)\/?([\w\.]*)/i,
@@ -98,7 +100,7 @@ const Crawlers = Object.freeze({
/(oncrawl) mobile\/([\w\.]+)/i,
// Qwantbot - https://help.qwant.com/bot
/(qwantbot)[-\w]*\/?([\w\.]*)/i,
/(qwantbot(?:-news)?)[-\w]*\/?([\w\.]*)/i,
// SemrushBot - http://www.semrush.com/bot.html
/((?:semrush|splitsignal)bot[-abcfimostw]*)\/?([\w\.-]*)/i,

View File

@@ -965,7 +965,7 @@
"ua" : "Mozilla/5.0 (compatible; Qwantbot-news/2.0; +https://help.qwant.com/bot/)",
"expect" :
{
"name" : "Qwantbot",
"name" : "Qwantbot-news",
"version" : "2.0",
"type" : "crawler"
}
@@ -1100,6 +1100,26 @@
"type" : "crawler"
}
},
{
"desc" : "SurdotlyBot",
"ua" : "Mozilla/5.0 (compatible; SurdotlyBot/1.0; +http://sur.ly/bot.html)",
"expect" :
{
"name" : "SurdotlyBot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "Swiftbot",
"ua" : "Swiftbot/1.0 (swiftype.com)",
"expect" :
{
"name" : "Swiftbot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "Teoma",
"ua" : "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)",