[extensions] Add new crawlers: Algolia, Baidu, BLEXBot, Botify, Freespoke, Marginalia, MSNBot, OnCrawl, SeekportBot, Siteimprove, TwinAgent, YepBot, ZumBot

This commit is contained in:
Faisal Salman
2025-08-21 21:40:50 +07:00
parent 9003fe5724
commit c9badeb345
2 changed files with 155 additions and 8 deletions

View File

@@ -89,6 +89,16 @@
"type" : "crawler"
}
},
{
"desc" : "Algolia Crawler Renderscript",
"ua" : "Algolia Crawler Renderscript",
"expect" :
{
"name" : "Algolia Crawler Renderscript",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Applebot",
"ua" : "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1;+http://www.apple.com/go/applebot)",
@@ -149,6 +159,16 @@
"type" : "crawler"
}
},
{
"desc" : "Baidu ADS",
"ua" : "Baidu-ADS",
"expect" :
{
"name" : "Baidu-ADS",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Baiduspider",
"ua" : "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
@@ -239,6 +259,26 @@
"type" : "crawler"
}
},
{
"desc" : "BLEXBot",
"ua" : "Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)",
"expect" :
{
"name" : "BLEXBot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "botify",
"ua" : "Desktop: Mozilla/5.0 (compatible; botify; http://botify.com)",
"expect" :
{
"name" : "botify",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Bravebot",
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Bravebot/1.0; +https://search.brave.com/help/brave-search-crawler) Chrome/W.X.Y.Z Safari/537.36",
@@ -519,6 +559,16 @@
"type" : "crawler"
}
},
{
"desc" : "Freespoke",
"ua" : "Mozilla/5.0 (compatible; Freespoke/2.0; +https://docs.freespoke.com/search/bot)",
"expect" :
{
"name" : "Freespoke",
"version" : "2.0",
"type" : "crawler"
}
},
{
"desc" : "Googlebot-Video",
"ua" : "Googlebot-Video/1.0",
@@ -719,6 +769,16 @@
"type" : "crawler"
}
},
{
"desc" : "Marginalia Search",
"ua" : "search.marginalia.nu",
"expect" :
{
"name" : "marginalia",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Meta-ExternalAgent",
"ua" : "meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)",
@@ -750,6 +810,16 @@
"type" : "crawler"
}
},
{
"desc" : "msnbot",
"ua" : "msnbot/2.0b (+http://search.msn.com/msnbot.htm)",
"expect" :
{
"name" : "msnbot",
"version" : "2.0b",
"type" : "crawler"
}
},
{
"desc" : "Omgili",
"ua" : "omgili/0.5 +https://omgili.com",
@@ -770,6 +840,16 @@
"type" : "crawler"
}
},
{
"desc" : "OnCrawl",
"ua" : "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; OnCrawl Mobile/1.0; +http://www.oncrawl.com/)",
"expect" :
{
"name" : "OnCrawl",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "Onespot",
"ua" : "Mozilla/5.0 (compatible; Onespot-ScraperBot/1.0; +https://www.onespot.com/identifying-traffic.html)",
@@ -880,6 +960,16 @@
"type" : "crawler"
}
},
{
"desc" : "SeekportBot",
"ua" : "Mozilla/5.0 (compatible; SeekportBot; +https://bot.seekport.com)",
"expect" :
{
"name" : "SeekportBot",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "SemrushBot",
"ua" : "Mozilla/5.0 (compatible; SemrushBot/7~bl; +http://www.semrush.com/bot.html)",
@@ -931,7 +1021,27 @@
}
},
{
"desc" : "Sogou",
"desc" : "Siteimprove",
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; SiteCheck-sitecrawl by Siteimprove.com; +https://siteimprove.com/bots) Chrome/[VERSION] Safari/537.36",
"expect" :
{
"name" : "Siteimprove",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Sogou Pic Spider",
"ua" : "Sogou Pic Spider/3.0( http://www.sogou.com/docs/help/webmasters.htm#07)",
"expect" :
{
"name" : "Sogou Pic Spider",
"version" : "3.0",
"type" : "crawler"
}
},
{
"desc" : "Sogou web spider",
"ua" : "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
"expect" :
{
@@ -990,6 +1100,16 @@
"type" : "crawler"
}
},
{
"desc" : "TwinAgent",
"ua" : "TwinAgent/1.0",
"expect" :
{
"name" : "TwinAgent",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "xAI-Bot",
"ua" : "Mozilla/5.0 (compatible; xAI-Bot/1.0; +https://x.ai/)",
@@ -1050,6 +1170,16 @@
"type" : "crawler"
}
},
{
"desc" : "YepBot",
"ua" : "Mozilla/5.0 (compatible; YepBot/1.0; +http://yep.com/yepbot/)",
"expect" :
{
"name" : "YepBot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "Yeti",
"ua" : "Mozilla/5.0 (compatible; Yeti/1.1; +http://naver.me/spd)",
@@ -1089,5 +1219,15 @@
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "ZumBot",
"ua" : "Mozilla/5.0 (compatible; ZumBot/1.0; http://help.zum.com/inquiry)",
"expect" :
{
"name" : "ZumBot",
"version" : "1.0",
"type" : "crawler"
}
}
]