[extension][bot] Add AhrefsBot, AhrefsSiteAudit, Dotbot, Rogerbot, UptimeRobot, Coc Coc Bot

This commit is contained in:
Faisal Salman 2024-08-19 11:10:15 +07:00
parent b8d823dd57
commit b1dae13245
3 changed files with 79 additions and 3 deletions

View File

@ -40,15 +40,17 @@ const CLIs = Object.freeze({
const Crawlers = Object.freeze({ const Crawlers = Object.freeze({
browser : [ browser : [
[ [
// AhrefsBot - https://ahrefs.com/robot
// Amazonbot - https://developer.amazon.com/amazonbot // Amazonbot - https://developer.amazon.com/amazonbot
// Applebot - http://apple.com/go/applebot // Applebot - http://apple.com/go/applebot
// Bingbot - http://www.bing.com/bingbot.htm // Bingbot - http://www.bing.com/bingbot.htm
// Dotbot - https://moz.com/help/moz-procedures/crawlers/dotbot
// DuckDuckBot - http://duckduckgo.com/duckduckbot.html // DuckDuckBot - http://duckduckgo.com/duckduckbot.html
// FacebookBot - https://developers.facebook.com/docs/sharing/bot/ // FacebookBot - https://developers.facebook.com/docs/sharing/bot/
// GPTBot - https://platform.openai.com/docs/gptbot // GPTBot - https://platform.openai.com/docs/gptbot
// MJ12bot - https://mj12bot.com/ // MJ12bot - https://mj12bot.com/
// SemrushBot - http://www.semrush.com/bot.html // SemrushBot - http://www.semrush.com/bot.html
/((?:amazon|apple|bing|duckduck|facebook|gpt|mj12|semrush)bot)\/([\w\.]+)/i, /((?:ahrefs|amazon|apple|bing|dot|duckduck|facebook|gpt|mj12|semrush)bot)\/([\w\.]+)/i,
// Baiduspider https://help.baidu.com/question?prod_id=99&class=0&id=3001 // Baiduspider https://help.baidu.com/question?prod_id=99&class=0&id=3001
/(baiduspider)[-imagevdonsfcpr]{0,6}\/([\w\.]+)/i, /(baiduspider)[-imagevdonsfcpr]{0,6}\/([\w\.]+)/i,
@ -56,6 +58,9 @@ const Crawlers = Object.freeze({
// ClaudeBot // ClaudeBot
/(claude(?:bot|-web))\/([\w\.]+)/i, /(claude(?:bot|-web))\/([\w\.]+)/i,
// Coc Coc Bot - https://help.coccoc.com/en/search-engine
/(coccocbot-(?:image|web))\/([\w\.]+)/i,
// Googlebot - http://www.google.com/bot.html // Googlebot - http://www.google.com/bot.html
/(google(?:bot|other)(?:-image|-video|-news|-extended)?|(?:storebot-)?google(?:-inspectiontool)?)\/?([\w\.]*)/i, /(google(?:bot|other)(?:-image|-video|-news|-extended)?|(?:storebot-)?google(?:-inspectiontool)?)\/?([\w\.]*)/i,
@ -176,9 +181,10 @@ const Emails = Object.freeze({
const Fetchers = Object.freeze({ const Fetchers = Object.freeze({
browser : [ browser : [
[ [
// AhrefsSiteAudit - https://ahrefs.com/robot/site-audit
// ChatGPT-User - https://platform.openai.com/docs/plugins/bot // ChatGPT-User - https://platform.openai.com/docs/plugins/bot
// BingPreview / Mastodon / Pinterestbot / Redditbot / Telegrambot / Twitterbot // BingPreview / Mastodon / Pinterestbot / Redditbot / Rogerbot / Telegrambot / Twitterbot / UptimeRobot
/(bingpreview|chatgpt-user|mastodon|(?:discord|linkedin|pinterest|reddit|telegram|twitter)bot)\/([\w\.]+)/i, /(ahrefssiteaudit|bingpreview|chatgpt-user|mastodon|(?:discord|linkedin|pinterest|reddit|roger|telegram|twitter|uptimero)bot)\/([\w\.]+)/i,
// Slackbot - https://api.slack.com/robots // Slackbot - https://api.slack.com/robots
/(slack(?:bot)?(?:-imgproxy|-linkexpanding)?) ([\w\.]+)/i, /(slack(?:bot)?(?:-imgproxy|-linkexpanding)?) ([\w\.]+)/i,

View File

@ -1,4 +1,14 @@
[ [
{
"desc" : "AhrefsBot",
"ua" : "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)",
"expect" :
{
"name" : "AhrefsBot",
"version" : "7.0",
"type" : "crawler"
}
},
{ {
"desc" : "Applebot", "desc" : "Applebot",
"ua" : "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1;+http://www.apple.com/go/applebot)", "ua" : "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1;+http://www.apple.com/go/applebot)",
@ -39,6 +49,26 @@
"type" : "crawler" "type" : "crawler"
} }
}, },
{
"desc" : "Coc Coc Bot (web)",
"ua" : "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)",
"expect" :
{
"name" : "coccocbot-web",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "Coc Coc Bot (image)",
"ua" : "Mozilla/5.0 (compatible; coccocbot-image/1.0; +http://help.coccoc.com/searchengine)",
"expect" :
{
"name" : "coccocbot-image",
"version" : "1.0",
"type" : "crawler"
}
},
{ {
"desc" : "ClaudeWeb", "desc" : "ClaudeWeb",
"ua" : "Claude-Web/1.0 (web crawler; +https://www.anthropic.com/; bots@anthropic.com)", "ua" : "Claude-Web/1.0 (web crawler; +https://www.anthropic.com/; bots@anthropic.com)",
@ -49,6 +79,16 @@
"type" : "crawler" "type" : "crawler"
} }
}, },
{
"desc" : "Dotbot",
"ua" : "Mozilla/5.0 (compatible; DotBot/1.2; +https://opensiteexplorer.org/dotbot; help@moz.com)",
"expect" :
{
"name" : "DotBot",
"version" : "1.2",
"type" : "crawler"
}
},
{ {
"desc" : "FacebookBot", "desc" : "FacebookBot",
"ua" : "Mozilla/5.0 (compatible; FacebookBot/1.0; +https://developers.facebook.com/docs/sharing/webmasters/facebookbot/", "ua" : "Mozilla/5.0 (compatible; FacebookBot/1.0; +https://developers.facebook.com/docs/sharing/webmasters/facebookbot/",

View File

@ -1,4 +1,14 @@
[ [
{
"desc" : "AhrefsSiteAudit",
"ua" : "Mozilla/5.0 (Linux; Android 13) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.5359.128 Mobile Safari/537.36 (compatible; AhrefsSiteAudit/6.1; +http://ahrefs.com/robot/site-audit)",
"expect" :
{
"name" : "AhrefsSiteAudit",
"version" : "6.1",
"type" : "fetcher"
}
},
{ {
"desc" : "BingPreview", "desc" : "BingPreview",
"ua" : "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534+ (KHTML, like Gecko) BingPreview/1.0b", "ua" : "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534+ (KHTML, like Gecko) BingPreview/1.0b",
@ -18,5 +28,25 @@
"version" : "1.0", "version" : "1.0",
"type" : "fetcher" "type" : "fetcher"
} }
},
{
"desc" : "Rogerbot",
"ua" : "Mozilla/5.0 (compatible; rogerBot/1.0; UrlCrawler; http://www.seomoz.org/dp/rogerbot)",
"expect" :
{
"name" : "rogerBot",
"version" : "1.0",
"type" : "fetcher"
}
},
{
"desc" : "UptimeRobot",
"ua" : "Mozilla/5.0 (compatible; UptimeRobot/2.0; http://www.uptimerobot.com/)",
"expect" :
{
"name" : "UptimeRobot",
"version" : "2.0",
"type" : "fetcher"
}
} }
] ]