[extensions][helpers] Add some new AI bots: Bravebot, Cotoyogi, FirecrawlAgent, HuggingFace-Bot, Kangaroo Bot, PanguBot, Replicate-Bot, RunPod-Bot, Together-Bot, xAI-Bot

This commit is contained in:
Faisal Salman
2025-08-14 20:42:17 +07:00
parent 647b6232bd
commit 975c4860f4
3 changed files with 131 additions and 3 deletions

View File

@@ -229,6 +229,16 @@
"type" : "crawler"
}
},
{
"desc" : "Bravebot",
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Bravebot/1.0; +https://search.brave.com/help/brave-search-crawler) Chrome/W.X.Y.Z Safari/537.36",
"expect" :
{
"name" : "Bravebot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "Bytespider",
"ua" : "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.1511.1269 Mobile Safari/537.36; Bytespider",
@@ -309,6 +319,16 @@
"type" : "crawler"
}
},
{
"desc" : "Cotoyogi",
"ua" : "Mozilla/5.0 (compatible; Cotoyogi/4.0; +https://ds.rois.ac.jp/center8/crawler/)",
"expect" :
{
"name" : "Cotoyogi",
"version" : "4.0",
"type" : "crawler"
}
},
{
"desc" : "Coveobot",
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) (compatible; Coveobot/2.0;+http://www.coveo.com/bot.html)",
@@ -469,6 +489,16 @@
"type" : "crawler"
}
},
{
"desc" : "FirecrawlAgent",
"ua" : "Mozilla/5.0 (compatible; FirecrawlAgent/1.0)",
"expect" :
{
"name" : "FirecrawlAgent",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "Googlebot-Video",
"ua" : "Googlebot-Video/1.0",
@@ -589,6 +619,16 @@
"type" : "crawler"
}
},
{
"desc" : "HuggingFace-Bot",
"ua" : "Mozilla/5.0 (compatible; HuggingFace-Bot/1.0; +https://huggingface.co/)",
"expect" :
{
"name" : "HuggingFace-Bot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "iAskBot",
"ua" : "Mozilla/5.0 AppleWebKit/605.1.15 (KHTML, like Gecko; compatible; iAskBot/1.0; +https://iask.ai/) Chrome/120.0.6099.119 Safari/605.1.15",
@@ -609,6 +649,16 @@
"type" : "crawler"
}
},
{
"desc" : "Kangaroo Bot",
"ua" : "Mozilla/5.0 (compatible; Kangaroo Bot/1.0)",
"expect" :
{
"name" : "Kangaroo Bot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "Linespider",
"ua" : "Mozilla/5.0 (compatible; Linespider/1.1; +https://lin.ee/4dwXkTH)",
@@ -710,6 +760,16 @@
"type" : "crawler"
}
},
{
"desc" : "PanguBot",
"ua" : "Mozilla/5.0 (compatible; PanguBot/1.0)",
"expect" :
{
"name" : "PanguBot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "PerplexityBot",
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; PerplexityBot/1.0; +https://perplexity.ai/perplexitybot)",
@@ -770,6 +830,26 @@
"type" : "crawler"
}
},
{
"desc" : "Replicate-Bot",
"ua" : "Mozilla/5.0 (compatible; Replicate-Bot/1.0; +https://replicate.com/)",
"expect" :
{
"name" : "Replicate-Bot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "RunPod-Bot",
"ua" : "Mozilla/5.0 (compatible; RunPod-Bot/1.0; +https://runpod.io/)",
"expect" :
{
"name" : "RunPod-Bot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "SemrushBot",
"ua" : "Mozilla/5.0 (compatible; SemrushBot/7~bl; +http://www.semrush.com/bot.html)",
@@ -860,6 +940,16 @@
"type" : "crawler"
}
},
{
"desc" : "Together-Bot",
"ua" : "Mozilla/5.0 (compatible; Together-Bot/1.0; +https://together.ai/)",
"expect" :
{
"name" : "Together-Bot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "TurnitinBot",
"ua" : "TurnitinBot (https://turnitin.com/robot/crawlerinfo.html)",
@@ -870,6 +960,16 @@
"type" : "crawler"
}
},
{
"desc" : "xAI-Bot",
"ua" : "Mozilla/5.0 (compatible; xAI-Bot/1.0; +https://x.ai/)",
"expect" :
{
"name" : "xAI-Bot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "VelenPublicWebCrawler",
"ua" : "Mozilla/5.0 (compatible; VelenPublicWebCrawler/1.0; +https://velen.io)",