[submodule:extensions] Add new bots: AI2Bot, aiHitBot, anthropic-ai, cohere-ai, Diffbot, ImagesiftBot, magpie-crawler, Omgilibot, Screaming Frog SEO Spider, Seznambot, Teoma, Timpibot, VelenPublicWebCrawler, Vercelbot, Webzio-Extended, YouBot

This commit is contained in:
Faisal Salman
2024-11-16 18:40:02 +07:00
parent 2181559b01
commit 2b125c8de4
3 changed files with 141 additions and 8 deletions

View File

@@ -49,6 +49,26 @@
"type" : "crawler"
}
},
{
"desc" : "AI2Bot",
"ua" : "Mozilla/5.0 (compatible) AI2Bot (+https://www.allenai.org/crawler)",
"expect" :
{
"name" : "AI2Bot",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "aiHitBot",
"ua" : "Mozilla/5.0 (compatible; aiHitBot/2.9; +https://www.aihitdata.com/about)",
"expect" :
{
"name" : "aiHitBot",
"version" : "2.9",
"type" : "crawler"
}
},
{
"desc" : "Applebot",
"ua" : "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1;+http://www.apple.com/go/applebot)",
@@ -131,7 +151,7 @@
},
{
"desc" : "DataForSEO",
"ua" : "Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot) ",
"ua" : "Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)",
"expect" :
{
"name" : "DataForSeoBot",
@@ -139,6 +159,16 @@
"type" : "crawler"
}
},
{
"desc" : "Diffbot",
"ua" : "Diffbot/0.1",
"expect" :
{
"name" : "Diffbot",
"version" : "0.1",
"type" : "crawler"
}
},
{
"desc" : "Dotbot",
"ua" : "Mozilla/5.0 (compatible; DotBot/1.2; +https://opensiteexplorer.org/dotbot; help@moz.com)",
@@ -329,6 +359,26 @@
"type" : "crawler"
}
},
{
"desc" : "ImagesiftBot",
"ua" : "Mozilla/5.0 (compatible; ImagesiftBot; +imagesift.com)",
"expect" :
{
"name" : "ImagesiftBot",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "magpie-crawler",
"ua" : "magpie-crawler/1.1 (robots-txt-checker; +http://www.brandwatch.net)",
"expect" :
{
"name" : "magpie-crawler",
"version" : "1.1",
"type" : "crawler"
}
},
{
"desc" : "Meta-ExternalAgent",
"ua" : "meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)",
@@ -360,6 +410,26 @@
"type" : "crawler"
}
},
{
"desc" : "Omgili",
"ua" : "omgili/0.5 +https://omgili.com",
"expect" :
{
"name" : "omgili",
"version" : "0.5",
"type" : "crawler"
}
},
{
"desc" : "Omgilibot",
"ua" : "omgilibot/0.3 +http://www.omgili.com/Crawler.html",
"expect" :
{
"name" : "omgilibot",
"version" : "0.3",
"type" : "crawler"
}
},
{
"desc" : "OpenAI Search",
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot",
@@ -410,6 +480,36 @@
"type" : "crawler"
}
},
{
"desc" : "SeznamBot",
"ua" : "Mozilla/5.0 (compatible; SeznamBot/4.0-RC1; +http://napoveda.seznam.cz/seznambot-intro/)",
"expect" :
{
"name" : "SeznamBot",
"version" : "4.0-RC1",
"type" : "crawler"
}
},
{
"desc" : "Teoma",
"ua" : "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)",
"expect" :
{
"name" : "Teoma",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Timpibot",
"ua" : "Timpibot/0.8 (+http://www.timpi.io)",
"expect" :
{
"name" : "Timpibot",
"version" : "0.8",
"type" : "crawler"
}
},
{
"desc" : "TurnitinBot",
"ua" : "TurnitinBot (https://turnitin.com/robot/crawlerinfo.html)",
@@ -420,6 +520,16 @@
"type" : "crawler"
}
},
{
"desc" : "VelenPublicWebCrawler",
"ua" : "Mozilla/5.0 (compatible; VelenPublicWebCrawler/1.0; +https://velen.io)",
"expect" :
{
"name" : "VelenPublicWebCrawler",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "Yahoo! Japan",
"ua" : "Y!J-BRW/1.0 (https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716)",
@@ -469,5 +579,15 @@
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "YouBot",
"ua" : "YouBot (+http://www.you.com)",
"expect" :
{
"name" : "YouBot",
"version" : "undefined",
"type" : "crawler"
}
}
]

View File

@@ -118,5 +118,15 @@
"version" : "2.0",
"type" : "fetcher"
}
},
{
"desc" : "Vercelbot",
"ua" : "Vercelbot (+https://vercel.com)",
"expect" :
{
"name" : "Vercelbot",
"version" : "undefined",
"type" : "fetcher"
}
}
]