[extensions] Add bots: AdIdxBot, Linespider, LinkedInBot, MicrosoftPreview, OpenAI Image Downloader

This commit is contained in:
Faisal Salman
2025-03-07 00:13:37 +07:00
parent 8a05328ce0
commit a9b31c0f7b
3 changed files with 210 additions and 29 deletions

View File

@@ -9,6 +9,16 @@
"type" : "crawler"
}
},
{
"desc" : "AdIdxBot",
"ua" : "Mozilla/5.0 (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)",
"expect" :
{
"name" : "adidxbot",
"version" : "2.0",
"type" : "crawler"
}
},
{
"desc" : "AdsBot Mobile Web",
"ua" : "AdsBot-Google (+http://www.google.com/adsbot.html)",
@@ -79,6 +89,16 @@
"type" : "crawler"
}
},
{
"desc" : "Applebot-Extended",
"ua" : "Applebot-Extended",
"expect" :
{
"name" : "Applebot-Extended",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Amazonbot",
"ua" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)",
@@ -89,6 +109,126 @@
"type" : "crawler"
}
},
{
"desc" : "Anthropic AI",
"ua" : "anthropic-ai",
"expect" :
{
"name" : "anthropic-ai",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Archive.org Bot",
"ua" : "ia_archiver/8.1 (Windows 2000 1.9; en-US;)",
"expect" :
{
"name" : "ia_archiver",
"version" : "8.1",
"type" : "crawler"
}
},
{
"desc" : "Archive.org Bot",
"ua" : "Mozilla/5.0 (compatible; archive.org_bot/3.3.0 +https://archive.org/details/archive.org_bot)",
"expect" :
{
"name" : "archive.org_bot",
"version" : "3.3.0",
"type" : "crawler"
}
},
{
"desc" : "Baiduspider",
"ua" : "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
"expect" :
{
"name" : "Baiduspider",
"version" : "2.0",
"type" : "crawler"
}
},
{
"desc" : "Baiduspider-ads",
"ua" : "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0; Baiduspider-ads) Gecko/17.0 Firefox/17.0",
"expect" :
{
"name" : "Baiduspider-ads",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Baiduspider-cpro",
"ua" : "Mozilla/5.0 (compatible; Baiduspider-cpro; +http://www.baidu.com/search/spider.html)",
"expect" :
{
"name" : "Baiduspider-cpro",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Baiduspider-favo",
"ua" : "Baiduspider-favo",
"expect" :
{
"name" : "Baiduspider-favo",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Baiduspider-image",
"ua" : "Baiduspider-image+(+http://www.baidu.com/search/spider.htm)",
"expect" :
{
"name" : "Baiduspider-image",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Baiduspider-news",
"ua" : "Baiduspider-news",
"expect" :
{
"name" : "Baiduspider-news",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Baiduspider-render",
"ua" : "Mozilla/5.0 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)",
"expect" :
{
"name" : "Baiduspider-render",
"version" : "2.0",
"type" : "crawler"
}
},
{
"desc" : "Baiduspider-video",
"ua" : "Baiduspider-video",
"expect" :
{
"name" : "Baiduspider-video",
"version" : "undefined",
"type" : "crawler"
}
},
{
"desc" : "Bingbot",
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/",
"expect" :
{
"name" : "bingbot",
"version" : "2.0",
"type" : "crawler"
}
},
{
"desc" : "Bytespider",
"ua" : "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.1511.1269 Mobile Safari/537.36; Bytespider",
@@ -179,6 +319,16 @@
"type" : "crawler"
}
},
{
"desc" : "DuckDuckBot",
"ua" : "DuckDuckBot/1.1; ( http://duckduckgo.com/duckduckbot.html)",
"expect" :
{
"name" : "DuckDuckBot",
"version" : "1.1",
"type" : "crawler"
}
},
{
"desc" : "Exabot",
"ua" : "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)",
@@ -339,26 +489,6 @@
"type" : "crawler"
}
},
{
"desc" : "Archive.org Bot",
"ua" : "ia_archiver/8.1 (Windows 2000 1.9; en-US;)",
"expect" :
{
"name" : "ia_archiver",
"version" : "8.1",
"type" : "crawler"
}
},
{
"desc" : "Archive.org Bot",
"ua" : "Mozilla/5.0 (compatible; archive.org_bot/3.3.0 +https://archive.org/details/archive.org_bot)",
"expect" :
{
"name" : "archive.org_bot",
"version" : "3.3.0",
"type" : "crawler"
}
},
{
"desc" : "ImagesiftBot",
"ua" : "Mozilla/5.0 (compatible; ImagesiftBot; +imagesift.com)",
@@ -369,6 +499,26 @@
"type" : "crawler"
}
},
{
"desc" : "Linespider",
"ua" : "Mozilla/5.0 (compatible; Linespider/1.1; +https://lin.ee/4dwXkTH)",
"expect" :
{
"name" : "Linespider",
"version" : "1.1",
"type" : "crawler"
}
},
{
"desc" : "LinkedInBot",
"ua" : "LinkedInBot/1.0 (compatible; Mozilla/5.0; Apache-HttpClient +http://www.linkedin.com)",
"expect" :
{
"name" : "LinkedInBot",
"version" : "1.0",
"type" : "crawler"
}
},
{
"desc" : "magpie-crawler",
"ua" : "magpie-crawler/1.1 (robots-txt-checker; +http://www.brandwatch.net)",
@@ -462,7 +612,7 @@
},
{
"desc" : "PetalBot",
"ua" : "Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot) ",
"ua" : "Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot)",
"expect" :
{
"name" : "PetalBot",
@@ -520,6 +670,16 @@
"type" : "crawler"
}
},
{
"desc" : "Sogou",
"ua" : "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
"expect" :
{
"name" : "Sogou web spider",
"version" : "4.0",
"type" : "crawler"
}
},
{
"desc" : "Teoma",
"ua" : "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)",

View File

@@ -139,6 +139,26 @@
"type" : "fetcher"
}
},
{
"desc" : "MicrosoftPreview",
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; MicrosoftPreview/2.0; +https://aka.ms/MicrosoftPreview) Chrome/W.X.Y.Z Safari/537.36",
"expect" :
{
"name" : "MicrosoftPreview",
"version" : "2.0",
"type" : "fetcher"
}
},
{
"desc" : "Pinterestbot",
"ua" : "Mozilla/5.0 (compatible; Pinterestbot/1.0; +http://www.pinterest.com/bot.html)",
"expect" :
{
"name" : "Pinterestbot",
"version" : "1.0",
"type" : "fetcher"
}
},
{
"desc" : "Rogerbot",
"ua" : "Mozilla/5.0 (compatible; rogerBot/1.0; UrlCrawler; http://www.seomoz.org/dp/rogerbot)",