Add detection of MJ12bot and SemrushBot to the Crawlers extension, including tests (#738)

This commit is contained in:
Pablo Osés 2024-08-14 00:46:46 -03:00 committed by GitHub
parent 692b175d49
commit 68ae2a76de
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 26 additions and 2 deletions

View File

@ -46,7 +46,9 @@ const Crawlers = Object.freeze({
// DuckDuckBot - http://duckduckgo.com/duckduckbot.html
// FacebookBot - https://developers.facebook.com/docs/sharing/bot/
// GPTBot - https://platform.openai.com/docs/gptbot
/((?:amazon|apple|bing|duckduck|facebook|gpt)bot)\/([\w\.]+)/i,
// MJ12bot - https://mj12bot.com/
// SemrushBot - http://www.semrush.com/bot.html
/((?:amazon|apple|bing|duckduck|facebook|gpt|mj12|semrush)bot)\/([\w\.]+)/i,
// Baiduspider https://help.baidu.com/question?prod_id=99&class=0&id=3001
/(baiduspider)[-imagevdonsfcpr]{0,6}\/([\w\.]+)/i,

View File

@ -48,7 +48,9 @@ const Crawlers = Object.freeze({
// DuckDuckBot - http://duckduckgo.com/duckduckbot.html
// FacebookBot - https://developers.facebook.com/docs/sharing/bot/
// GPTBot - https://platform.openai.com/docs/gptbot
[/((?:amazon|apple|bing|duckduck|facebook|gpt)bot)\/([\w\.]+)/i],
// MJ12bot - https://mj12bot.com/
// SemrushBot - http://www.semrush.com/bot.html
[/((?:amazon|apple|bing|duckduck|facebook|gpt|mj12|semrush)bot)\/([\w\.]+)/i],
[NAME, VERSION, [TYPE, CRAWLER]],
// Baiduspider https://help.baidu.com/question?prod_id=99&class=0&id=3001

View File

@ -79,6 +79,26 @@
"type" : "crawler"
}
},
{
"desc" : "MJ12bot",
"ua" : "Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)",
"expect" :
{
"name" : "MJ12bot",
"version" : "v1.4.8",
"type" : "crawler"
}
},
{
"desc" : "SemrushBot",
"ua" : "Mozilla/5.0 (compatible; SemrushBot/7~bl; +http://www.semrush.com/bot.html)",
"expect" :
{
"name" : "SemrushBot",
"version" : "7",
"type" : "crawler"
}
},
{
"desc" : "Yahoo! Japan",
"ua" : "Y!J-BRW/1.0 (https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716)",