From 29677bcd3d2c0436734b8f54fe6283c054bfd45d Mon Sep 17 00:00:00 2001 From: Faisal Salman Date: Wed, 21 May 2025 09:58:01 +0700 Subject: [PATCH 1/3] Add new browser: `Edge WebView`, `Edge WebView2` --- src/enums/ua-parser-enums.d.ts | 2 ++ src/enums/ua-parser-enums.js | 2 ++ src/main/ua-parser.js | 19 +++++++++++++++---- test/data/ua/browser/browser-all.json | 20 ++++++++++++++++++++ test/unit/ua-ch.js | 13 +++++++++++++ 5 files changed, 52 insertions(+), 4 deletions(-) diff --git a/src/enums/ua-parser-enums.d.ts b/src/enums/ua-parser-enums.d.ts index b41dfc4..44fefbf 100644 --- a/src/enums/ua-parser-enums.d.ts +++ b/src/enums/ua-parser-enums.d.ts @@ -38,6 +38,8 @@ export const Browser: Readonly<{ DUCKDUCKGO: "DuckDuckGo"; ECOSIA: "Ecosia"; EDGE: "Edge"; + EDGE_WEBVIEW: "Edge WebView"; + EDGE_WEBVIEW2: "Edge WebView2"; EPIPHANY: "Epiphany"; FACEBOOK: "Facebook"; FALKON: "Falkon"; diff --git a/src/enums/ua-parser-enums.js b/src/enums/ua-parser-enums.js index 2ae437c..3420acb 100644 --- a/src/enums/ua-parser-enums.js +++ b/src/enums/ua-parser-enums.js @@ -43,6 +43,8 @@ const Browser = Object.freeze({ DUCKDUCKGO: 'DuckDuckGo', ECOSIA: 'Ecosia', EDGE: 'Edge', + EDGE_WEBVIEW: 'Edge WebView', + EDGE_WEBVIEW2: 'Edge WebView2', EPIPHANY: 'Epiphany', FACEBOOK: 'Facebook', FALKON: 'Falkon', diff --git a/src/main/ua-parser.js b/src/main/ua-parser.js index ae91dbd..1e6210c 100755 --- a/src/main/ua-parser.js +++ b/src/main/ua-parser.js @@ -152,11 +152,11 @@ has = function (str1, str2) { if (typeof str1 === OBJ_TYPE && str1.length > 0) { for (var i in str1) { - if (lowerize(str1[i]) == lowerize(str2)) return true; + if (lowerize(str2) == lowerize(str1[i])) return true; } return false; } - return isString(str1) ? lowerize(str2).indexOf(lowerize(str1)) !== -1 : false; + return isString(str1) ? 
lowerize(str2) == lowerize(str1) : false; }, isExtensions = function (obj, deep) { for (var prop in obj) { @@ -326,7 +326,9 @@ // Most common regardless engine /\b(?:crmo|crios)\/([\w\.]+)/i // Chrome for Android/iOS ], [VERSION, [NAME, PREFIX_MOBILE + 'Chrome']], [ - /edg(?:e|ios|a)?\/([\w\.]+)/i // Microsoft Edge + /webview.+edge\/([\w\.]+)/i // Microsoft Edge WebView + ], [VERSION, [NAME, EDGE+' WebView']], [ + /edg(?:e|ios|a)?\/([\w\.]+)/i // Microsoft Edge ], [VERSION, [NAME, 'Edge']], [ // Presto based @@ -441,6 +443,9 @@ /headlesschrome(?:\/([\w\.]+)| )/i // Chrome Headless ], [VERSION, [NAME, CHROME+' Headless']], [ + /wv\).+chrome\/([\w\.]+).+edgw\//i // Edge WebView2 + ], [VERSION, [NAME, EDGE+' WebView2']], [ + / wv\).+(chrome)\/([\w\.]+)/i // Chrome WebView ], [[NAME, CHROME+' WebView'], VERSION], [ @@ -1232,10 +1237,16 @@ for (var i in brands) { var brandName = brands[i].brand || brands[i], brandVersion = brands[i].version; - if (this.itemType == UA_BROWSER && !/not.a.brand/i.test(brandName) && (!prevName || (/chrom/i.test(prevName) && brandName != CHROMIUM))) { + if (this.itemType == UA_BROWSER && + !/not.a.brand/i.test(brandName) && + (!prevName || + (/Chrom/.test(prevName) && brandName != CHROMIUM) || + (prevName == EDGE && /WebView2/.test(brandName)) + )) { brandName = strMapper(brandName, { 'Chrome' : 'Google Chrome', 'Edge' : 'Microsoft Edge', + 'Edge WebView2' : 'Microsoft Edge WebView2', 'Chrome WebView' : 'Android WebView', 'Chrome Headless' : 'HeadlessChrome', 'Huawei Browser' : 'HuaweiBrowser', diff --git a/test/data/ua/browser/browser-all.json b/test/data/ua/browser/browser-all.json index 3fc0e5e..9f39de9 100644 --- a/test/data/ua/browser/browser-all.json +++ b/test/data/ua/browser/browser-all.json @@ -2229,6 +2229,26 @@ "major" : "74" } }, + { + "desc" : "Microsoft Edge WebView", + "ua" : "Mozilla/5.0 (Windows IoT 10.0; Android 6.0.1; WebView/3.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Mobile Safari/537.36 Edge/18.17763", + "expect" : + { + 
"name" : "Edge WebView", + "version" : "18.17763", + "major" : "18" + } + }, + { + "desc" : "Microsoft Edge WebView2", + "ua" : "Mozilla/5.0 (Linux; Android 11; SM-G991B Build/RP1A.200720.012; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/91.0.4472.120 Mobile Safari/537.36 EdgW/1.0", + "expect" : + { + "name" : "Edge WebView2", + "version" : "91.0.4472.120", + "major" : "91" + } + }, { "desc" : "Iridium", "ua" : "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Iridium/43.8 Safari/537.36 Chrome/43.0.2357.132", diff --git a/test/unit/ua-ch.js b/test/unit/ua-ch.js index 18dd09a..7d12d50 100644 --- a/test/unit/ua-ch.js +++ b/test/unit/ua-ch.js @@ -313,6 +313,19 @@ describe('UA-CH Headers tests', () => { } } }, + { + headers : { + 'sec-ch-ua': '" Not;A Brand";v="99", "Microsoft Edge";v="103", "Chromium";v="103", "Microsoft Edge WebView2";v="104"' + }, + expect: { + browser : { + name : 'Edge WebView2', + version : '104', + major : '104', + type : undefined + } + } + }, { headers : { 'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "HuaweiBrowser";v="114"' From a3549efc223e5639af47142125c53224890b8b75 Mon Sep 17 00:00:00 2001 From: Faisal Salman Date: Fri, 30 May 2025 23:16:02 +0700 Subject: [PATCH 2/3] [extensions] Add new bots: Daumoa, iAskBot, Iframely, Qwantbot --- src/extensions/ua-parser-extensions.js | 12 ++++- test/data/ua/extension/crawler.json | 70 ++++++++++++++++++++++++++ test/data/ua/extension/fetcher.json | 10 ++++ 3 files changed, 90 insertions(+), 2 deletions(-) diff --git a/src/extensions/ua-parser-extensions.js b/src/extensions/ua-parser-extensions.js index 8317f4b..a91bdd9 100644 --- a/src/extensions/ua-parser-extensions.js +++ b/src/extensions/ua-parser-extensions.js @@ -48,13 +48,14 @@ const Crawlers = Object.freeze({ // DuckDuckBot - http://duckduckgo.com/duckduckbot.html // FacebookBot - https://developers.facebook.com/docs/sharing/bot/ // GPTBot - https://platform.openai.com/docs/gptbot + // iAskBot - 
https://iask.ai // LinkedInBot - http://www.linkedin.com // MJ12bot - https://mj12bot.com/ // MojeekBot - https://www.mojeek.com/bot.html // OpenAI's SearchGPT - https://platform.openai.com/docs/bots // PerplexityBot - https://perplexity.ai/perplexitybot // SeznamBot - http://napoveda.seznam.cz/seznambot-intro - /((?:adidx|ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|linkedin|mj12|mojeek|oai-search|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i, + /((?:adidx|ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|iask|linkedin|mj12|mojeek|oai-search|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i, // Applebot - http://apple.com/go/applebot /(applebot(?:-extended)?)\/?([\w\.]*)/i, @@ -68,6 +69,9 @@ const Crawlers = Object.freeze({ // Coc Coc Bot - https://help.coccoc.com/en/search-engine /(coccocbot-(?:image|web))\/([\w\.]+)/i, + // Daum + /(daum(?:oa)?(?:-image)?)[ \/]([\w\.]+)/i, + // Facebook / Meta // https://developers.facebook.com/docs/sharing/webmasters/web-crawlers /(facebook(?:externalhit|catalog)|meta-externalagent)\/([\w\.]+)/i, @@ -78,6 +82,9 @@ const Crawlers = Object.freeze({ // Internet Archive (archive.org) /(ia_archiver|archive\.org_bot)\/?([\w\.]*)/i, + // Qwantbot - https://help.qwant.com/bot + /(qwantbot)[-\w]*\/?([\w\.]*)/i, + // SemrushBot - http://www.semrush.com/bot.html /((?:semrush|splitsignal)bot[-abcfimostw]*)\/?([\w\.-]*)/i, @@ -222,10 +229,11 @@ const Fetchers = Object.freeze({ // DuckAssistBot - https://duckduckgo.com/duckassistbot/ // Better Uptime / BingPreview / Mastodon / MicrosoftPreview / Pinterestbot / Redditbot / Rogerbot / SiteAuditBot / Telegrambot / Twitterbot / UptimeRobot // Google Site Verifier / Meta / Yahoo! 
Japan + // Iframely - https://iframely.com/docs/about // Perplexity-User - https://docs.perplexity.ai/guides/bots // MistralAI-User - https://docs.mistral.ai/robots/ // Yandex Bots - https://yandex.com/bots - /(ahrefssiteaudit|(?:bing|microsoft)preview|(?:chatgpt|mistralai|perplexity)-user|mastodon|(?:discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero)bot|google-site-verification|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i, + /(ahrefssiteaudit|(?:bing|microsoft)preview|(?:chatgpt|mistralai|perplexity)-user|mastodon|(?:discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero)bot|google-site-verification|iframely|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i, // Bluesky /(bluesky) cardyb\/([\w\.]+)/i, diff --git a/test/data/ua/extension/crawler.json b/test/data/ua/extension/crawler.json index 8ed2d00..bf29428 100644 --- a/test/data/ua/extension/crawler.json +++ b/test/data/ua/extension/crawler.json @@ -299,6 +299,36 @@ "type" : "crawler" } }, + { + "desc" : "Daum", + "ua" : "Mozilla/5.0 (compatible; MSIE or Firefox mutant;) Daum 4.1", + "expect" : + { + "name" : "Daum", + "version" : "4.1", + "type" : "crawler" + } + }, + { + "desc" : "Daumoa", + "ua" : "Mozilla/5.0 (compatible; MSIE or Firefox mutant; not on Windows server;) Daumoa 4.0", + "expect" : + { + "name" : "Daumoa", + "version" : "4.0", + "type" : "crawler" + } + }, + { + "desc" : "Daumoa-image", + "ua" : "Mozilla/5.0 (compatible; MSIE or Firefox mutant; not on Windows server;) Daumoa-image/1.0", + "expect" : + { + "name" : "Daumoa-image", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "Diffbot", "ua" : "Diffbot/0.1", @@ -489,6 +519,16 @@ "type" : "crawler" } }, + { + "desc" : "iAskBot", + "ua" : "Mozilla/5.0 AppleWebKit/605.1.15 (KHTML, like Gecko; compatible; iAskBot/1.0; +https://iask.ai/) Chrome/120.0.6099.119 
Safari/605.1.15", + "expect" : + { + "name" : "iAskBot", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "ImagesiftBot", "ua" : "Mozilla/5.0 (compatible; ImagesiftBot; +imagesift.com)", @@ -620,6 +660,36 @@ "type" : "crawler" } }, + { + "desc" : "Qwantbot", + "ua" : "Mozilla/5.0 (compatible; Qwantbot/1.0_12345; +https://help.qwant.com/bot/)", + "expect" : + { + "name" : "Qwantbot", + "version" : "1.0_12345", + "type" : "crawler" + } + }, + { + "desc" : "Qwantbot", + "ua" : "Mozilla/5.0 (compatible; Qwantbot-prod51071/1.0; +Qwantbot@qwant.com)", + "expect" : + { + "name" : "Qwantbot", + "version" : "1.0", + "type" : "crawler" + } + }, + { + "desc" : "Qwantbot", + "ua" : "Mozilla/5.0 (compatible; Qwantbot-news/2.0; +https://help.qwant.com/bot/)", + "expect" : + { + "name" : "Qwantbot", + "version" : "2.0", + "type" : "crawler" + } + }, { "desc" : "SemrushBot", "ua" : "Mozilla/5.0 (compatible; SemrushBot/7~bl; +http://www.semrush.com/bot.html)", diff --git a/test/data/ua/extension/fetcher.json b/test/data/ua/extension/fetcher.json index 62154a9..4a05393 100644 --- a/test/data/ua/extension/fetcher.json +++ b/test/data/ua/extension/fetcher.json @@ -129,6 +129,16 @@ "type" : "fetcher" } }, + { + "desc" : "Iframely", + "ua" : "Iframely/1.3.1 (+https://iframely.com/docs/about)", + "expect" : + { + "name" : "Iframely", + "version" : "1.3.1", + "type" : "fetcher" + } + }, { "desc" : "Meta-ExternalFetcher", "ua" : "meta-externalfetcher/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)", From 72d0c2acb32917a9287b8cb838be015d8e5dcac9 Mon Sep 17 00:00:00 2001 From: Faisal Salman Date: Tue, 3 Jun 2025 11:03:45 +0700 Subject: [PATCH 3/3] [extensions] Add new crawler bots: ChatGLM, Onespot, Startpage --- src/extensions/ua-parser-extensions.js | 7 +++--- src/helpers/ua-parser-helpers.js | 3 +++ test/data/ua/extension/crawler.json | 30 ++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) diff --git 
a/src/extensions/ua-parser-extensions.js b/src/extensions/ua-parser-extensions.js index a91bdd9..b2e31fb 100644 --- a/src/extensions/ua-parser-extensions.js +++ b/src/extensions/ua-parser-extensions.js @@ -52,10 +52,11 @@ const Crawlers = Object.freeze({ // LinkedInBot - http://www.linkedin.com // MJ12bot - https://mj12bot.com/ // MojeekBot - https://www.mojeek.com/bot.html + // Onespot - https://www.onespot.com/identifying-traffic.html // OpenAI's SearchGPT - https://platform.openai.com/docs/bots // PerplexityBot - https://perplexity.ai/perplexitybot // SeznamBot - http://napoveda.seznam.cz/seznambot-intro - /((?:adidx|ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|iask|linkedin|mj12|mojeek|oai-search|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i, + /((?:adidx|ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|iask|linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i, // Applebot - http://apple.com/go/applebot /(applebot(?:-extended)?)\/?([\w\.]*)/i, @@ -100,8 +101,8 @@ const Crawlers = Object.freeze({ // Yeti (Naver) /(yeti)\/([\w\.]+)/i, - // aiHitBot / Diffbot / Linespider / Magpie-Crawler / Omgilibot / OpenAI Image Downloader / Webzio-Extended / Screaming Frog SEO Spider / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot - /((?:aihit|diff|timpi|you)bot|omgili(?:bot)?|openai image downloader|(?:magpie-|velenpublicweb)crawler|webzio-extended|(?:screaming frog seo |line|yisou)spider)\/?([\w\.]*)/i + // aiHitBot / Diffbot / Linespider / Magpie-Crawler / Omgilibot / OpenAI Image Downloader / Webzio-Extended / Screaming Frog SEO Spider / Startpage / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot + /((?:aihit|diff|timpi|you)bot|omgili(?:bot)?|openai image downloader|(?:magpie-|velenpublicweb)crawler|startpageprivateimageproxy|webzio-extended|(?:chatglm-|line|screaming frog seo |yisou)spider)\/?([\w\.]*)/i ], [NAME, VERSION, [TYPE, CRAWLER]], diff --git a/src/helpers/ua-parser-helpers.js 
b/src/helpers/ua-parser-helpers.js index 64d7e90..d14667b 100644 --- a/src/helpers/ua-parser-helpers.js +++ b/src/helpers/ua-parser-helpers.js @@ -110,6 +110,9 @@ const isAIBot = (resultOrUA) => [ // You.com 'youbot', + // Zhipu AI + 'chatglm-spider', + // Zyte 'scrapy' diff --git a/test/data/ua/extension/crawler.json b/test/data/ua/extension/crawler.json index bf29428..918ee98 100644 --- a/test/data/ua/extension/crawler.json +++ b/test/data/ua/extension/crawler.json @@ -259,6 +259,16 @@ "type" : "crawler" } }, + { + "desc" : "ChatGLM-Spider", + "ua" : "Mozilla/5.0 (compatible; ChatGLM-Spider/1.0; +https://chatglm.cn/)", + "expect" : + { + "name" : "ChatGLM-Spider", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "Coc Coc Bot (web)", "ua" : "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)", @@ -620,6 +630,16 @@ "type" : "crawler" } }, + { + "desc" : "Onespot", + "ua" : "Mozilla/5.0 (compatible; Onespot-ScraperBot/1.0; +https://www.onespot.com/identifying-traffic.html)", + "expect" : + { + "name" : "Onespot-ScraperBot", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "OpenAI Search", "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot", @@ -750,6 +770,16 @@ "type" : "crawler" } }, + { + "desc" : "Startpage", + "ua" : "StartpagePrivateImageProxy/3.0 (https://www.startpage.com/; support@startpage.com) aiohttp.client/3.11.11", + "expect" : + { + "name" : "StartpagePrivateImageProxy", + "version" : "3.0", + "type" : "crawler" + } + }, { "desc" : "Teoma", "ua" : "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)",