@@ -148,11 +148,11 @@ user_agent_parsers:
148148 family_replacement : ' Pinterestbot'
149149
150150 # Bots
151- - regex: '(CSimpleSpider|Cityreview Robot|CrawlDaddy|CrawlFire|Finderbots|Index crawler|Job Roboter|KiwiStatus Spider|Lijit Crawler|QuerySeekerSpider|ScollSpider|Trends Crawler|USyd-NLP-Spider|SiteCat Webbot|BotName\/\$BotVersion|123metaspider-Bot|1470\.net crawler|50\.nu|8bo Crawler Bot|Aboundex|Accoona-[A-z]{1,30}-Agent|AdsBot-Google(?:-[a-z]{1,30}|)|altavista|AppEngine-Google|archive.{0,30}\.org_bot|archiver|Ask Jeeves|[Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]{1,30})(?:-[A-Za-z]{1,30}|)|bingbot|BingPreview|blitzbot|BlogBridge|Bloglovin|BoardReader Blog Indexer|BoardReader Favicon Fetcher|boitho.com-dc|BotSeer|BUbiNG|\b\w{0,30}favicon\w{0,30}\b|\bYeti(?:-[a-z]{1,30}|)|Catchpoint(?: bot|)|[Cc]harlotte|Checklinks|clumboot|Comodo HTTP\(S\) Crawler|Comodo-Webinspector-Crawler|ConveraCrawler|CRAWL-E|CrawlConvera|Daumoa(?:-feedfetcher|)|Feed Seeker Bot|Feedbin|findlinks|Flamingo_SearchEngine|FollowSite Bot|furlbot|Genieo|gigabot|GomezAgent|gonzo1|(?:[a-zA-Z]{1,30}-|)Googlebot(?:-[a-zA-Z]{1,30}|)|Google SketchUp|grub-client|gsa-crawler|heritrix|HiddenMarket|holmes|HooWWWer|htdig|ia_archiver|ICC-Crawler|Icarus6j|ichiro(?:/mobile|)|IconSurf|IlTrovatore(?:-Setaccio|)|InfuzApp|Innovazion Crawler|InternetArchive|IP2[a-z]{1,30}Bot|jbot\b|KaloogaBot|Kraken|Kurzor|larbin|LEIA|LesnikBot|Linguee Bot|LinkAider|LinkedInBot|Lite Bot|Llaut|lycos|Mail\.RU_Bot|masscan|masidani_bot|Mediapartners-Google|Microsoft .{0,30} Bot|mogimogi|mozDex|MJ12bot|msnbot(?:-media {0,2}|)|msrbot|Mtps Feed Aggregation System|netresearch|Netvibes|NewsGator[^/]{0,30}|^NING|Nutch[^/]{0,30}|Nymesis|ObjectsSearch|OgScrper|Orbiter|OOZBOT|PagePeeker|PagesInventory|PaxleFramework|Peeplo Screenshot Bot|PHPCrawl|PlantyNet_WebRobot|Pompos|Qwantify|Read%20Later|Reaper|RedCarpet|Retreiver|Riddler|Rival IQ|scooter|Scrapy|Scrubby|searchsight|seekbot|semanticdiscovery|SemrushBot|Simpy|SimplePie|SEOstats|SimpleRSS|SiteCon|Slackbot-LinkExpanding|Slack-ImgProxy|Slurp|snappy|Speedy Spider|Squrl 
Java|Stringer|TheUsefulbot|ThumbShotsBot|Thumbshots\.ru|Tiny Tiny RSS|Twitterbot|WhatsApp|URL2PNG|Vagabondo|VoilaBot|^vortex|Votay bot|^voyager|WASALive.Bot|Web-sniffer|WebThumb|WeSEE:[A-z]{1,30}|WhatWeb|WIRE|WordPress|Wotbox|www\.almaden\.ibm\.com|Xenu(?:.s|) Link Sleuth|Xerka [A-z]{1,30}Bot|yacy(?:bot|)|YahooSeeker|Yahoo! Slurp|Yandex\w{1,30}|YodaoBot(?:-[A-z]{1,30}|)|YottaaMonitor|Yowedo|^Zao|^Zao-Crawler|ZeBot_www\.ze\.bz|ZooShot|ZyBorg|ArcGIS Hub Indexer|Google-InspectionTool)(?:[ /]v?(\d+)(?:\.(\d+)(?:\.(\d+)|)|)|)'
151+ - regex: '(CSimpleSpider|Cityreview Robot|CrawlDaddy|CrawlFire|Finderbots|Index crawler|Job Roboter|KiwiStatus Spider|Lijit Crawler|QuerySeekerSpider|ScollSpider|Trends Crawler|USyd-NLP-Spider|SiteCat Webbot|BotName\/\$BotVersion|123metaspider-Bot|1470\.net crawler|50\.nu|8bo Crawler Bot|Aboundex|Accoona-[A-z]{1,30}-Agent|AdsBot-Google(?:-[a-z]{1,30}|)|altavista|AppEngine-Google|archive.{0,30}\.org_bot|archiver|Ask Jeeves|[Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]{1,30})(?:-[A-Za-z]{1,30}|)|bingbot|BingPreview|blitzbot|BlogBridge|Bloglovin|BoardReader Blog Indexer|BoardReader Favicon Fetcher|boitho.com-dc|BotSeer|BUbiNG|\b\w{0,30}favicon\w{0,30}\b|\bYeti(?:-[a-z]{1,30}|)|Catchpoint(?: bot|)|[Cc]harlotte|Checklinks|clumboot|Comodo HTTP\(S\) Crawler|Comodo-Webinspector-Crawler|ConveraCrawler|CRAWL-E|CrawlConvera|Daumoa(?:-feedfetcher|)|Feed Seeker Bot|Feedbin|findlinks|Flamingo_SearchEngine|FollowSite Bot|furlbot|Genieo|gigabot|GomezAgent|gonzo1|(?:[a-zA-Z]{1,30}-|)Googlebot(?:-[a-zA-Z]{1,30}|)|GoogleOther|Google SketchUp|grub-client|gsa-crawler|heritrix|HiddenMarket|holmes|HooWWWer|htdig|ia_archiver|ICC-Crawler|Icarus6j|ichiro(?:/mobile|)|IconSurf|IlTrovatore(?:-Setaccio|)|InfuzApp|Innovazion Crawler|InternetArchive|IP2[a-z]{1,30}Bot|jbot\b|KaloogaBot|Kraken|Kurzor|larbin|LEIA|LesnikBot|Linguee Bot|LinkAider|LinkedInBot|Lite Bot|Llaut|lycos|Mail\.RU_Bot|masscan|masidani_bot|Mediapartners-Google|Microsoft .{0,30} Bot|mogimogi|mozDex|MJ12bot|msnbot(?:-media {0,2}|)|msrbot|Mtps Feed Aggregation System|netresearch|Netvibes|NewsGator[^/]{0,30}|^NING|Nutch[^/]{0,30}|Nymesis|ObjectsSearch|OgScrper|Orbiter|OOZBOT|PagePeeker|PagesInventory|PaxleFramework|Peeplo Screenshot Bot|PHPCrawl|PlantyNet_WebRobot|Pompos|Qwantify|Read%20Later|Reaper|RedCarpet|Retreiver|Riddler|Rival IQ|scooter|Scrapy|Scrubby|searchsight|seekbot|semanticdiscovery|SemrushBot|Simpy|SimplePie|SEOstats|SimpleRSS|SiteCon|Slackbot-LinkExpanding|Slack-ImgProxy|Slurp|snappy|Speedy Spider|Squrl 
Java|Stringer|TheUsefulbot|ThumbShotsBot|Thumbshots\.ru|Tiny Tiny RSS|Twitterbot|WhatsApp|URL2PNG|Vagabondo|VoilaBot|^vortex|Votay bot|^voyager|WASALive.Bot|Web-sniffer|WebThumb|WeSEE:[A-z]{1,30}|WhatWeb|WIRE|WordPress|Wotbox|www\.almaden\.ibm\.com|Xenu(?:.s|) Link Sleuth|Xerka [A-z]{1,30}Bot|yacy(?:bot|)|YahooSeeker|Yahoo! Slurp|Yandex\w{1,30}|YodaoBot(?:-[A-z]{1,30}|)|YottaaMonitor|Yowedo|^Zao|^Zao-Crawler|ZeBot_www\.ze\.bz|ZooShot|ZyBorg|ArcGIS Hub Indexer|GPTBot|Google-InspectionTool)(?:[ /]v?(\d+)(?:\.(\d+)(?:\.(\d+)|)|)|)'
152152
153153 # AWS S3 Clients
154154 # must come before "Bots General matcher" to catch "boto"/"boto3" before "bot"
155- - regex : ' \b(Boto3?|JetS3t|aws-(?:cli|sdk-(?:cpp|go|java|nodejs|ruby2?|dotnet-(?:\d{1,2}|core)))|s3fs)/(\d+)\.(\d+)(?:\.(\d+)|)'
155+ - regex : ' \b(Boto3?|JetS3t|aws-(?:cli|sdk-(?:cpp|go|go-v\d| java|nodejs|ruby2?|dotnet-(?:\d{1,2}|core)))|s3fs)/(\d+)\.(\d+)(?:\.(\d+)|)'
156156
157157 # SAFE FME
158158 - regex : ' (FME)\/(\d+\.\d+)\.(\d+)\.(\d+)'
@@ -179,6 +179,9 @@ user_agent_parsers:
179179 - regex : ' \[FB.{0,300};'
180180 family_replacement : ' Facebook'
181181
182+ # RecipeRadar crawler
183+ - regex : ' (RecipeRadar)/(\d+)\.(\d+)(?:\.(\d+)|)'
184+
182185 # Bots General matcher 'name/0.0'
183186 - regex : ' ^.{0,200}?(?:\/[A-Za-z0-9\.]{0,50}|) {0,2}([A-Za-z0-9 \-_\!\[\]:]{0,50}(?:[Aa]rchiver|[Ii]ndexer|[Ss]craper|[Bb]ot|[Ss]pider|[Cc]rawl[a-z]{0,50}))[/ ](\d+)(?:\.(\d+)(?:\.(\d+)|)|)'
184187 # Bots containing bot(but not CUBOT)
@@ -215,6 +218,13 @@ user_agent_parsers:
215218 # Twitter
216219 - regex : ' (Twitter for (?:iPhone|iPad)|TwitterAndroid)(?:\/(\d+)\.(\d+)|)'
217220 family_replacement : ' Twitter'
221+ # TikTok (matched via the musical_ly / BytedanceWebview tokens in its user agent; family is reported as TikTok)
222+ - regex : ' (musical_ly) app_version\/(\d+)\.(\d+)\.(\d+)'
223+ family_replacement : ' TikTok'
224+ - regex : ' (musical_ly_)(\d+)\.(\d+)\.(\d+)'
225+ family_replacement : ' TikTok'
226+ - regex : ' (BytedanceWebview)\/[a-z0-9]+'
227+ family_replacement : ' TikTok'
218228
219229 # Phantom app
220230 - regex : ' Mozilla.{1,200}Mobile.{1,100}(Phantom\/ios|Phantom\/android).(\d+)\.(\d+)\.(\d+)'
@@ -487,11 +497,17 @@ user_agent_parsers:
487497 family_replacement : ' Tenta Browser'
488498
489499 # Ecosia on iOS / Android
490- - regex : ' (Ecosia) ios@(\d+)\.(\d+)\.(\d+)\.(\d+)'
500+ - regex : ' (Ecosia) ios@(\d+)(?: \.(\d+)|)(?: \.(\d+)|)(?: \.(\d+)| )'
491501 family_replacement : ' Ecosia iOS'
492- - regex : ' (Ecosia) android@(\d+)\.(\d+)\.(\d+)\.(\d+)'
502+ - regex : ' (Ecosia) android@(\d+)(?: \.(\d+)|)(?: \.(\d+)|)(?: \.(\d+)| )'
493503 family_replacement : ' Ecosia Android'
494504
505+ # VivoBrowser
506+ - regex : ' (VivoBrowser)\/(\d+)\.(\d+)\.(\d+)\.(\d+)'
507+
508+ # HiBrowser
509+ - regex : ' (HiBrowser)\/v(\d+)\.(\d+)\.(\d+)\.(\d+)'
510+
495511 # Chrome Mobile
496512 - regex : ' Version/.{1,300}(Chrome)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
497513 family_replacement : ' Chrome Mobile WebView'
@@ -1145,6 +1161,29 @@ os_parsers:
11451161 # Special case for new ArcGIS Mobile products
11461162 - regex : ' ArcGISRuntime-(?:Android|iOS)\/\d+\.\d+(?:\.\d+|) \((Android|iOS) (\d+)(?:\.(\d+)(?:\.(\d+)|)|);'
11471163
1164+ # #########
1165+ # Chromecast
1166+ # #########
1167+ # Ex: Mozilla/5.0 (Linux; Android 12.0; Build/STTL.240206.002) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.0 Safari/537.36 CrKey/1.56.500000 DeviceType/AndroidTV
1168+ # These are the newer Android-based "Google TV" Chromecast devices.
1169+ # Google stopped updating the Chromecast firmware version in these, so they always say CrKey/1.56.500000. Therefore we extract the more useful Android version instead; the minor version is optional so that "Android 12" matches as well as "Android 12.0".
1170+ - regex : ' (Android) (\d+)(?:\.(\d+)|).*CrKey'
1171+ os_replacement : ' Chromecast Android'
1172+
1173+ # Ex: Mozilla/5.0 (Fuchsia) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 CrKey/1.56.500000
1174+ # These are some intermediate "Nest Hub" Chromecast devices running Fuchsia.
1175+ - regex : ' Fuchsia.*(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|)'
1176+ os_replacement : ' Chromecast Fuchsia'
1177+
1178+ # Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/SmartSpeaker
1179+ - regex : ' Linux.*(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|).*DeviceType/SmartSpeaker'
1180+ os_replacement : ' Chromecast SmartSpeaker'
1181+
1182+ # Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/Chromecast
1183+ # These are the oldest Chromecast devices that ran Linux.
1184+ - regex : ' Linux.*(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|)'
1185+ os_replacement : ' Chromecast Linux'
1186+
11481187 # #########
11491188 # Android
11501189 # can actually detect rooted android os. do we care?
@@ -1308,6 +1347,10 @@ os_parsers:
13081347 # #########
13091348 - regex : ' (Tizen)[/ ](\d+)\.(\d+)'
13101349
1350+ # Chrome and Edge on iOS in desktop mode report "Intel Mac OS X" in the user agent, so this rule must come before any Mac OS check
1351+ - regex : ' Intel Mac OS X.+(CriOS|EdgiOS)/\d+'
1352+ os_replacement : ' iOS'
1353+
13111354 # #########
13121355 # Mac OS
13131356 # @ref: http://en.wikipedia.org/wiki/Mac_OS_X#Versions
@@ -1360,6 +1403,13 @@ os_parsers:
13601403 # Box Drive and Box Sync on Mac OS X use OSX version numbers, not Darwin
13611404 - regex : ' ^Box.{0,200};(Darwin)/(10)\.(1\d)(?:\.(\d+)|)'
13621405 os_replacement : ' Mac OS X'
1406+
1407+ # #########
1408+ # HashiCorp API
1409+ # APN/1.0 HashiCorp/1.0 Terraform/1.8.0 (+https://www.terraform.io) terraform-provider-aws/4.67.0 (+https://registry.terraform.io/providers/hashicorp/aws) aws-sdk-go/1.44.261 (go1.19.8; darwin; arm64)
1410+ # #########
1411+ - regex : ' darwin; arm64'
1412+ os_replacement : ' Mac OS X'
13631413
13641414 # #########
13651415 # iOS
@@ -1645,29 +1695,27 @@ os_parsers:
16451695 - regex : ' CFNetwork/.{0,100} Darwin/(21)\.\d+'
16461696 os_replacement : ' iOS'
16471697 os_v1_replacement : ' 15'
1648- - regex : ' CFNetwork/.{0,100} Darwin/22\.0 \.\d+'
1698+ - regex : ' CFNetwork/.{0,100} Darwin/22\.([0-5]) \.\d+'
16491699 os_replacement : ' iOS'
16501700 os_v1_replacement : ' 16'
1651- os_v2_replacement : ' 0 '
1652- - regex : ' CFNetwork/.{0,100} Darwin/22\.1 \.\d+'
1701+ os_v2_replacement : ' $1 '
1702+ - regex : ' CFNetwork/.{0,100} Darwin/(22) \.\d+'
16531703 os_replacement : ' iOS'
16541704 os_v1_replacement : ' 16'
1655- os_v2_replacement : ' 1'
1656- - regex : ' CFNetwork/.{0,100} Darwin/22\.2\.\d+'
1705+ - regex : ' CFNetwork/.{0,100} Darwin/23\.([0-5])\.\d+'
16571706 os_replacement : ' iOS'
1658- os_v1_replacement : ' 16 '
1659- os_v2_replacement : ' 2 '
1660- - regex : ' CFNetwork/.{0,100} Darwin/22\.3 \.\d+'
1707+ os_v1_replacement : ' 17 '
1708+ os_v2_replacement : ' $1 '
1709+ - regex : ' CFNetwork/.{0,100} Darwin/(23) \.\d+'
16611710 os_replacement : ' iOS'
1662- os_v1_replacement : ' 16'
1663- os_v2_replacement : ' 3'
1664- - regex : ' CFNetwork/.{0,100} Darwin/22\.4\.\d+'
1711+ os_v1_replacement : ' 17'
1712+ - regex : ' CFNetwork/.{0,100} Darwin/24\.([0-5])\.\d+'
16651713 os_replacement : ' iOS'
1666- os_v1_replacement : ' 16 '
1667- os_v2_replacement : ' 4 '
1668- - regex : ' CFNetwork/.{0,100} Darwin/(22 )\.\d+'
1714+ os_v1_replacement : ' 18 '
1715+ os_v2_replacement : ' $1 '
1716+ - regex : ' CFNetwork/.{0,100} Darwin/(24 )\.\d+'
16691717 os_replacement : ' iOS'
1670- os_v1_replacement : ' 16 '
1718+ os_v1_replacement : ' 18 '
16711719 - regex : ' CFNetwork/.{0,100} Darwin/'
16721720 os_replacement : ' iOS'
16731721
@@ -1822,12 +1870,6 @@ os_parsers:
18221870
18231871 - regex : ' (WebTV)/(\d+).(\d+)'
18241872
1825- # #########
1826- # Chromecast
1827- # #########
1828- - regex : ' (CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|)'
1829- os_replacement : ' Chromecast'
1830-
18311873 # #########
18321874 # Misc mobile
18331875 # #########
@@ -1868,6 +1910,21 @@ os_parsers:
18681910 # Roku Digital-Video-Players https://www.roku.com/
18691911 - regex : ' ^(Roku)/DVP-(\d+)\.(\d+)'
18701912
1913+ # #########
1914+ # Amazon S3 client boto3
1915+ # HashiCorp API
1916+ # Boto3/1.28.62 md/Botocore#1.31.62 ua/2.0 os/macos#22.4.0 md/arch#arm64 lang/python#3.11.6 md/pyimpl#CPython cfg/retry-mode#legacy Botocore/1.31.62
1917+ # APN/1.0 HashiCorp/1.0 Terraform/1.8.1 (+https://www.terraform.io) terraform-provider-aws/4.67.0 (+https://registry.terraform.io/providers/hashicorp/aws) aws-sdk-go-v2/1.18.0 os/macos lang/go/1.19.8 md/GOOS/darwin md/GOARCH/arm64 api/identitystore/1.16.11
1918+ # #########
1919+ - regex : ' os\/macos[#]?(\d*)[.]?(\d*)[.]?(\d*)'
1920+ os_replacement : ' Mac OS X'
1921+ os_v1_replacement : ' $1'
1922+ os_v2_replacement : ' $2'
1923+ os_v3_replacement : ' $3'
1924+
1925+ # Huawei HarmonyOS
1926+ - regex : ' (HarmonyOS)[\s;]+(\d+|)\.?(\d+|)\.?(\d+|)'
1927+
18711928device_parsers :
18721929
18731930 # ########
@@ -2461,6 +2518,39 @@ device_parsers:
24612518 brand_replacement : ' ChangJia'
24622519 model_replacement : ' $1'
24632520
2521+ # #########
2522+ # Chromecast
2523+ # @ref: https://en.wikipedia.org/wiki/Chromecast#Hardware_and_design
2524+ # #########
2525+ # Ex: Mozilla/5.0 (Linux; Android 12.0; Build/STTL.240206.002) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.0 Safari/537.36 CrKey/1.56.500000 DeviceType/AndroidTV
2526+ # Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/SmartSpeaker
2527+ # Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/Chromecast
2528+ # These are the newer Chromecast devices, such as smart speakers, Google TVs, etc. that have an explicit device type.
2529+ - regex : ' CrKey.*DeviceType/([^/]*)'
2530+ brand_replacement : ' Google'
2531+ device_replacement : ' Chromecast'
2532+ model_replacement : ' $1'
2533+
2534+ # Ex: Mozilla/5.0 (Fuchsia) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 CrKey/1.56.500000
2535+ # These are some intermediate "Nest Hub" Chromecast devices running Fuchsia.
2536+ - regex : ' Fuchsia.*CrKey'
2537+ brand_replacement : ' Google'
2538+ device_replacement : ' Chromecast'
2539+ model_replacement : ' Nest Hub'
2540+
2541+ # Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.47 Safari/537.36 CrKey/1.36.159268
2542+ # These are the first generation of Chromecast devices that ran Linux. They don't specify a device type.
2543+ - regex : ' Linux.*CrKey/1\.36'
2544+ brand_replacement : ' Google'
2545+ device_replacement : ' Chromecast'
2546+ model_replacement : ' First Generation'
2547+
2548+ # We have no data on the user agent strings of other models, except that they all report CrKey/
2549+ - regex : ' CrKey/'
2550+ brand_replacement : ' Google'
2551+ device_replacement : ' Chromecast'
2552+ model_replacement : ' Chromecast'
2553+
24642554 # ########
24652555 # Cloudfone
24662556 # @ref: http://www.cloudfonemobile.com/
@@ -5851,7 +5941,7 @@ device_parsers:
58515941 # #########
58525942 # Spiders (this is a hack...)
58535943 # #########
5854- - regex: '^.{0,100}(bot|BUbiNG|zao|borg|DBot|oegp|silk|Xenu|zeal|^NING|CCBot|crawl|htdig|lycos|slurp|teoma|voila|yahoo|Sogou|CiBra|Nutch|^Java/|^JNLP/|Daumoa|Daum|Genieo|ichiro|larbin|pompos|Scrapy|snappy|speedy|spider|msnbot|msrbot|vortex|^vortex|crawler|favicon|indexer|Riddler|scooter|scraper|scrubby|WhatWeb|WinHTTP|bingbot|BingPreview|openbot|gigabot|furlbot|polybot|seekbot|^voyager|archiver|Icarus6j|mogimogi|Netvibes|blitzbot|altavista|charlotte|findlinks|Retreiver|TLSProber|WordPress|SeznamBot|ProoXiBot|wsr\-agent|Squrl Java|EtaoSpider|PaperLiBot|SputnikBot|A6\-Indexer|netresearch|searchsight|baiduspider|YisouSpider|ICC\-Crawler|http%20client|Python-urllib|dataparksearch|converacrawler|Screaming Frog|AppEngine-Google|YahooCacheSystem|fast\-webcrawler|Sogou Pic Spider|semanticdiscovery|Innovazion Crawler|facebookexternalhit|Google.{0,200}/\+/web/snippet|Google-HTTP-Java-Client|BlogBridge|IlTrovatore-Setaccio|InternetArchive|GomezAgent|WebThumbnail|heritrix|NewsGator|PagePeeker|Reaper|ZooShot|holmes|NL-Crawler|Pingdom|StatusCake|WhatsApp|masscan|Google Web Preview|Qwantify|Yeti|OgScrper|Google-InspectionTool)'
5944+ - regex: '^.{0,100}(bot|BUbiNG|zao|borg|DBot|oegp|silk|Xenu|zeal|^NING|CCBot|crawl|htdig|lycos|slurp|teoma|voila|yahoo|Sogou|CiBra|Nutch|^Java/|^JNLP/|Daumoa|Daum|Genieo|ichiro|larbin|pompos|Scrapy|snappy|speedy|spider|msnbot|msrbot|vortex|^vortex|crawler|favicon|indexer|Riddler|scooter|scraper|scrubby|WhatWeb|WinHTTP|bingbot|BingPreview|openbot|gigabot|furlbot|polybot|seekbot|^voyager|archiver|Icarus6j|mogimogi|Netvibes|blitzbot|altavista|charlotte|findlinks|Retreiver|TLSProber|WordPress|SeznamBot|ProoXiBot|wsr\-agent|Squrl Java|EtaoSpider|PaperLiBot|SputnikBot|A6\-Indexer|netresearch|searchsight|baiduspider|YisouSpider|ICC\-Crawler|http%20client|Python-urllib|dataparksearch|converacrawler|Screaming Frog|AppEngine-Google|YahooCacheSystem|fast\-webcrawler|Sogou Pic Spider|semanticdiscovery|Innovazion Crawler|facebookexternalhit|Google.{0,200}/\+/web/snippet|Google-HTTP-Java-Client|BlogBridge|IlTrovatore-Setaccio|InternetArchive|GomezAgent|WebThumbnail|heritrix|NewsGator|PagePeeker|Reaper|ZooShot|holmes|NL-Crawler|Pingdom|StatusCake|WhatsApp|masscan|Google Web Preview|Qwantify|Yeti|OgScrper|RecipeRadar|GPTBot|Google-InspectionTool)'
58555945 regex_flag : ' i'
58565946 device_replacement : ' Spider'
58575947 brand_replacement : ' Spider'
0 commit comments