diff --git a/fixtures/crawlers.yml b/fixtures/crawlers.yml index d3098ea..ba62474 100644 --- a/fixtures/crawlers.yml +++ b/fixtures/crawlers.yml @@ -1,920 +1,8 @@ -"8": - - Mozilla/5.0 (compatible; 008/0.83; http://www.80legs.com/webcrawler.html) Gecko/2008032620 -2ip.ru: - - 2ip.ru CMS Detector (https://2ip.ru/cms/) -360Spider: - - Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1; 360Spider -Aboundexbot: - - Aboundex/0.3 (http://www.aboundex.com/crawler/) -Acoon: - - Mozilla/5.0 (compatible; AcoonBot/4.11.1; +http://www.acoon.de/robot.asp) -AddThis.com: - - AddThis.com robot tech.support@clearspring.com -ADMantX: - - ADmantX Platform Semantic Analyzer - ADmantX Inc. - www.admantx.com - support@admantx.com -Agentslug: - - agentslug.com - website monitoring tool -aHrefs Bot: - - "Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/) AppEngine-Google; (+http://code.google.com/appengine; appid: s~proxyfile1-hrd)" -Akregator: - - Akregator/1.2.9; librss/remnants -AlertSite Monitoring: - - Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0 DejaClick/2.9.7.2 -Alexa: - - ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com) -Alexa Crawler: - - ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com) -Alexa Site Audit: - - Mozilla/5.0 (compatible; alexa site audit/1.0; http://www.alexa.com/help/webmasters; ) -Amaya: - - amaya/10 libwww/5.4.0 -Amazon Route53 Health Check: - - Amazon-Route53-Health-Check-Service (ref b0eb04d5-cb5e-40e7-839b-558e52fc3f0d; report http://amzn.to/1vsZADi) -Amorank Spider: - - AmorankSpider/0.1; +http://amorank.com/webcrawler.html -Analytics SEO Crawler: - - Curious George - www.analyticsseo.com/crawler -ApacheBench: - - ApacheBench/2.3 -Apple: - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15 (Applebot/0.1; +http://www.apple.com/go/applebot) -Apple PubSub: - - Apple-PubSub/65.28 -Applebot: - - Mozilla/5.0 (iPhone; CPU iPhone OS 13_4_1 like Mac OS X) AppleWebKit/605.1.15Z (KHTML, like Gecko) Version/13.1 Mobile/15E148 Safari/604.1 (Applebot/0.1) - - Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1; +http://www.apple.com/go/applebot) - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1) - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Safari/605.1.15 (Applebot/0.1) -Application Health Service: - - ApplicationHealthService/1.0 - - ELB-HealthChecker/2.0 -Arachni: - - Arachni/v1.5.1 -archive.org bot: - - Mozilla/5.0 (compatible; special_archiver/3.2.0 +http://www.loc.gov/webarchiving/notice_to_webmasters.html) -ArchiveBox: - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.61 Safari/537.36 ArchiveBox/0.6.2 - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.61 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/605.1.15 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) -Ask Jeeves: - - Mozilla/2.0 (compatible; Ask Jeeves/Teoma) -AspiegelBot: - - Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; AspiegelBot) -Awario: - - AwarioSmartBot/1.0 (+https://awario.com/bots.html; bots@awario.com) -Backlink-Check.de: - - Backlink-Check.de (+http://www.backlink-check.de/bot.html) -BacklinkCrawler: - - BacklinkCrawler (http://www.backlinktest.com/crawler.html) -Baidu: - - Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html) -Baidu Spider: - - Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.8;baidu Transcoder) Gecko/20100722 Firefox/3.6.8 ( .NET CLR 3.5.30729) -BazQux Reader: - - Mozilla/5.0 (compatible; BazQux/2.4; +https://bazqux.com/fetcher; 2 subscribers) -BingBot: - - MSNBot/Nutch-1.5.1 -Bit Discovery: - - bitdiscovery -BitlyBot: - - bitlybot/3.0 -Bitrix: - - Bitrix link preview -Blekkobot: - - Mozilla/5.0 (compatible; Blekkobot; ScoutJet; +http://blekko.com/about/blekkobot) -BLEXBot Crawler: - - Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/) -Bloglovin: - - Bloglovin/1.0 (http://www.bloglovin.com; 1 subscribers) -Blogtrottr: - - Blogtrottr/2.0 -Blogtrottr feed fetcher: - - Blogtrottr/2.0 - - Blogtrottr/3.0 -BoardReader: - - BoardReader Favicon Fetcher /1.0 info@boardreader.com -BoardReader Blog Indexer: - - BoardReader Blog Indexer(http://boardreader.com) -Bountii Bot: - - Mozilla/5.0 (compatible; BountiiBot/1.1; +http://bountii.com/contact.php) -BrandVerity: - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:41.0) Gecko/20100101 Firefox/55.0 BrandVerity/1.0 (http://www.brandverity.com/why-is-brandverity-visiting-me) -Braze: - - Braze Sender f3c381e4920fede7f46d83610f0abf15fdc10433 -Breaker: - - Breaker/v315 (subscribers=9999; feed-id=123456; url=https://www.breaker.audio/url-slug-to-podcast) -Browsershots: - - Browsershots -BUbiNG: - - BUbiNG (+http://law.di.unimi.it/BUbiNG.html) -Buck: - - Buck/2.2; (+https://app.hypefactors.com/media-monitoring/about.html) -BuiltWith: - - BW/1.1; bit.ly/3eZNDnO - - BW/1.1; rb.gy/oupwis -Burp Collaborator Scanner: - - Godzilla/17.0 (Unknown Operator; Nexus 5X Build/MMB29P) NoWebKit/5.36 (DOM, like Rhino; n7vbji2k0wt3rsbev55blgx17sdk8lwek97zvo.burpcollaborator.net) TOR/540.5.35487 Torrent 2654.76 - - http://5iojs2zo8rkhlrrwmm7oyl75twztnlo9ex9kz8o.burpcollaborator.net/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36 - - Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'+(select load_file('x97bjuqgzjb9cjiodeygpdyxkoqledf164uzhp5e.burpcollaborator.netszc'))+' - - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36 root@8f6gtjctw9glvnasfrq438tprig56wty20ghwkf9.burpcollaborator.net -Butterfly Robot: - - Mozilla/5.0 (Macintosh; Butterfly/1.0; +http://labs.topsy.com/butterfly/) Gecko/2009032608 Firefox/3.0.8 -Bytespider: - - Mozilla/5.0 (compatible; Bytespider; spider-feedback@bytedance.com) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.0.0 Safari/537.36 - - Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.5668.1446 Mobile Safari/537.36; Bytespider;bytespider@bytedance.com -CareerBot: - - Mozilla/5.0 (compatible; CareerBot/1.1; +http://www.career-x.de/bot.html) -Castro 2: - - Castro 2, Episode Duration Lookup -Catchpoint: - - Mozilla/4.0 (compatible; Catchpoint) - - Mozilla/5.0 (Linux; U; Android 4.0.2; en-us; Catchpoint) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30 - - Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0; Catchpoint) like Gecko - - Mozilla/5.0 (X11; Linux x86_64; Catchpoint) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36 -CATExplorador: - - CATExplorador/1.0beta (sistemes at domini dot cat; http://domini.cat/catexplorador.html) -ccBot crawler: - - CCBot/1.0 (+https://commoncrawl.org/bot.html) - - CCBot/2.0 (http://commoncrawl.org/faq/) -Censys: - - Mozilla/5.0 (compatible; CensysInspect/1.1; +https://about.censys.io/) -CF-UC: - - CF-UC User Agent v.1d.374049 -Chat-GPT: +Google Notebook LLM: + - Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)GoogleOther +OpenAI SearchBot: + - Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot +OpenAI ChatGPT User: - Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot -Chrome Headless: - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/100.0.4896.88 Safari/537.36 - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/83.0.4103.61 Safari/537.36 - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/92.0.4512.0 Safari/537.36 -ClamAV Website Scanner: - - ClamAV 0.95.3 - - "ClamAV/0.101.5 (OS: linux-gnu, ARCH: x86_64, CPU: x86_64)" -Cliqzbot: - - Cliqzbot/0.1 (+http://cliqz.com/company/cliqzbot) -CloudFlare: - - Cloudflare Custom Hostname Verification - - Mozilla/5.0 (compatible; CloudFlare-AlwaysOnline/1.0; +http://www.cloudflare.com/always-online) AppleWebKit/534.34 - - Mozilla/5.0 (compatible; Cloudflare-AMP/1.0; +https://amp.cloudflare.com/doc/fetcher.html) AppleWebKit/534.34 - - "Mozilla/5.0 (compatible;Cloudflare-Healthchecks/1.0;+https://www.cloudflare.com/; healthcheck-id: f867e0e43eca7aa4)" - - nginx-ssl early hints -Collectd: - - collectd/5.5.1 -CommaFeed: - - CommaFeed/1.0 (http://www.commafeed.com) -Comscore: - - Mozilla/5.0 (compatible; proximic; +https://www.comscore.com/Web-Crawler) -Cốc Cốc: - - Mozilla/5.0 (compatible; coccocbot-image/1.0 ; +http://help.coccoc.com/searchengine) - - Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine) -Cốc Cốc Bot: - - Mozilla/5.0 (compatible; coccocbot/1.0; +http://help.coccoc.com/searchengine) -Dareboost test tool: - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 DareBoost -Datadog Agent: - - Datadog Agent/5.10.1 -Datanyze: - - Mozilla/5.0 (X11; Datanyze; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36 -Dataprovider: - - Mozilla/5.0 (compatible; Dataprovider/6.92; +https://www.dataprovider.com/) -Daum: - - Mozilla/5.0 (compatible; MSIE or Firefox mutant;) Daum 4.1 -Dazoobot: - - Mozilla/5.0 (compatible; Dazoobot/0.1; +http://dazoo.fr) -DeepNOC: - - deepnoc - https://deepnoc.com/bot -Discobot: - - Mozilla/5.0 (compatible; discobot/1.0; +http://discoveryengine.com/discobot.html) -Discourse Onebox: - - Discourse Forum Onebox v2.6.0.beta2 - - Discourse/2.6.0.beta3 -Domain Control Violation: - - COMODO DCV - - Sectigo DCV -Domain Re-Animator Bot: - - support@domainreanimator.com -DotBot: - - dotbot -Dotcom Monitor: - - Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) DMBrowser/2.1 (UV) - - Mozilla/5.0 (Linux; U; Android 2.2; en-us; SCH-I800 Build/FROYO) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1 DMBrowser-BV - - Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36 DMBrowser/2.1 (BV; LV) -Downcast: - - Downcast/2.9.11 (iPhone; iOS 9.2; Scale/2.00) -DuckDuckGo: - - DDG-Android-3.1.0 - - ddg_android/5.90.0 (com.duckduckgo.mobile.android; Android API 28) - - ddg_android/5.90.0 (com.duckduckgo.mobile.android; Android API 30) - - DuckDuckBot-Https/1.1; (+https://duckduckgo.com/duckduckbot) - - DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html) - - DuckDuckBot/1.1; (+http://duckduckgo.com/duckduckbot.html) - - DuckDuckGo/0 CFNetwork/1126 Darwin/19.5.0 - - DuckDuckGo/5 (com.duckduckgo.mobile.android; android api 33) - - Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https://duckduckgo.com/duckduckbot) - - Mozilla/5.0 (compatible; DuckDuckGo-Favicons-Bot/1.0; +http://duckduckgo.com) -DuckDuckGo Bot: - - Mozilla/5.0 (compatible; DuckDuckGo-Favicons-Bot/1.0; +http://duckduckgo.com) -EA Origin Browser: - - webcollage.original/1.176 - - webcollage/1.182 - - webcollage/1.183 -Easou Spider: - - Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_0 like Mac OS X; en-us; EasouSpider; +http://www.easou.com/search/spider.html) -eCairn-Grabber: - - Mozilla/5.0 eCairn-Grabber/1.0 (+http://ecairn.com/grabber) -EMail Exractor: - - EMail Exractor -Embedly: - - Mozilla/5.0 (compatible; Embedly/0.2; +http://support.embed.ly/) -evc-batch: - - Mozilla/5.0 (compatible; evc-batch/2.0) -ExaBot: - - Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Exabot-Thumbnails) -Exabot: - - Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot) - - Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Exabot-Thumbnails) -ExactSeek Crawler: - - ExactSeek Crawler (nutch 1.4)/Nutch-1.4 (ExactSeek Crawler; http://www.exactseek.com) -Expanse: - - "Expanse, a Palo Alto Networks company, searches across the global IPv4 space multiple times per day to identify customers' presences on the Internet. If you would like to be excluded from our scans, please send IP addresses/domains to: scaninfo@paloaltonetworks.com" -eZ Publish Link Validator: - - eZ Publish Link Validator -Ezooms: - - Mozilla/5.0 (compatible; Ezooms/1.0; help@moz.com) -Facebook: - - facebookcatalog/1.0 - - facebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php) - - facebookexternalhit/1.1 - - facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php) - - facebookplatform/1.0 (+http://developers.facebook.com) - - facebot -Feeder.co: - - Mozilla/5.0 (feeder.co; Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 -Findxbot: - - Mozilla/5.0 (compatible; Findxbot/1.0; +http://www.findxbot.com) -FirePHP: - - SAMSUNG-S8000/S8000XXIF3 SHP/VPP/R5 Jasmine/1.0 Nextreaming SMM-MMS/1.92.0 profile/MIDP-2.1 configuration/CLDC-1.1 FirePHP/0.3(Linux LLC 1.2) -Flipboard: - - Mozilla/5.0 (compatible; FlipboardRSS/1.2; +http://flipboard.com/browserproxy) - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:28.0) Gecko/20100101 Firefox/28.0 (FlipboardProxy/1.6; +http://flipboard.com/browserproxy) -Foregenix: - - Foregenix Web Scan 1.100000 (www.foregenix.com/scan) -Fuzz Faster U Fool (https://github.com/ffuf/ffuf): - - Fuzz Faster U Fool v1.5.0-dev -Generic Bot: - - SeopultContentAnalyzer/1.0 -Genieo Web filter: - - Mozilla/5.0 (compatible; Genieo/1.0 http://www.genieo.com/webfilter.html) -GettHIT: - - "www.GettHIT.com | Free Traffic Exchange Bot | If you are seeing this, then your website has been listed in our traffic exchange service. | Visit Us : https://www.getthit.com/bot | Macintosh; Intel Mac OS X 10_7_5 (compatible; getthit.com/3.1;)" -Gigablast: - - GigablastOpenSource/1.0 -Gluten Free Crawler: - - Mozilla/5.0 (compatible; Gluten Free Crawler/1.0; +http://glutenfreepleasure.com/) -Gmail Image Proxy: - - Mozilla/5.0 (Windows NT 5.1; rv:11.0) Gecko Firefox/11.0 (via ggpht.com GoogleImageProxy) -Goo: - - DoCoMo/2.0 P900i(c100;TB;W24H11) (compatible; ichiro/mobile goo; +http://search.goo.ne.jp/option/use/sub4/sub4-1/) -Google AdSense: - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/537.36 (KHTML, like Gecko, Mediapartners-Google) Chrome/114.0.5735.179 Safari/537.36 -Google Favicon: - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 Google Favicon -Google PageSpeed Insights: - - Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_0_1 like Mac OS X; en-us) AppleWebKit/537.4 (KHTML, like Gecko; Google Page Speed Insights) Version/4.0.5 Mobile/8A306 Safari/6531.22.7 -Google Partner Monitoring: - - Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1 google_partner_monitoring FWSzVTDDBz14547302713138T -Google Search Console: - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko; Google Search Console) Chrome/41.0.2272.118 Safari/537.36 -Google services: - - AdsBot-Google (+http://www.google.com/adsbot.html) - - APIs-Google (+https://developers.google.com/webmasters/APIs-Google.html) - - "AppEngine-Google; (+http://code.google.com/appengine; appid: s~snapchat-proxy)" - - Dalvik/2.1.0 (Linux; U; Android 5.1.1_r1; Samsung Galaxy S4 Build/SGH-I337M)(googleweblight) - - FeedFetcher-Google; (+http://www.google.com/feedfetcher.html) - - Google-Cloud-Tasks - - GoogleAssociationService - - Mozilla/5.0 (compatible; Google-InspectionTool/1.0) - - Mozilla/5.0 (compatible; Google-Site-Verification/1.0) - - Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/a) - - Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko; googleweblight) Chrome/38.0.1025.166 Mobile Safari/535.19 - - Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko; googleweblight) Chrome/38.0.1025.166 Mobile Safari/535.19,gzip(gfe) - - Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.92 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) - - Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.108 Mobile Safari/537.36 (compatible; Google-InspectionTool/1.0) - - Mozilla/5.0 (Macintosh; intel mac os x 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.175 Safari/537.36 Chrome-Lighthouse - - Mozilla/5.0 (Windows NT 5.1; rv:11.0) Gecko Firefox/11.0 (via ggpht.com GoogleImageProxy) - - Mozilla/5.0 (Windows; Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko; googleweblight) Chrome/38.0.1025.166 Mobile Safari/535.19 - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko; Google Web Preview) Chrome/84.0.4147.108 Safari/537.36 - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 Google (+https://developers.google.com/+/web/snippet/) - - Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/84.0.4147.108 Safari/537.36 - - Nokia5800d-1/5.0 (SymbianOS/9.4; Series60/5.3 Nokia5800d-1/111.050.1511; Mozilla/5.0; Profile/MIDP-2.1 Configuration/CLDC-1.1; googleweblight) AppleWebKit/533.4 (KHTML, like Gecko, googleweblight) NokiaBrowser/8.3.7.4 Mobile Safari/533.4 3gpp-gba -Google Structured Data Testing Tool: - - Google-Structured-Data-Testing-Tool +https://search.google.com/structured-data/testing-tool) -Googlebot: - - Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012; DuplexWeb-Google/1.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Mobile Safari/537.36 -Got: - - got (https://github.com/sindresorhus/got) -Grapeshot: - - Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1. 4 (compatible; GrapeshotCrawler/2.0; +https://www.grapeshot.com/crawler/) -GTmetrix: - - Mozilla/5.0 (X11; Linux x86_64; GTmetrix https://gtmetrix.com/) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36 -GuzzleHttp: - - GuzzleHttp/7 -Heritrix: - - Mozilla/5.0 (compatible; worio bot heritrix/1.10.0 +http://worio.com) -Heureka Feed: - - Heurekabot-Feed/1.0 (+https://sluzby.heureka.cz/napoveda/heurekabot/) -Hexometer: - - Hexometer -HighWinds Content Delivery System: - - HWCDN/GFS v1.80.995-4.38.2369.el7 CDS/AT2 - - HWCDN/GFS v1.80.995-4.38.2369.el7 CDS/DA2 - - HWCDN/GFS v1.89.1003-1.3.2692.el7 CDS/AT2 -Hobbit: - - Hobbit bbtest-net/4.3.0-0.beta2 -HTTP Clients (Software Library): - - Apache-HttpClient/4.3.6 (java 1.5) - - axios/0.18.1,gzip(gfe) - - axios/0.19.2 - - axios/0.21.4, XFF:35.228.69.34 - - curl/7.64.1 - - Go 1.1 package http - - go-http-client/1.1 - - go-resty/2.3.0 (https://github.com/go-resty/resty) - - grpc-java-netty/1.28.1 - - grpc-node/1.24.2 grpc-c/8.0.0 (linux; chttp2; ganges) - - grpc-ruby/1.21.0 grpc-c/7.0.0 (linux; chttp2; gandalf) - - http-kit/2.0 - - Jakarta Commons-HttpClient/3.1 - - Java/1.8.0_121 - - libwww-perl/6.04 - - lua-resty-http/0.10 (Lua) ngx_lua/10000 - - LWP Network - - lwp-trivial/5.810 - - Microsoft-CryptoAPI/10.0 - - Mozilla/5.0libwww-perl/5.826 - - node-fetch - - okhttp/3.4.1 - - PHP/5.2.14 - - PostmanRuntime/7.9.1 - - python-requests/2.21.0 - - Python-urllib/2.7 - - RestSharp/106.5.4.0 - - scalaj-http/2.4.2 - - WinHTTP -HTTPMon: - - HTTPMon/1.0b (http://www.httpmon.com) -HubPages: - - HubPages V0.2.2 (http://hubpages.com/help/crawlingpolicy) -HubSpot: - - HubSpot Website Grader (web-crawlers@hubspot.com) -Hydra by addthis: - - Mozilla/4.0 (Hydra) - - Mozilla/5.0 (Hydra Proxy) -ICC-Crawler: - - ICC-Crawler/2.0 (Mozilla-compatible; ; http://www.nict.go.jp/en/univ-com/plan/crawl.html) -IDG/IT: - - IDG/IT (http://spaziodati.eu/) -IFTTT: - - IFTTT-Protocol/v1 -IIS Site Analysis: - - iisai/1.0 (+http://www.iis.net/iisai.html) -inoreader: - - Mozilla/5.0 (compatible; inoreader.com; 2 subscribers) -Interasco: - - InterascoAgent/1.0 -Invision: - - Invision Community 4 -IP-Guide Crawler: - - IP-Guide.com Crawler/1.0 (https://ip-guide.com) -IPS Agent: - - Mozilla/5.0 (compatible; ips-agent) -JetBrains Omea Reader: - - JetBrains Omea Reader 2.2 (http://www.jetbrains.com/omea/reader/) -Jorgee Vulnerability Scanner: - - Mozilla/5.0 Jorgee -jsjcw_scanner: - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 jsjcw_scanner -Kagi: - - Mozilla/5.0 (compatible; Kagibot/1.0; +https://kagi.com/bot) -Kaspersky: - - Kaspersky Lab CFR link resolver cfradmins@kaspersky.com -keycdn: - - keycdn-tools/br - - keycdn-tools/h2 - - keycdn-tools/perf -Knowledge AI: - - The Knowledge AI -Kuberneters: - - applicationhealthservice/1.0 - - kube-probe/1.15 -l9tcpid (github.com/LeakIX/l9tcpid): - - l9tcpid/v1.1.0 -Larbin web crawler: - - larbin_2.6.3 larbin2.6.3@unspecified.mail -LCC: - - LCC (+http://corpora.informatik.uni-leipzig.de/crawler_faq.html) -Let's Encrypt Validation: - - Mozilla/5.0 (compatible; Let's Encrypt validation server; +https://www.letsencrypt.org) -Liferea: - - Liferea/1.10.6 (Linux; en_US.UTF8; http://liferea.sf.net/) AppleWebKit (KHTML, like Gecko) -Lighthouse: - - Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5 Build/MRA58N) AppleWebKit/537.36(KHTML, like Gecko) Chrome/61.0.3116.0 Mobile Safari/537.36 Chrome-Lighthouse -Linespider: - - Mozilla/5.0 (compatible; linespider/1.1; +https://lin.ee/4dwxkth) -Linkdex Bot: - - Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_1 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7 (compatible; linkdexbot-mobile/2.1; +http://www.linkdex.com/about/bots/) -Linkedin: - - linkedinbot/1.0 (compatible; mozilla/5.0; apache-httpclient +http://www.linkedin.com) -LinkedIn Bot: - - LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/3.1 +http://www.linkedin.com) -LTX71: - - ltx71 - (http://ltx71.com/) -ltx71: - - ltx71 - (http://ltx71.com/) -M2E Pro Cron Service: - - M2E Pro Cron Service/1.0 -Magpie-Crawler: - - magpie-crawler/1.1 (U; Linux amd64; en-GB; +http://www.brandwatch.net) -Mail.Ru Bot: - - Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/Fast/2.0; +http://go.mail.ru/help/robots) -masscan: - - masscan/1.0 (https://github.com/robertdavidgraham/masscan) -Mastodon Bot: - - http.rb/3.2.0 (Mastodon/2.4.3; +https://uwu.social/) -Meanpath Bot: - - Mozilla/5.0 (compatible; meanpathbot/1.0; +http://www.meanpath.com/meanpathbot.html) -MediaHubMX: - - MediaHubMX/2 -Medusa by pymedusa: - - python/unicode Medusa/0.2.14 (Windows; 10; 3174120f-3388-11e9-b8bd-1c1b0d9d2a41) - - python/unicode Medusa/0.2.14 (Windows; 10; d26d37e1-3389-11e9-8d14-1c1b0d9d2a41) -MetaInspector: - - MetaInspector/5.4.0 (+https://github.com/jaimeiniesta/metainspector) -MetaJobBot: - - Mozilla/5.0 (compatible; MetaJobBot; http://www.metajob.de/crawler) -Mixnode: - - Mozilla/5.0 (Mixnode) AppleWebKit/537.36 (KHTML, like Gecko) -Mixrank Bot: - - Mozilla/5.0 (compatible; MixrankBot; crawler@mixrank.com) -MJ12 Bot: - - Mozilla/5.0 (compatible; MJ12bot/v1.4.4; http://www.majestic12.co.uk/bot.php?+) -Mnogosearch: - - Mnogosearch-3.1.21 -MojeekBot: - - Mozilla/5.0 (compatible; MojeekBot/0.6; http://www.mojeek.com/bot.html) -Monit: - - Monit/5.23.0 -Monitor.Us: - - Mozilla/5.0 (compatible; www.monitor.us - free monitoring service; http://www.monitor.us) -Morningscore: - - Mozilla/5.0 (Morningscore/1.0) -Moz: - - Mozilla/5.0 (compatible; DotBot/1.2; +https://opensiteexplorer.org/dotbot; help@moz.com) - - rogerbot/1.2 (https://moz.com/help/guides/moz-procedures/what-is-rogerbot, rogerbot-crawler+aardwolf-crawler-45@moz.com) -Munin: - - munin/http_loadtime -Nagios check_http: - - check_http/v1.5 (nagios-plugins 1.5) -NalezenCzBot: - - NalezenCzBot/1.0 (http://www.nalezen.cz/about-crawler) -nbertaupete95: - - Mozilla/5.0/Firefox/42.0 - nbertaupete95(at)gmail.com -Netcraft Survey Bot: - - Netcraft SSL Server Survey - contact info@netcraft.com -NetLyzer FastProbe: - - NetLyzer FastProbe -NetNewsWire: - - NetNewsWire/4.0.0 (Mac OS X; http://netnewswireapp.com/mac/; gzip-happy) -Netvibes: - - "Netvibes (http://www.netvibes.com/; 8 subscribers; feedID: 2244192)" -Newsbeuter: - - newsbeuter/2.4 (Linux 3.2.0-23-generic; i686; http://www.newsbeuter.org/) libcurl/7.22.0 GnuTLS/2.12.14 zlib/1.2.3.4 libidn/1.23 librtmp/2.3 -NewsBlur: - - NewsBlur Page Fetcher - 7 subscribers - http://www.newsblur.com/site/3966817/analytics-piwik (Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3) - - NewsBlur/4.0.1 CFNetwork/672.1.13 Darwin/14.0.0 -NewsBlur Mobile App: - - NewsBlur iPad App v3.6 -NewsGator: - - NewsGatorOnline/2.0 (http://www.newsgator.com; 2 subscribers) -Newspaper: - - newspaper/0.0.8 - - newspaper/0.2.8 -Ngios Monitor: - - check_http/v2.2.1 (nagios-plugins 2.2.1) -Nibbler (nibbler.silktide.com): - - nibbler -NLCrawler: - - nlcrawler/1.0 (+http://northernlight.com/) -Nmap: - - Mozilla/5.0 (compatible; Nmap Scripting Engine; https://nmap.org/book/nse.html) -Nodejs: - - undici -Nuclei: - - Nuclei - Open-source project (github.com/projectdiscovery/nuclei) -Nutch-based Bot: - - your sipder name/Nutch-1.7 -Nuzzel: - - Nuzzel -oBot: - - Mozilla/5.0 (compatible; oBot/2.3.1; http://www.xforce-security.com/crawler/) -Octopus: - - Octopus 1.0.2 -Off By One: - - Mozilla/4.7 (compatible; OffByOne; Windows 2000) Webster Pro V3.4 -Omgili bot: - - omgilibot/0.3 +http://www.omgili.com/Crawler.html -Openindex Spider: - - Mozilla/5.0 (compatible; OpenindexSpider; +http://www.openindex.io/en/webmasters/spider.html) -OpenLinkProfiler: - - Mozilla/5.0 (compatible; spbot/4.0.9; +http://OpenLinkProfiler.org/bot ) -OpenWebSpider: - - OpenWebSpider v0.1.4 (http://www.openwebspider.org/) -Orange Bot: - - Mozilla/5.0 (Windows; U; Windows NT 5.1;fr;rv:1.8.1) VoilaBotCollector BETA 0.1 (http://www.voila.com/) -Other Site Monitor Bots: - - LogicMonitor SiteMonitor/1.0 -Outbrain: - - Mozilla/5.0 (Java) outbrain -Owler: - - Owler (ows.eu/owler) -Pageburst: - - Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko; compatible; pageburst) CriOS/79.0.3945.117 Mobile/13B143 Safari/601.1.46 - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko; compatible; pageburst) Chrome/109.0.5414.101 Safari/537.36 - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko; compatible; pageburst) Chrome/111.0.5563.146 Safari/537.36 -PaperLiBot: - - Mozilla/5.0 (compatible; PaperLiBot/2.1; http://support.paper.li/entries/20023257-what-is-paper-li) -PDRL: - - pdrl.fm Analyzer / 1.0.0 -PerimeterX: - - PerimeterX Integration Services -PetalBot: - - Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot) -Phantomas: - - phantomas/1.11.0 (PhantomJS/1.9.8; linux x64) -PhantomJS: - - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.0.0 Safari/538.1 -PHP Server Monitor: - - Mozilla/5.0 (compatible; phpservermon/3.1.1; +http://www.phpservermonitor.org) -Picsearch bot: - - psbot/0.1 (+http://www.picsearch.com/bot.html) -Pingdom: - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/61.0.3163.100 Chrome/61.0.3163.100 Safari/537.36 PingdomPageSpeed/1.0 (pingbot/2.0; +http://www.pingdom.com/) -Pingdom Bot: - - Pingdom.com_bot_version_1.4_(http://www.pingdom.com/) -Pinterest: - - Mozilla/5.0 (compatible; Pinterestbot/1.0; http://www.pinterest.com/bot.html) -Placid.app: - - placid.app/v1 -PocketParser: - - PocketParser/2.0 (+https://getpocket.com/pocketparser_ua) -Postman: - - PostmanRuntime/7.29.0 -PritTorrent: - - PritTorrent/1.0 -Prometheus: - - check_http/v2.1.2 (monitoring-plugins 2.1.2) - - prometheus/2.18.1 -proximic: - - Mozilla/5.0 (compatible; proximic; +http://www.proximic.com/info/spider.php) -Pulp: - - Pulp/1.5.2 (iPad; http://www.acrylicapps.com/pulp/) -QuerySeekerSpider: - - QuerySeekerSpider ( http://queryseeker.com/bot.html ) -Quora Link Preview: - - Quora Link Preview/1.0 (http://www.quora.com) -Qwantify: - - Mozilla/5.0 (compatible; Qwantify/2.2w; +https://www.qwant.com/)/* -Rainmeter: - - Rainmeter WebParser plugin -RamblerMail Image Proxy: - - RamblerMail/6.0 (incompatible; ImageProxy/6.0) -ReadKit: - - ReadKit/7017 CFNetwork/673.2.1 Darwin/13.1.0 (x86_64) (MacBookPro10%2C1) -RebelMouse: - - RebelMouse/0.1 Mozilla/5.0 (compatible; http://rebelmouse.com) Gecko/20100101 Firefox/7.0.1 -Reddit Bot: - - Mozilla/5.0 (compatible; redditbot/1.0; +http://www.reddit.com/feedback) -Reeder: - - Reeder/3.2 CFNetwork/672.1.12 Darwin/14.0.0 -Request-Promise: - - Request-Promise -Rest Client github.com/rest-client/rest-client: - - rest-client/2.1.0 - - rest-client/2.1.0 (linux x86_64) ruby/2.7.6p219 -Rigor Synythetic Monitoring: - - Mozilla/5.0 (X11; Linux x86_64; Rigor) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36 - - Mozilla/5.0 (X11; Linux x86_64; rv:45.0; Rigor) Gecko/20100101 Firefox/45.0 -Rogerbot: - - rogerbot/1.0 (http://www.moz.com/dp/rogerbot, rogerbot-crawler@moz.com) -ROI Hunter: - - ROI Hunter; https://api-dev.roihunter.com -RSS Feed Readers: - - Bloglines/3.1 (http://www.bloglines.com) - - Feed Wrangler/1.0 (3 subscribers; feed-id=248559; http://feedwrangler.net; Allow like Gecko) - - Feedbin - 9 subscribers - - FeedBurner/1.0 (http://www.FeedBurner.com) - - FeedDemon/4.5 (http://www.feeddemon.com/; Microsoft Windows XP) - - FeeddlerRSS 2.4 (iPad; iPhone OS 5.1.1; en_US) - - Feedspot http://www.feedspot.com - - Fever/1.38 (Feed Parser; http://feedafever.com; Allow like Gecko) - - FreshRSS/1.12.0 (Linux; https://freshrss.org) - - kouio.com RSS reader - 6 subscribers - - MagpieRSS/0.72 (+http://magpierss.sf.net) - - MetaFeedly/1.0 (http://www.feedly.com) - - Mozilla/5.0 (feeder.co; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36 - - Mozilla/5.0 (Windows NT 6.1) AppleWebKit/602.1 (KHTML, like Gecko) QuiteRSS/0.18.12 Safari/602.1 - - RSS Junkie Daemon - - RssBandit/1.9.0.1002 (.NET CLR 2.0.50727.7512; WinNT 6.2.9200.0; http://www.rssbandit.org) - - RSSOwl/2.2.1.201312301316 (X11; U; en) - - RSSRadio (Push Notification Scanner;support@dorada.co.uk) - - Tiny Tiny RSS/1.11.4c63934 (http://tt-rss.org/) -RuxitSynthetic: - - Chrome/80.0.3987.87 Safari/537.36 RuxitSynthetic/1.0 v6191106029376332690 t7889551165227354132 - - Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1 RuxitSynthetic/1.0 -SafeDNSBot: - - SafeDNSBot (https://www.safedns.com/searchbot) -Scrapy: - - Scrapy/1.0.3.post6+g2d688cd (+http://scrapy.org) -Screaming Frog SEO Spider: - - Screaming Frog SEO Spider/2.22 -Screeenly: - - screeenly-bot 2.0 -ScreenerBot: - - ScreenerBot Crawler Beta 2.0 (+http://www.ScreenerBot.com) -Seekport: - - Mozilla/5.0 (compatible; Seekport Crawler; http://seekport.com/) -Semrush Bot: - - Mozilla/5.0 (compatible; SemrushBot/0.97; +http://www.semrush.com/bot.html) -Sensika Bot: - - SensikaBot/x.33 (+http://sensika.com) -Sentry Bot: - - sentry/8.6.0 (https://getsentry.com) -SEOENGBot: - - SEOENGWorldBot/1.0 (+http://www.seoengine.com/seoengbot.htm) -SEOkicks-Robot: - - Mozilla/5.0 (compatible; SEOkicks-Robot; +http://www.seokicks.de/robot.html) -Seoscanners.net: - - Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net) -Server Density: - - Server Density Service Monitoring v2 -Seznam Bot: - - SeznamBot/3.0 (+http://fulltext.sblog.cz/) -Seznam Email Proxy: - - Mozilla/5.0 SeznamEmailProxy/2.0.174 -Seznam Zbozi.cz: - - Seznam-Zbozi-robot/3.0 -SeznamBot: - - Mozilla/5.0 (compatible; SeznamBot/4.0-RC1 +http://napoveda.seznam.cz/seznambot-intro/) -Shared Web Credentials: - - swcd (unknown version) CFNetwork/1128.0.1 Darwin/19.6.0 -ShopAlike: - - Mozilla/5.0 (ShopAlike; LadenZeile) FeedBot -Shopify Partner: - - shopify-partner-homepage-scraper -ShopWiki: - - ShopWiki/1.0 ( +http://www.shopwiki.com/wiki/Help:Bot) -SilverReader: - - SilverReader/1.0; http://silverreader.com -SimplePie: - - SimplePie/1.3.1 (Feed Parser; http://simplepie.org; Allow like Gecko) Build/20121030175911 -Sistrix: - - Mozilla/5.0 (compatible; Optimizer) - - Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4143.7 Mobile Safari/537.36 Chrome-Lighthouse - - Python-urllib/3.7 -SISTRIX Crawler: - - Mozilla/5.0 (compatible; SISTRIX Crawler; http://crawler.sistrix.net/) -SISTRIX Optimizer: - - Mozilla/5.0 (compatible; SISTRIX Optimizer; Uptime; +https://www.sistrix.com/faq/uptime) -Site24x7 Website Monitoring: - - Site24x7 -Siteimprove: - - Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0) LinkCheck by Siteimprove.com -SiteScoreBot: - - SiteScoreBot v20210315 - https://sitescore.ai -SiteSucker: - - SiteSucker for macOS/2.10.5 -Sixy.ch: - - sixy.ch/1.0 -Skype: - - Mozilla/5.0 (Windows nt 6.1; wow64) SkypeURIPreview Preview/0.5 -Skype URI Preview: - - Mozilla/5.0 (Windows NT 6.1; WOW64) SkypeUriPreview Preview/0.5 -Slackbot: - - Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots) -SMTBot: - - Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36 (compatible; SMTBot/1.0; +http://www.similartech.com/smtbot) -Snapchat Proxy: - - "AppEngine-Google; (+http://code.google.com/appengine; appid: s~snapchat-proxy)" -Sogou: - - Mozilla/5.0 (Linux; Android 9; NX569J Build/PQ3A.190505.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.106 Mobile Safari/537.36 AWP/2.0 SogouMSE,SogouMobileBrowser/5.21.8 - - Sogou head spider/3.0( http://www.sogou.com/docs/help/webmasters.htm#07) - - Sogou Orion spider/3.0( http://www.sogou.com/docs/help/webmasters.htm#07) - - Sogou Pic Spider/3.0( http://www.sogou.com/docs/help/webmasters.htm#07) - - Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07) - - Sogou-Test-Spider/4.0 (compatible; MSIE 5.5; Windows 98) -Sogou Spider: - - Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm -Soso Spider: - - Sosospider+(+http://help.soso.com/webspider.htm) -Sparkler: - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Sparkler/0.2.0-SNAPSHOT -Speedcurve: - - Mozilla/5.0 (iPhone; CPU iPhone OS 15_0_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15E148 Safari/604.1 PTST/SpeedCurve/230120.120134 - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 PTST/SpeedCurve/230120.120134 -Spinn3r: - - Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.0.19; aggregator:Spinn3r (Spinn3r 3.1); http://spinn3r.com/robot) Gecko/2010040121 Firefox/3.0.19 -Splash: - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) splash Version/10.0 Chrome/98.0.4758.80 Safari/537.36 - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/602.1 (KHTML, like Gecko) splash Version/10.0 Safari/602.1 - - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/602.1 (KHTML, like Gecko) splash Version/9.0 Safari/602.1 -Spotify: - - Spotify/1.0 -Sputnik Bot: - - Mozilla/5.0 (compatible; SputnikImageBot/2.2) -sqlmap: - - sqlmap/1.1.8.2#dev (http://sqlmap.org) -SSL Labs: - - SSL Labs (https://www.ssllabs.com/about/assessment.html) -Startpagina Linkchecker: - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/536.30.1 (KHTML, like Gecko) (compatible; Startpagina-Linkchecker/1.0; +https://www.startpagina.nl/linkchecker) -StatusCake: - - Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/98 Safari/537.4 (StatusCake) -Steam: - - Valve/Steam HTTP Client 1.0 -Superfeedr Bot: - - "Superfeedr bot/2.0 http://superfeedr.com - Make your feeds realtime: get in touch!" -Survey Bot: - - Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv:1.9.0.13) Gecko/2009073022 Firefox/3.5.2 (.NET CLR 3.5.30729) SurveyBot/2.3 (DomainTools) -SWCD: - - SWCD (Unknown Version) CFNetwork/1107.1 Darwin/19.0.0 -SyntheticsAgent: - - Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/605.36 (KHTML, like Gecko) Chrome/107.0.4183.121 Mobile Safari/537.36 SyntheticsAgent/1670084315573 -TagInspector: - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) TagInspector/500.1 Chrome/90.0.4430.72 Safari/537.36 Edg/90.0.818.42 -Tarmot Gezgin: - - Tarmot Gezgin/1.0 (compatible; TarmotGezgin/1.1; +http://www.tarmot.com/gezgin) -Telegram: - - TelegramBot (like TwitterBot) -TelegramBot: - - TelegramBot (like TwitterBot) -The Knowledge AI: - - The Knowledge AI -theoldreader: - - Mozilla/5.0 (compatible; theoldreader.com; 1 subscribers; feed-id=aaa) -Thumbor: - - Thumbor/6.7.5, Mozilla/5.0 (Linux; Android 9; RAZER Phone Build/NMF26F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36 AlohaBrowser/2.13.1 -TinEye Crawler: - - TinEye-bot/0.02 (see http://www.tineye.com/crawler.html) -TLSProbe: - - TLSProbe/1.0 (+https://scan.trustnet.venafi.com/) -ToolBot: - - "SEO Consulting; Redirect Checker Tool V.02; IP:" -TraceMyFile: - - Mozilla/5.0 (compatible; tracemyfile/1.0) -Trackable: - - Trackable/0.1 NNjCeA -Trendiction Bot: - - Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.0; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20071127 Firefox/3.0.0.11 -TurnitinBot: - - TurnitinBot/3.0 (http://www.turnitin.com/robot/crawlerinfo.html) -TweetedTimes Bot: - - TweetedTimes Bot/1.0 (Mozilla/5.0 Compatible, +http://tweetedtimes.com) -Tweetmeme Bot: - - Mozilla/5.0 (compatible; TweetmemeBot/3.0; +http://tweetmeme.com/) -Twingly Recon: - - Mozilla/5.0 (compatible; Twingly Recon; twingly.com) -Twitterbot: - - Twitterbot/1.0 -UkrNet Mail Proxy: - - Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36 (via secureurl.fwdcdn.com - mail.ukr.net proxy) -UniversalFeedParser: - - UniversalFeedParser/5.2.1 +https://code.google.com/p/feedparser/ -updown.io monitoring: - - updown.io daemon 2.2 -Uptime Robot: - - Mozilla/5.0+(compatible; UptimeRobot/2.0; http://www.uptimerobot.com/) -Uptimebot: - - Mozilla/5.0 (compatible; Uptimebot/1.0; +http://www.uptime.com/uptimebot) -URLAppendBot: - - Mozilla/5.0 (compatible; URLAppendBot/1.0; +http://www.profound.net/urlappendbot.html) -Vagabondo: - - Mozilla/4.0 (compatible; Vagabondo/4.0; http://webagent.wise-guys.nl/; http://www.wise-guys.nl/) -Var: - - Anonymous, Mozilla/5.0 (Linux; Android 11; CPH2211) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.166 Mobile Safari/537.36 OPR/65.2.3381.61420 - - Anonymous, Mozilla/5.0 (Linux; Android 9; RAZER Phone Build/NMF26F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36 AlohaBrowser/2.13.1 - - Anonymous, Mozilla/5.0 (X11; Linux Mint x86_64) AppleWebKit/537.37 (KHTML, like Gecko) Chrome/50.0.2272.105 Safari/537.37 Vivaldi/1.5.201.2 -Visual Site Mapper Crawler: - - Mozilla/5.0 (compatible; VSMCrawler; http://www.visualsitemapper.com/crawler/) -VK Share Button: - - Mozilla/5.0 (compatible; vkShare; +http://vk.com/dev/Share) -W3C CSS Validator: - - Jigsaw/2.3.0 W3C_CSS_Validator_JFouffa/2.0 (See ) -W3C I18N Checker: - - W3C_I18n-Checker/1.0 (http://validator.w3.org/services) -W3C Link Checker: - - W3C-checklink/4.81 libwww-perl/5.836 -W3C Markup Validation Service: - - W3C_Validator/1.767 -W3C MobileOK Checker: - - W3C-mobileOK/DDC-1.0 (see http://www.w3.org/2006/07/mobileok-ddc) -W3C Unified Validator: - - W3C_Unicorn/1.0 (http://validator.w3.org/services) -Wappalyzer: - - Mozilla/5.0 (compatible; Wappalyzer; +https://github.com/AliasIO/Wappalyzer) -WebbCrawler: - - WebbCrawler 1.0 ( http://badcheese.com/crawler.html ) -WebGazer: - - WebGazer/1.0 (+https://www.webgazer.io) -Weborama: - - weborama-fetcher (+http://www.weborama.com) -WebPageTest: - - Mozilla/5.0 (Linux; Android 6.0.1; Moto G (4) Build/MPJ24.139-64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Mobile Safari/537.36 PTST/180829.190838 -WebPurify: - - WebPurify(callback) -WebSitePulse: - - websitepulse checker/1.1 (compatible; MSIE 5.5; Netscape 4.75; Linux) -Wechat Dev Tools: - - Mozilla/5.0 (iPhone; CPU iPhone OS 13_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/7.0.12(0x17000c2f) NetType/4G Language/zh_CN wechatdevtools qcloudcdn-xinan - - Mozilla/5.0 (Linux; Android 0; MIX 2S Build/QKQ1.190828.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/105.0.0.0MQQBrowser/6.2 TBS/045129 Mobile Safari/537.36 MMWEBID/5228 MicroMessenger/7.0.14.1660(0x27000E37) Process/tools NetType/WIFI Language/zh_CN ABI/arm64 WeChat/arm64 wechatdevtools qcloudcdn-xinan - - Mozilla/5.0 (Linux; Android 11; FIG-AL10 Build/HUAWEIFIG-AL10; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/105.0.0.0MQQBrowser/6.2 TBS/045223 Mobile Safari/537.36 MMWEBID/1214 MicroMessenger/7.0.14.1660(0x27000E39) Process/tools NetType/WIFI Language/zh_CN ABI/arm64 WeChat/arm64 wechatdevtools qcloudcdn-xinan - - Mozilla/5.0 (Linux; Android 7.1.2; M6 Note Build/N2G47H; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/105.0.0.0MQQBrowser/6.2 TBS/045223 Mobile Safari/537.36 MMWEBID/9551 MicroMessenger/7.0.14.1660(0x27000E37) Process/tools NetType/4G Language/zh_CN ABI/arm64 WeChat/arm64 wechatdevtools qcloudcdn-xinan -WeSEE:Search: - - WeSEE:Search/0.1 (Alpha, http://www.wesee.com/en/support/bot/) -WhatsApp: - - WhatsApp/2.21.12.21 A - - whatsapp/2.2123.8 n -WikiDo: - - WikiDo/1.1 (http://wikido.com; crawler@wikido.com) -Wininet-APIs https://web.archive.org/web/20100715071639/http://blogs.msdn.com/b/jpsanders/archive/2009/04/17/how-to-get-certificate-information-using-wininet-apis.aspx: - - Test Certificate Info -WooRank: - - Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1 (compatible; woorankreview/2.0; +https://www.woorank.com/) -Woorank Test Tool: - - Mozilla/5.0 (compatible; woorankreview/2.0; +https://www.woorank.com/) - - Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1 (compatible; woorankreview/2.0; +https://www.woorank.com/) - - Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1 (compatible; woorankreview/2.0; +https://www.woorank.com/) - - Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4420.0 Mobile Safari/537.36 Chrome-Lighthouse (compatible; woorankreview/2.0; +https://www.woorank.com/) -WordPress: - - WordPress/4.7.2; https://example.com -Wotbox: - - Wotbox/2.01 (+http://www.wotbox.com/bot/) -XaxisSemanticsClassifier: - - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36 XaxisSemanticsClassifier/1.0 -XenForo: - - XenForo/2.x (https://www.example.com) -Xymon Site Monitor: - - Xymon xymonnet/4.3.17 -YaCy: - - yacybot (freeworld/global; x86 Windows XP 5.1; java 1.7.0_21; GMT+04:00/ru) http://yacy.net/bot.html -Yahoo Ad monitoring: - - Mozilla/5.0 (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html) tands-prod-eng.hlfs-prod---sieve.hlfs-rest_client/1624415525-0 -Yahoo Gemini: - - Mozilla/5.0 (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html) -Yahoo! Japan BRW: - - Y!J-BRW/1.0 (https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716) -Yahoo! Link Preview: - - Yahoo:LinkExpander:Slingstone -Yahoo! Slurp: - - Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp) -Yandex: - - Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots) -Yandex Bot: - - Mozilla/5.0 (compatible; YaDirectFetcher/1.0; Dyatel) - - Mozilla/5.0 (compatible; YandexVerticals/1.0; http://yandex.com/bots) -Yeti (Naver): - - Mozilla/5.0 (compatible; yeti/1.1; +http://naver.me/spd) - - Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.0 Safari/537.36 (compatible; Yeti/1.1; +http://naver.me/spd) - - Yeti/1.1 (Naver Corp.; http://help.naver.com/robots/) -Youdao Bot: - - Mozilla/5.0 (compatible; YoudaoBot/1.0; http://www.youdao.com/help/webmaster/spider/; ) -Yourls: - - YOURLS v1.5.1 +http://yourls.org/ (running on http://fhort.com) -Yunyun Bot: - - Mozilla/5.0 (compatible; YYSpider; +http://www.yunyun.com/spider.html) -Zao: - - Zao/0.1 (http://www.kototol.org/zao) -Ze List: - - zelist.ro feed parser (+http://www.zelist.ro) -Zendesk: - - Zendesk Webhook -Zookabot: - - Zookabot/2.5;++http://zookabot.com -Zoom Webhook: - - Zoom Marketplace/1.0a -ZoomBot (seozoom.it): - - zoombot (linkbot 1.0 http://suite.seozoom.it/bot.html) -ZumBot: - - ZumBot/1.0 (ZUM Search; http://help.zum.com/inquiry) -ZZZ Miscellaneous Glitches and Errornous User Agent Strings: - - "123" - - - - Chrome - - default_user_agent - - ipad - - iphone 6 plus;afengineurl=https://intoli.com:443;traceId=63028f8e-c5fc-4846-993f-59a96268a85d - - Mozilla/5.0 (compatible; 007ac9 Crawler; http://crawler.007ac9.net/) - - Mozilla/5.0 (Linux; Android 10; FIG-AL10 Build/HUAWEIFIG-AL10; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/105.0.0.0MQQBrowser/6.2 TBS/045223 Mobile Safari/537.36 MMWEBID/1214 MicroMessenger/7.0.14.1660(0x27000E39) Process/tools NetType/4G Language/zh_CN ABI/arm64 WeChat/arm64 wechatdevtools qcloudcdn-xinan - - Mozilla/5.0 (Linux; Android 10; M6 Note Build/N2G47H; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/105.0.0.0MQQBrowser/6.2 TBS/045223 Mobile Safari/537.36 MMWEBID/9551 MicroMessenger/7.0.14.1660(0x27000E37) Process/tools NetType/4G Language/zh_CN ABI/arm64 WeChat/arm64 wechatdevtools qcloudcdn-xinan - - pisya - - "POST /parser Host: user-agents.net action=parse&format=[json|xml]&string=Mozilla%2F5.0%20%28Linux%3B%20Android%2012%29%20AppleWebKit%2F537.36%20%28KHTML%2C%20like%20Gecko%29%20Version%2F4.0%20Chrome%2F96.0.4664.104%20Mobile%20DuckDuckGo%2F5%20Safari%2F537.36" - - search.marginalia.nu - - U2FsdGVkX1+uKxeH2946/bMTDvtm/Fr0nWjvFR/oPtc64LSh1Gg0qkbJUIhLpSw5h/mjF86TFOXrl4U2SG1KBi4BC0EfphyIzeOxVXkpBWHDMfJnkrFGrubrRGjmJNIN49DKkOcjVgq2/iVDBMSAQe30k9wNIDtflfnlrOrmDPkXiYNjLbohSHLaNWS/GK5hu62gkOH25c9i1B+jMq5kc590HoQqJ0o4es9QrEnwluMsYPbQy14LxgPjeCQveiPHPXtkSM7TmfTY53HEJdbFHylstSOJNTQclbL67BKx33M= +OpenAI GPTBot: + - Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; GPTBot/1.1; +https://openai.com/gptbot \ No newline at end of file diff --git a/scripts/build/pattern.js b/scripts/build/pattern.js index 71ca596..d1c0ad2 100755 --- a/scripts/build/pattern.js +++ b/scripts/build/pattern.js @@ -11,7 +11,7 @@ const patterns = JSON.parse( const pattern = new RegExp( patterns - .map((pattern) => pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")) + .map((pattern) => pattern.pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")) .join("|"), ).source; diff --git a/src/index.ts b/src/index.ts index 2ffaa87..8a1c073 100644 --- a/src/index.ts +++ b/src/index.ts @@ -23,7 +23,7 @@ export function getPattern(): RegExp { /** * A list of bot identifiers to be used in a regular expression against user agent strings. */ -export const list: string[] = patternsList; +export const list: string[] = patternsList.map((pattern) => pattern.pattern); /** * Check if the given user agent includes a bot pattern. Naive implementation (less accurate). diff --git a/src/patterns.json b/src/patterns.json index 8a34d33..2ac2353 100644 --- a/src/patterns.json +++ b/src/patterns.json @@ -1,175 +1,39 @@ [ - " daum[ /]", - " deusu/", - " yadirectfetcher", - "(?:^|[^g])news(?!sapphire)", - "(? list.slice(); +const clone = (): string[] => list.map((p) => p.pattern).slice(); describe("efficiency", () => { describe(`Redundant rules: no rule can be removed. Check each one against ${crawlers.length} user agent strings`, () => { diff --git a/tests/spec/test.ts b/tests/spec/test.ts index 82fcd18..d2ac3b7 100644 --- a/tests/spec/test.ts +++ b/tests/spec/test.ts @@ -14,8 +14,8 @@ import { fullPattern } from "../../src/pattern"; import { crawlers, browsers } from "../../fixtures"; let isaiInstance: any; -const BOT_USER_AGENT_EXAMPLE = - "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"; +const AI_USER_AGENT_EXAMPLE = + "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot"; const BROWSER_USER_AGENT_EXAMPLE = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91 Safari/537.36"; @@ -38,30 +38,30 @@ describe("isai", () => { expect(list).toBeInstanceOf(Array); expect(list.every((item) => typeof item === "string")).toBe(true); }); - test("isai: bot user agect string is recognised as bot", () => { - expect(isai(BOT_USER_AGENT_EXAMPLE)).toBe(true); + test("isai: bot user agent string is recognised as an AI", () => { + expect(isai(AI_USER_AGENT_EXAMPLE)).toBe(true); }); test("isaiMatch: find pattern in bot user agent string", () => { - expect(isaiMatch(BOT_USER_AGENT_EXAMPLE)).toBe("Google"); + expect(isaiMatch(AI_USER_AGENT_EXAMPLE)).toBe("Google"); }); test("isaiMatches: find all patterns in bot user agent string", () => { - expect(isaiMatches(BOT_USER_AGENT_EXAMPLE)).toContain("Google"); - expect(isaiMatches(BOT_USER_AGENT_EXAMPLE)).toHaveLength(4); + expect(isaiMatches(AI_USER_AGENT_EXAMPLE)).toContain("Google"); + expect(isaiMatches(AI_USER_AGENT_EXAMPLE)).toHaveLength(4); }); test("isaiPattern: find first pattern in bot user agent string", () => { - expect(isaiPattern(BOT_USER_AGENT_EXAMPLE)).toBe( + expect(isaiPattern(AI_USER_AGENT_EXAMPLE)).toBe( "(? { - expect(isaiPatterns(BOT_USER_AGENT_EXAMPLE)).toContain( + expect(isaiPatterns(AI_USER_AGENT_EXAMPLE)).toContain( "(? { const customisai = createisai(/bot/i); - expect(customisai(BOT_USER_AGENT_EXAMPLE)).toBe(true); + expect(customisai(AI_USER_AGENT_EXAMPLE)).toBe(true); }); test("createisaiFromList: create custom isai function with custom pattern", () => { const ChromeLighthouseUserAgentStrings: string[] = [ @@ -165,7 +165,7 @@ describe("isai", () => { expect(misidentifiedStrings).toEqual([]); expect(successCount).toBe(crawlers.length); }); - test(`✘ ${browsers.length} user agent string should not be recognised as crawler`, () => { + test(`✘ ${browsers.length} user agent string should not be recognised as an AI`, () => { let successCount = 0; let misidentifiedStrings: string[] = []; browsers.forEach((browser) => {