diff --git a/composer.json b/composer.json
index 864001e..4ed3638 100644
--- a/composer.json
+++ b/composer.json
@@ -17,10 +17,5 @@
             "lib/phpUserAgent.php",
             "lib/phpUserAgentStringParser.php"
         ]
-    },
-    "extra": {
-        "branch-alias": {
-            "dev-master": "1.0-dev"
-        }
     }
 }
diff --git a/lib/phpUserAgent.php b/lib/phpUserAgent.php
index 2e29b0d..61c1d7d 100644
--- a/lib/phpUserAgent.php
+++ b/lib/phpUserAgent.php
@@ -135,6 +135,31 @@ public function isUnknown()
     return empty($this->browserName);
   }
 
+  /**
+   * Returns true if user agent is a known bot/crawler
+   *
+   * @return boolean
+   */
+  public function isBot()
+  {
+    return in_array($this->getBrowserName(), $this->getKnownBots(), true);
+  }
+
+  /**
+   * Returns an array of strings identifying known bots
+   *
+   * @return array
+   */
+  public function getKnownBots()
+  {
+    return array(
+      'googlebot',
+      'msnbot',
+      'yahoobot',
+      'facebookbot'
+    );
+  }
+
   /**
    * @return string combined browser name and version
    */
diff --git a/lib/phpUserAgentStringParser.php b/lib/phpUserAgentStringParser.php
index e33548e..e76d3ad 100644
--- a/lib/phpUserAgentStringParser.php
+++ b/lib/phpUserAgentStringParser.php
@@ -39,7 +39,7 @@ public function parse($userAgentString = null)
 
   /**
    * Detect quickly informations from the user agent string
-   * 
+   *
    * @param string $userAgentString user agent string
    * @return array user agent informations array
    */
@@ -83,7 +83,7 @@ protected function doParse($userAgentString)
 
     // Find operating system
     $pattern = '#'.join('|', $this->getKnownOperatingSystems()).'#';
-    
+
     if (preg_match($pattern, $userAgent['string'], $match))
     {
       if (isset($match[0]))
@@ -94,7 +94,7 @@ protected function doParse($userAgentString)
 
     // Find engine
     $pattern = '#'.join('|', $this->getKnownEngines()).'#';
-    
+
     if (preg_match($pattern, $userAgent['string'], $match))
     {
       if (isset($match[0]))
@@ -143,12 +143,13 @@ public function getFilters()
       'filterOperaVersion',
       'filterYahoo',
       'filterMsie',
+      'filterFacebookBot',
     );
   }
 
   /**
    * Add a filter to be called when parsing a user agent
-   * 
+   *
    * @param string $filter name of the filter method
    */
   public function addFilter($filter)
@@ -174,9 +175,10 @@ protected function getKnownBrowsers()
       'gecko',
       'chrome',
       'googlebot',
+      'facebookbot',
       'iphone',
       'msnbot',
-      'applewebkit'
+      'applewebkit',
     );
   }
 
@@ -188,11 +190,12 @@ protected function getKnownBrowsers()
   protected function getKnownBrowserAliases()
   {
     return array(
-      'shiretoko'     => 'firefox',
-      'namoroka'      => 'firefox',
-      'shredder'      => 'firefox',
-      'minefield'     => 'firefox',
-      'granparadiso'  => 'firefox'
+      'shiretoko'           => 'firefox',
+      'namoroka'            => 'firefox',
+      'shredder'            => 'firefox',
+      'minefield'           => 'firefox',
+      'granparadiso'        => 'firefox',
+      'facebookexternalhit' => 'facebookbot',
     );
   }
 
@@ -204,12 +207,29 @@ protected function getKnownBrowserAliases()
   protected function getKnownOperatingSystems()
   {
     return array(
-      'windows',
-      'macintosh',
-      'linux',
-      'freebsd',
-      'unix',
-      'iphone'
+
+      'Windows 8',
+      'Windows 7',
+      'Windows Vista',
+      'Windows Server 2003/XP x64',
+      'Windows XP',
+      'Windows 2000',
+      'Windows ME',
+      'Windows 98',
+      'Windows 95',
+      'Windows 3.11',
+      'Mac OS X',
+      'Mac OS 9',
+      'Macintosh',
+      'Ubuntu',
+      'iPhone',
+      'iPod',
+      'iPad',
+      'Android',
+      'BlackBerry',
+      'Mobile',
+      'Linux',
+
     );
   }
 
@@ -220,7 +240,29 @@ protected function getKnownOperatingSystems()
    */
   protected function getKnownOperatingSystemAliases()
   {
-    return array();
+    return array(
+      'windows nt 6.2' => 'Windows 8',
+      'windows nt 6.1' => 'Windows 7',
+      'windows nt 6.0' => 'Windows Vista',
+      'windows nt 5.2' => 'Windows Server 2003/XP x64',
+      'windows nt 5.1' => 'Windows XP',
+      'windows xp'     => 'Windows XP',
+      'windows nt 5.0' => 'Windows 2000',
+      'windows me'     => 'Windows ME',
+      'win98'          => 'Windows 98',
+      'win95'          => 'Windows 95',
+      'win16'          => 'Windows 3.11',
+      'mac os x'       => 'Mac OS X',
+      'mac_powerpc'    => 'Mac OS 9',
+      'ubuntu'         => 'Ubuntu',
+      'iphone'         => 'iPhone',
+      'ipod'           => 'iPod',
+      'ipad'           => 'iPad',
+      'android'        => 'Android',
+      'blackberry'     => 'BlackBerry',
+      'webos'          => 'Mobile',
+      'linux'          => 'Linux',
+    );
   }
 
   /**
@@ -234,7 +276,7 @@ protected function getKnownEngines()
       'gecko',
       'webkit',
       'trident',
-      'presto'
+      'presto',
     );
   }
 
@@ -299,6 +341,7 @@ protected function filterYahoo(array &$userAgent)
 
   /**
    * MSIE does not always declare its engine
+   * IE11 omits "msie" from its user agent string: detect it from the Trident engine plus an "rv:" version token
    */
   protected function filterMsie(array &$userAgent)
   {
@@ -306,16 +349,32 @@ protected function filterMsie(array &$userAgent)
     {
       $userAgent['engine'] = 'trident';
     }
+    if (empty($userAgent['browser_name']) && ('trident' === $userAgent['engine']) && preg_match('|rv:([0-9]+(?:\.[0-9]+)+)|', $userAgent['string'], $match))
+    {
+      $userAgent['browser_name'] = 'msie';
+      $userAgent['browser_version'] = $match[1];
+    }
   }
 
-    /**
-     * Android has a safari like signature
-     */
-    protected function filterAndroid(array &$userAgent) {
-        if ('safari' === $userAgent['browser_name'] && strpos($userAgent['string'], 'android ')) {
-            $userAgent['browser_name'] = 'android';
-            $userAgent['operating_system'] = 'android';
-            $userAgent['browser_version'] = preg_replace('|.+android ([0-9]+(?:\.[0-9]+)+).+|', '$1', $userAgent['string']);
-        }
+  /**
+   * Android has a safari like signature
+   */
+  protected function filterAndroid(array &$userAgent)
+  {
+    if ('safari' === $userAgent['browser_name'] && preg_match('|Android [0-9]+(?:\.[0-9]+)+|', $userAgent['string'], $match))
+    {
+      $userAgent['operating_system'] = $match[0];
+    }
+  }
+
+  /**
+   * Facebook external hit
+   */
+  protected function filterFacebookBot(array &$userAgent)
+  {
+    if ('facebookexternalhit' === $userAgent['browser_name'])
+    {
+      $userAgent['browser_name'] = 'facebookbot';
     }
+  }
 }
diff --git a/test/StringParserTest.php b/test/StringParserTest.php
index 5906682..b18a59b 100644
--- a/test/StringParserTest.php
+++ b/test/StringParserTest.php
@@ -6,43 +6,43 @@ $tests = array(
 
   // Namoroka Ubuntu
   'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2pre) Gecko/20100116 Ubuntu/9.10 (karmic) Namoroka/3.6pre'
-    => array('firefox', '3.6', 'linux', 'gecko'),
+    => array('firefox', '3.6', 'Linux', 'gecko'),
 
   // Namoroka Mac
   'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100105 Firefox/3.6'
-    => array('firefox', '3.6', 'macintosh', 'gecko'),
+    => array('firefox', '3.6', 'Mac OS X', 'gecko'),
 
   // Chrome Mac
   'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; fr-fr) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10'
-    => array('chrome', '4.0', 'macintosh', 'webkit'),
+    => array('chrome', '4.0', 'Mac OS X', 'webkit'),
 
   //Safari Mac
   'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; fr-fr) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10'
-    => array('safari', '4.0', 'macintosh', 'webkit'),
+    => array('safari', '4.0', 'Mac OS X', 'webkit'),
 
   // Opera 9 Windows
   'Opera/9.61 (Windows NT 6.0; U; en) Presto/2.1.1'
-    => array('opera', '9.61', 'windows', 'presto'),
+    => array('opera', '9.61', 'Windows Vista', 'presto'),
 
   // Opera 10 Windows
   'Opera/9.80 (Windows NT 5.1; U; en) Presto/2.2.15 Version/10.10'
-    => array('opera', '10.10', 'windows', 'presto'),
+    => array('opera', '10.10', 'Windows XP', 'presto'),
 
   // Firefox Linux
   'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.17) Gecko/2010010604 Linux Mint/7 (Gloria) Firefox/3.0.17'
-    => array('firefox', '3.0', 'linux', 'gecko'),
+    => array('firefox', '3.0', 'Linux', 'gecko'),
 
   // Firefox Windows
   'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 GTB6 (.NET CLR 3.5.30729)'
-    => array('firefox', '3.5', 'windows', 'gecko'),
+    => array('firefox', '3.5', 'Windows 7', 'gecko'),
 
   // Firefox OSX
   'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.1.8) Gecko/20100202 Firefox/3.5.8'
-    => array('firefox', '3.5', 'macintosh', 'gecko'),
+    => array('firefox', '3.5', 'Mac OS X', 'gecko'),
 
   // Chrome Linux
   'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.43 Safari/532.5'
-    => array('chrome', '4.0', 'linux', 'webkit'),
+    => array('chrome', '4.0', 'Linux', 'webkit'),
 
   // Speedy Spider
   'Speedy Spider (http://www.entireweb.com/about/search_tech/speedy_spider/)'
@@ -50,15 +50,15 @@
 
   // Minefield Mac
   'Gecko 20100113Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.3a1pre) Gecko/20100113 Minefield/3.7a1pre'
-    => array('firefox', '3.7', 'macintosh', 'gecko'),
+    => array('firefox', '3.7', 'Mac OS X', 'gecko'),
 
   // IE7 Windows
   'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/4.0; GTB6; SLCC1; .NET CLR 2.0.50727; OfficeLiveConnector.1.3; OfficeLivePatch.0.0; .NET CLR 3.5.30729; InfoPath.2; .NET CLR 3.0.30729; MSOffice 12)'
-    => array('msie', '7.0', 'windows', 'trident'),
+    => array('msie', '7.0', 'Windows Vista', 'trident'),
 
   // IE6 Windows
   'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; DigExt)'
-    => array('msie', '6.0', 'windows', 'trident'),
+    => array('msie', '6.0', 'Windows 2000', 'trident'),
 
   // Feedfetcher Google
   'Feedfetcher-Google; (+http://www.google.com/feedfetcher.html; 2 subscribers; feed-id=6924676383167400434)'
@@ -78,31 +78,42 @@
 
   // Iphone
   'Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_1_2 like Mac OS X; de-de) AppleWebKit/528.18 (KHTML, like Gecko) Mobile/7D11'
-    => array('applewebkit', '528.18', 'iphone', 'webkit'),
+    => array('applewebkit', '528.18', 'iPhone', 'webkit'),
 
-    // Motorola Xoom
-    'Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13' =>
-    array('android', '3.0', 'android', 'webkit'),
+  // Motorola Xoom
+  'Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'
+    => array('safari', '4.0', 'Android 3.0', 'webkit'),
 
-    // Samsung Galaxy Tab
-    'Mozilla/5.0 (Linux U Android 2.2 es-es GT-P1000 Build/FROYO) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1' =>
-    array('android', '2.2', 'android', 'webkit'),
+  // Samsung Galaxy Tab
+  'Mozilla/5.0 (Linux U Android 2.2 es-es GT-P1000 Build/FROYO) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1'
+    => array('safari', '4.0', 'Android 2.2', 'webkit'),
 
-    // Google Nexus
-    'Mozilla/5.0 (Linux; U; Android 2.2; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1' =>
-    array('android', '2.2', 'android', 'webkit'),
+  // Google Nexus
+  'Mozilla/5.0 (Linux; U; Android 2.2; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1'
+    => array('safari', '4.0', 'Android 2.2', 'webkit'),
 
-    // HTC Desire
+  // HTC Desire
+  'Mozilla/5.0 (Linux; U; Android 2.1-update1; de-de; HTC Desire 1.19.161.5 Build/ERE27) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17'
+    => array('safari', '4.0', 'Android 2.1', 'webkit'),
 
-    'Mozilla/5.0 (Linux; U; Android 2.1-update1; de-de; HTC Desire 1.19.161.5 Build/ERE27) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17' =>
-    array('android', '2.1', 'android', 'webkit'),
+  'Mozilla/5.0 (Linux; U; Android 2.3.6; ru-ru; GT-B5512 Build/GINGERBREAD) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1'
+    => array('safari', '4.0', 'Android 2.3.6', 'webkit'),
 
-    'Mozilla/5.0 (Linux; U; Android 2.3.6; ru-ru; GT-B5512 Build/GINGERBREAD) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1' =>
-    array('android', '2.3.6', 'android', 'webkit'),
+  // Nexus 7
+  'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19'
+    => array('chrome', '18.0', 'Android 4.1.1', 'webkit'),
 
-    // Nexus 7
-    'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19' =>
-    array('android', '4.1.1', 'android', 'webkit'),
+  // Ipad
+  'Mozilla/5.0 (iPad; CPU OS 6_1_3 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10B329 Safari/8536.25'
+    => array('safari', '6.0', 'iPad', 'webkit'),
+
+  // Facebook
+  'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'
+    => array('facebookbot', '1.1', null, null),
+
+  // IE 11
+  'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
+    => array('msie', '11.0', 'Windows 7', 'trident'),
 );
 
 $t = new lime_test(count($tests));
@@ -120,6 +131,6 @@
   );
 
   $result = $parser->parse($userAgentString);
-  
+
   $t->is_deeply($result, $expected, $userAgentString.' -> '.implode(', ', $result));
 }
diff --git a/test/UserAgentTest.php b/test/UserAgentTest.php
index c92c275..feec580 100644
--- a/test/UserAgentTest.php
+++ b/test/UserAgentTest.php
@@ -13,7 +13,7 @@
 
 $t->is($userAgent->getBrowserVersion(), '3.6', '$userAgent->getBrowserVersion() works');
 
-$t->is($userAgent->getOperatingSystem(), 'linux', '$userAgent->getOperatingSystem() works');
+$t->is($userAgent->getOperatingSystem(), 'Linux', '$userAgent->getOperatingSystem() works');
 
 $t->is($userAgent->getEngine(), 'gecko', '$userAgent->getEngine() works');
 