Diff
8 years ago
dashboard
7 years ago
rest-api
7 years ago
.htaccess
7 years ago
Diff.php
14 years ago
GeoLite2-Country.mmdb
7 years ago
IPTraf.php
8 years ago
IPTrafList.php
7 years ago
WFLSPHP52Compatability.php
7 years ago
compat.php
8 years ago
conntest.php
7 years ago
cronview.php
8 years ago
dbview.php
8 years ago
diffResult.php
8 years ago
email_genericAlert.php
7 years ago
email_newIssues.php
7 years ago
email_unlockRequest.php
8 years ago
email_unsubscribeRequest.php
7 years ago
flags.php
7 years ago
live_activity.php
8 years ago
menu_dashboard.php
7 years ago
menu_dashboard_options.php
7 years ago
menu_firewall.php
7 years ago
menu_firewall_blocking.php
7 years ago
menu_firewall_blocking_options.php
8 years ago
menu_firewall_waf.php
7 years ago
menu_firewall_waf_options.php
7 years ago
menu_options.php
7 years ago
menu_scanner.php
7 years ago
menu_scanner_credentials.php
8 years ago
menu_scanner_options.php
8 years ago
menu_support.php
7 years ago
menu_tools.php
7 years ago
menu_tools_diagnostic.php
7 years ago
menu_tools_importExport.php
7 years ago
menu_tools_livetraffic.php
7 years ago
menu_tools_twoFactor.php
7 years ago
menu_tools_whois.php
8 years ago
menu_wordfence_central.php
7 years ago
noc1.key
7 years ago
sysinfo.php
8 years ago
unknownFiles.php
8 years ago
viewFullActivityLog.php
8 years ago
wf503.php
7 years ago
wfAPI.php
7 years ago
wfActivityReport.php
7 years ago
wfAdminNoticeQueue.php
8 years ago
wfArray.php
7 years ago
wfBrowscap.php
8 years ago
wfBrowscapCache.php
7 years ago
wfBulkCountries.php
7 years ago
wfCache.php
9 years ago
wfCentralAPI.php
7 years ago
wfConfig.php
7 years ago
wfCrawl.php
8 years ago
wfCredentialsController.php
7 years ago
wfCrypt.php
7 years ago
wfDB.php
7 years ago
wfDashboard.php
7 years ago
wfDateLocalization.php
8 years ago
wfDiagnostic.php
7 years ago
wfDict.php
8 years ago
wfDirectoryIterator.php
7 years ago
wfHelperBin.php
11 years ago
wfHelperString.php
11 years ago
wfIPWhitelist.php
7 years ago
wfImportExportController.php
7 years ago
wfIssues.php
7 years ago
wfJWT.php
7 years ago
wfLockedOut.php
7 years ago
wfLog.php
7 years ago
wfMD5BloomFilter.php
8 years ago
wfModuleController.php
7 years ago
wfNotification.php
8 years ago
wfOnboardingController.php
7 years ago
wfPersistenceController.php
8 years ago
wfRESTAPI.php
7 years ago
wfScan.php
7 years ago
wfScanEngine.php
7 years ago
wfSchema.php
7 years ago
wfStyle.php
7 years ago
wfSupportController.php
7 years ago
wfUnlockMsg.php
7 years ago
wfUpdateCheck.php
8 years ago
wfUtils.php
7 years ago
wfVersionCheckController.php
8 years ago
wfView.php
10 years ago
wfViewResult.php
8 years ago
wordfenceClass.php
7 years ago
wordfenceConstants.php
7 years ago
wordfenceHash.php
7 years ago
wordfenceScanner.php
7 years ago
wordfenceURLHoover.php
7 years ago
wfCrawl.php
188 lines
| 1 | <?php |
| 2 | require_once('wfUtils.php'); |
/**
 * Crawler detection and verification helpers.
 *
 * Combines user-agent matching, reverse/forward DNS confirmation and a remote
 * API lookup to decide whether a request comes from a crawler, and in
 * particular from a genuine Google crawler. Verification results are cached
 * in the `wfCrawlers` network table.
 */
class wfCrawl {
	const GOOGLE_BOT_VERIFIED = 'verified';
	const GOOGLE_BOT_FAKE = 'fakeBot';
	const GOOGLE_BOT_UNDETERMINED = 'undetermined';

	/**
	 * User-agent patterns treated as Google crawlers. They are stored without
	 * modifiers; isGoogleCrawler() appends the case-insensitive flag.
	 *
	 * @var string[]
	 */
	private static $googPat = array(
		'@^Mozilla/5\\.0 \\(.*Google Keyword Tool.*\\)$@',
		'@^Mozilla/5\\.0 \\(.*Feedfetcher\\-Google.*\\)$@',
		'@^Feedfetcher\\-Google\\-iGoogleGadgets.*$@',
		'@^searchbot admin\\@google\\.com$@',
		'@^Google\\-Site\\-Verification.*$@',
		'@^Google OpenSocial agent.*$@',
		'@^.*Googlebot\\-Mobile/2\\..*$@',
		'@^AdsBot\\-Google\\-Mobile.*$@',
		'@^google \\(.*Enterprise.*\\)$@',
		'@^Mediapartners\\-Google.*$@',
		'@^GoogleFriendConnect.*$@',
		'@^googlebot\\-urlconsole$@',
		'@^.*Google Web Preview.*$@',
		'@^Feedfetcher\\-Google.*$@',
		'@^AppEngine\\-Google.*$@',
		'@^Googlebot\\-Video.*$@',
		'@^Googlebot\\-Image.*$@',
		'@^Google\\-Sitemaps.*$@',
		'@^Googlebot/Test.*$@',
		'@^Googlebot\\-News.*$@',
		'@^.*Googlebot/2\\.1.*$@',
		'@^AdsBot\\-Google.*$@',
		'@^Google$@'
	);

	/**
	 * Decides whether the given user agent belongs to a crawler.
	 *
	 * When the browscap lookup yields nothing, or only the generic
	 * "DefaultProperties" entry, the decision is delegated to
	 * wfLog::isHumanRequest() for the current IP. Otherwise the browscap
	 * "Crawler" flag decides.
	 *
	 * @param string $UA User-agent string to classify.
	 * @return bool True when the request is considered to come from a crawler.
	 */
	public static function isCrawler($UA){
		$browscap = new wfBrowscap();
		$b = $browscap->getBrowser($UA);
		// isset() guard avoids an undefined-index notice when the record has no 'Parent' key.
		if (!$b || (isset($b['Parent']) && $b['Parent'] == 'DefaultProperties')) {
			$IP = wfUtils::getIP();
			return !wfLog::isHumanRequest($IP, $UA);
		}
		if (isset($b['Crawler']) && $b['Crawler']) {
			return true;
		}

		return false;
	}

	/**
	 * Verifies a crawler by reverse DNS (PTR) followed by a confirming
	 * forward lookup, caching the outcome in the wfCrawlers table.
	 *
	 * A cached status newer than WORDFENCE_CRAWLER_VERIFY_CACHE_TIME seconds
	 * is used when present. Otherwise the outcome is stored as one of
	 * 'noPTR', 'badPTR', 'fwdFail' or 'verified'.
	 *
	 * @param string $hostPattern Regular expression (with delimiters) the PTR host must match.
	 * @param string $IP IP address to verify.
	 * @return bool True only when the PTR host matches and resolves back to $IP.
	 */
	public static function verifyCrawlerPTR($hostPattern, $IP){
		$table = wfDB::networkTable('wfCrawlers');
		$db = new wfDB();
		$IPn = wfUtils::inet_pton($IP);
		$status = $db->querySingle("select status from $table where IP=%s and patternSig=UNHEX(MD5('%s')) and lastUpdate > unix_timestamp() - %d", $IPn, $hostPattern, WORDFENCE_CRAWLER_VERIFY_CACHE_TIME);
		if ($status) {
			// Any cached status other than 'verified' is a cached failure.
			return $status == 'verified';
		}

		$host = wfUtils::reverseLookup($IP);
		if (!$host) {
			self::storePTRStatus($db, $table, $IPn, $hostPattern, 'noPTR', '');
			return false;
		}
		if (!preg_match($hostPattern, $host)) {
			self::storePTRStatus($db, $table, $IPn, $hostPattern, 'badPTR', $host);
			return false;
		}

		// The PTR name alone can be spoofed; require the forward lookup to lead back to the same IP.
		$resultIPs = wfUtils::resolveDomainName($host);
		if (is_array($resultIPs) && in_array($IP, $resultIPs, true)) {
			self::storePTRStatus($db, $table, $IPn, $hostPattern, 'verified', $host);
			return true;
		}

		self::storePTRStatus($db, $table, $IPn, $hostPattern, 'fwdFail', $host);
		return false;
	}

	/**
	 * Inserts or refreshes the cached PTR verification row for an IP/pattern pair.
	 *
	 * @param wfDB $db Database wrapper used for the write.
	 * @param string $table Name of the crawler cache table.
	 * @param string $IPn IP as returned by wfUtils::inet_pton().
	 * @param string $hostPattern Pattern whose MD5 is used as the row signature.
	 * @param string $status Status to store.
	 * @param string $host PTR host name to store (empty string when none).
	 * @return void
	 */
	private static function storePTRStatus($db, $table, $IPn, $hostPattern, $status, $host){
		$db->queryWrite("insert into $table (IP, patternSig, status, lastUpdate, PTR) values (%s, UNHEX(MD5('%s')), '%s', unix_timestamp(), '%s') ON DUPLICATE KEY UPDATE status='%s', lastUpdate=unix_timestamp(), PTR='%s'", $IPn, $hostPattern, $status, $host, $status, $host);
	}

	/**
	 * Checks whether a user agent contains a "Googlebot/x.y" token.
	 *
	 * @param string|null $userAgent User agent to test; defaults to the current request's.
	 * @return bool
	 */
	public static function isGooglebot($userAgent = null){
		if ($userAgent === null) {
			$userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
		}
		return (bool) preg_match('/Googlebot\/\d\.\d/', $userAgent);
	}

	/**
	 * Checks whether a user agent matches any of the known Google crawler
	 * patterns (case-insensitively).
	 *
	 * @param string|null $userAgent User agent to test; defaults to the current request's.
	 * @return bool
	 */
	public static function isGoogleCrawler($userAgent = null){
		if ($userAgent === null) {
			$userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
		}
		foreach (self::$googPat as $pat) {
			if (preg_match($pat . 'i', $userAgent)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Determines whether a request claiming to be a Google crawler is genuine.
	 *
	 * The user agent must match a Google crawler pattern. The IP is then
	 * checked via PTR verification against the pattern returned by
	 * wordfence::getLog()->getGooglePattern(), and failing that via
	 * verifyGooglebotViaNOC1(). Definite answers are memoized per IP for the
	 * lifetime of the process; an undetermined result is treated as
	 * verified but is not memoized.
	 *
	 * @param string|null $ip IP to check; defaults to the current request's IP.
	 * @param string|null $ua User agent to check; defaults to the current request's.
	 * @return bool
	 */
	public static function isVerifiedGoogleCrawler($ip = null, $ua = null) {
		static $verified = array();
		if ($ip === null) {
			$ip = wfUtils::getIP();
		}
		if (array_key_exists($ip, $verified)) {
			return $verified[$ip];
		}
		if (!self::isGoogleCrawler($ua)) {
			$verified[$ip] = false;
			return false;
		}
		if (self::verifyCrawlerPTR(wordfence::getLog()->getGooglePattern(), $ip)) {
			$verified[$ip] = true;
			return true;
		}

		$noc1Status = self::verifyGooglebotViaNOC1($ip);
		if ($noc1Status == self::GOOGLE_BOT_VERIFIED) {
			$verified[$ip] = true;
			return true;
		}
		if ($noc1Status == self::GOOGLE_BOT_FAKE) {
			$verified[$ip] = false;
			return false;
		}

		return true; //We were unable to successfully validate Googlebot status so default to being permissive
	}

	/**
	 * Attempts to verify whether an IP claiming to be Googlebot is actually Googlebot.
	 *
	 * A cached verdict newer than WORDFENCE_CRAWLER_VERIFY_CACHE_TIME seconds
	 * is used when available. Otherwise the 'verify_googlebot' API action is
	 * called and its verdict is cached and returned. Any exception raised
	 * while calling the API or caching the verdict yields
	 * GOOGLE_BOT_UNDETERMINED.
	 *
	 * @param string|null $ip IP to check; defaults to the current request's IP.
	 * @return string One of the GOOGLE_BOT_* constants.
	 */
	public static function verifyGooglebotViaNOC1($ip = null) {
		$table = wfDB::networkTable('wfCrawlers');
		if ($ip === null) {
			$ip = wfUtils::getIP();
		}
		$db = new wfDB();
		$IPn = wfUtils::inet_pton($ip);
		$patternSig = 'googlenoc1';
		$status = $db->querySingle("select status from $table
				where IP=%s
				and patternSig=UNHEX(MD5('%s'))
				and lastUpdate > unix_timestamp() - %d",
			$IPn,
			$patternSig,
			WORDFENCE_CRAWLER_VERIFY_CACHE_TIME);
		if ($status === 'verified') {
			return self::GOOGLE_BOT_VERIFIED;
		} else if ($status === 'fakeBot') {
			return self::GOOGLE_BOT_FAKE;
		}

		$api = new wfAPI(wfConfig::get('apiKey'), wfUtils::getWPVersion());
		try {
			$data = $api->call('verify_googlebot', array(
				'ip' => $ip,
			));
			// The constant values double as the status strings stored in the cache table.
			$verdict = (is_array($data) && !empty($data['verified']))
				? self::GOOGLE_BOT_VERIFIED
				: self::GOOGLE_BOT_FAKE;
			// Cache results
			$db->queryWrite("INSERT INTO {$table} (IP, patternSig, status, lastUpdate) VALUES ('%s', UNHEX(MD5('%s')), '%s', unix_timestamp()) ON DUPLICATE KEY UPDATE status = VALUES(status), lastUpdate = VALUES(lastUpdate)", $IPn, $patternSig, $verdict);
			// A negative verdict must be returned as well; previously it was dropped and
			// the caller saw "undetermined" for a bot the API had just rejected.
			return $verdict;
		} catch (Exception $e) {
			// Do nothing, bail
		}
		return self::GOOGLE_BOT_UNDETERMINED;
	}
}
| 188 |