useSession = false;
        $this->setCookieFile("./cookie_".microtime(true)."_".rand(0,999999).".txt");
        $this->userAgent = ""; // TODO: Add a representative userAgent
    }

    ////
    // Sets the cookie file.
    //
    // WARNING: Use this mutator instead of directly changing the file-name
    // since this function will take care of cross-platform issues.
    //
    // On Windows, back-slashes are normalised to forward slashes and a relative
    // name is anchored to the current working directory. A name that is already
    // absolute (drive-letter or UNC style) is only normalised; it is NOT prefixed
    // with the working directory, which would produce an unusable path.
    ////
    function setCookieFile($fileName){
        $this->cookieFile = $fileName;

        // Make the filename work with windows.
        if(substr(PHP_OS, 0, 3) == 'WIN'){
            // Matches "C:\..." / "C:/..." as well as "\\server\..." / "//server/...".
            $isAbsolute = (preg_match('#^(?:[A-Za-z]:[\\\\/]|[\\\\/]{2})#', $fileName) === 1);
            $path = $isAbsolute ? $fileName : getcwd().'/'.$fileName;
            $this->cookieFile = str_replace('\\', '/', $path);
        }
    }

    ////
    // If useThem is true, sessions will be used. If a file-name is passed in, that
    // will be used as the cookie-file (instead of the default).
    ////
    function useSession($useThem, $fileName=""){
        $this->useSession = $useThem;

        // An empty name means "keep whatever cookie file is already configured".
        if($fileName != ""){
            $this->setCookieFile($fileName);
        }
    }

    ////
    // Does an HTTP GET on the url passed in and returns the source-code of the page.
    //
    // Returns the body as a string ("" for an empty document). When cURL fails,
    // the error text is handed to dispError() and boolean false is returned.
    ////
    function getPage($url){
        $ch = $this->createHandle($url);
        return $this->executeHandle($ch);
    }

    ////
    // Posts the data to the given url and returns the code of the resulting page.
    //
    // postData is a flat array of field-name => value. Both names and values are
    // URL-encoded before being sent, so a name or value containing "&", "=" or
    // spaces cannot corrupt the request body.
    //
    // Returns the body as a string ("" for an empty document). When cURL fails,
    // the error text is handed to dispError() and boolean false is returned
    // (same contract as getPage()).
    ////
    function postPage($url, $postData=array()){
        // Turn the array of data into a string for cURL.
        $pairs = array();
        foreach($postData as $pKey=>$pVal){
            $pairs[] = urlencode((string)$pKey)."=".urlencode($pVal);
        }
        $postFields = implode("&", $pairs); // separate key/val pairs from each other with "&"

        $ch = $this->createHandle($url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $postFields);

        return $this->executeHandle($ch);
    } // end postPage()

    ////
    // Since not all sites are expected to co-operate, this will let us fake them out.
    ////
    function setUserAgent($userAgentString){
        $this->userAgent = $userAgentString;
    } // end setUserAgent()

    ////
    // Creates a cURL handle for the url with the options shared by getPage() and
    // postPage(): optional cookie persistence, redirect following, body returned
    // as a string, and the configured user-agent (if any).
    ////
    private function createHandle($url){
        $ch = curl_init();

        if($this->useSession){
            curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookieFile); // initiates cookie file if needed
            curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookieFile); // uses cookies from previous session if they exist
        }

        // NOTE(review): peer-certificate verification is switched off, which allows
        // man-in-the-middle interception of https traffic. Left unchanged because
        // existing callers may rely on reaching hosts with invalid certificates.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);

        // URL schemes are case-insensitive, so compare without regard to case.
        if(strncasecmp($url, "https://", 8) === 0){
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // allow redirects
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // return into a variable

        if($this->userAgent != ""){
            curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);
        }

        return $ch;
    }

    ////
    // Runs the prepared cURL handle, closes it, and returns the response body.
    ////
    private function executeHandle($ch){
        $retVal = curl_exec($ch);

        // Make the strange behavior where curl_exec returns boolean true on an
        // empty document into something more like what we'd expect.
        if($retVal === true){
            $retVal = "";
        } else if($retVal === false){
            // dispError() is defined outside this class; whether it halts
            // execution cannot be told from here, so false is still returned.
            $errorStr = curl_error($ch);
            dispError($errorStr);
        }

        curl_close($ch);

        return $retVal;
    }
} // end class Scraper
?>