在抓取需要用户登录后才能访问的数据时,常会用到 cURL 函数。本文给大家分享如何利用 PHP 的 cURL 扩展(curl_init()、curl_setopt()、curl_exec() 等函数)实现登录并抓取数据。cURL 在抓取网页的效率方面是比较高的,而且可以通过 curl_multi_* 系列函数并发处理多个请求。
平时使用 file_get_contents() 函数比较快捷,但效率要稍低一些。另外,使用 cURL 函数之前,需要先在 PHP 中开启 curl 扩展。
<?php
/**
 * Two-step login to a captcha-protected site using cURL.
 *
 * GET request (no POST data):
 *   1. Fetch the login page and store its cookies in $cookieVerify.
 *   2. Fetch the captcha image with those cookies and save it next to this
 *      script as verify.jpg.
 *   3. Print a form in which the user types the captcha text.
 *
 * POST request (form submitted):
 *   Send username, password and the typed captcha to the login URL using the
 *   cookies from step 1, store the resulting cookies in $cookieSuccess and
 *   print the raw response (headers included).
 */
//error_reporting(0);

// Cookie file for the pre-login session the captcha is bound to.
$cookieVerify = dirname(__FILE__) . "/verify.tmp";
// Cookie file that receives the cookies set by the login request.
$cookieSuccess = dirname(__FILE__) . "/1769.tmp";
// The captcha image is stored beside the script so that the relative
// <img src="verify.jpg"> in the form below resolves to it.
$verifyImage = dirname(__FILE__) . "/verify.jpg";

if (!$_POST) {
    // Fetch the login page and save its cookies.
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "http://www.idc75.com/manage/userlogin.html");
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieVerify);
    $rs = curl_exec($ch);
    $error = curl_error($ch);
    // Finish with this handle before the next request reads the cookie file.
    curl_close($ch);
    if ($rs === false) {
        exit("Failed to fetch the login page: " . $error);
    }

    // Fetch the captcha with the saved cookies; without them the captcha
    // would not correspond to this session.
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "http://www.idc75.com/include/getcode.php");
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieVerify);
    $rs = curl_exec($ch);
    $error = curl_error($ch);
    curl_close($ch);
    if ($rs === false) {
        exit("Failed to fetch the captcha image: " . $error);
    }

    // Store the captcha locally: requesting it a second time (e.g. letting
    // the browser load it from the remote site) may invalidate the code.
    if (file_put_contents($verifyImage, $rs) === false) {
        exit("Failed to write the captcha image to " . $verifyImage);
    }

    // Form for typing in the captcha by hand.
    echo "<form action=\"\" method=\"post\"><input type=\"text\" name=\"vcode\"><img src=\"verify.jpg\" /><br><input type=\"submit\" value=\"ok\"></form>";
} else {
    // Log in.
    $ch = curl_init();

    // Username / password.
    $user = "abc123";
    $pass = "123456";
    // Guard against a POST that does not carry the captcha field.
    $verify = isset($_POST["vcode"]) ? $_POST["vcode"] : "";
    $url = "http://www.idc75.com/userlogin.php?action=login";

    $fields_post = array(
        "username"  => $user,
        "userpwd"   => $pass,
        "logintype" => 1,
        "vcode"     => $verify,
    );
    // http_build_query() URL-encodes every value, so characters such as
    // "&", "=" or spaces in the input cannot corrupt the request body.
    $fields_string = http_build_query($fields_post, "", "&");

    // CURLOPT_HTTPHEADER expects a list of "Name: value" strings.
    $headers_login = array(
        "User-Agent: Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
    );

    curl_setopt($ch, CURLOPT_URL, $url);
    // Return the response as a string instead of printing it directly.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Send the cookies of the session the captcha was issued for.
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieVerify);
    // Include the response headers in the returned string.
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers_login);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieSuccess);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $fields_string);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    $result = curl_exec($ch);
    if ($result === false) {
        echo "Login request failed: " . curl_error($ch);
    } else {
        print_r($result);
    }
    curl_close($ch);
    // After a successful login the 1769.tmp cookie file contains the
    // user name and related session information.
}
?>