'; $myfile = fopen('D:\歌词.txt.txt','r'); echo '1'; $info = []; $num = 0; $number = 0; while($line = fgets($myfile)){ //获取用户名 $net_name_index = strpos($line,'用户名:'); $net_name_end = strpos($line,'email:',$net_name_index); $net_name = trim(substr($line,$net_name_index+strlen('用户名:'),$net_name_end-($net_name_index+strlen('用户名:')))); //获取email $email_index = strpos($line,'email:',$net_name_end); $email_end = strpos($line,'真名:',$email_index); $email = trim(substr($line,$email_index+strlen('email:'),$email_end-($email_index+strlen('email:')))); //获取真名 $name_index = strpos($line,'真名:',$email_end); $name_end = strpos($line,'身份证号:',$name_index); $name = trim(substr($line,$name_index+strlen('真名:'),$name_end-($name_index+strlen('真名:')))); //获取身份证号 $idCard_index = strpos($line,'身份证号:',$name_end); $idCard_end = strpos($line,'绑定手机号',$idCard_index); $idCard = trim(substr($line,$idCard_index+strlen('身份证号:'),$idCard_end-($idCard_index+strlen('身份证号:')))); if(strlen($idCard)!=18){ continue; } $number = $number+1; //获取手机号 $phone_number_index = strpos($line,'绑定手机号',$idCard_end); $phone_number_end = strpos($line,'账户可',$phone_number_index); $phone_number = trim(substr($line,$phone_number_index+strlen('绑定手机号'),$phone_number_end-($phone_number_index+strlen('绑定手机号')))); //获取银行卡号 $bankCard_index = strpos($line,'行卡号:',$phone_number_end); $bankCard_end = strpos($line,'银行:',$bankCard_index); $bankCard = trim(substr($line,$bankCard_index+strlen('行卡号:'),$bankCard_end-($bankCard_index+strlen('行卡号:')))); //这么多重复代码。我甚至可以写个类 //抓取身份证号信息集 $idCrad_url = 'http://qq.ip138.com/idsearch/index.asp?action=idcard&userid='.$idCard; $idCrad_curl = curl($idCrad_url,'gb2312'); $idCard_result = getIDinfo($idCrad_curl); $idnex = $num++; if(strlen($bankCard)>15&&strlen($bankCard)<20){ $bankCard_url = 'http://www.cardcn.com/search.php?word='.$bankCard; $bankCard_curl = curl($bankCard_url); if(substr_count($bankCard_curl,'对不起')==0){ $bankCard_result = getBankinfo($bankCard_curl); $info[$idnex]['bankCard_info'] = 
$bankCard_result; } } $info[$idnex]['net_name'] = $net_name; $info[$idnex]['email'] = $email; $info[$idnex]['name'] = $name; $info[$idnex]['idCard'] = $idCard; $info[$idnex]['phone_number'] = $phone_number; $info[$idnex]['bankCard'] = $bankCard; $info[$idnex]['idCrad_info'] = $idCard_result; } cl_slqi($info); echo $number; }

/**
 * Fetch a URL with cURL and return the response body as UTF-8 text.
 *
 * Redirects are followed and response headers are left out of the result.
 * https URLs are supported; certificate and host verification are switched
 * off (see the note in the body).
 *
 * @param string $url    Address of the page to download.
 * @param string $coding Character set of the remote page. Any value other than
 *                       'utf-8' causes the body to be converted to UTF-8 with iconv.
 * @return string The response body, or an empty string when the handle cannot be
 *                created, the transfer fails, or the charset conversion fails.
 */
function curl($url,$coding='utf-8')
{
    $ch = curl_init();
    if ($ch === false) {
        // No handle: nothing to configure or close.
        return '';
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);          // do not include response headers in the output
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);  // return the body as a string instead of printing it
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);  // follow redirects

    // NOTE(review): certificate and host verification are disabled, so https
    // responses are not authenticated. Kept because the original does this on
    // purpose; no timeout is configured either, so a stalled server blocks the caller.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

    $result = curl_exec($ch);

    // Release the handle before looking at the result so it is freed on every path.
    curl_close($ch);

    if ($result === false) {
        // Transfer failed. Previously this leaked `false` to callers (utf-8 case)
        // or fed `false` into iconv (other charsets); normalise to an empty string.
        return '';
    }

    // Pages that are not already UTF-8 must be transcoded.
    if ($coding != 'utf-8') {
        $converted = iconv($coding, "utf-8//IGNORE", $result);

        // iconv reports failure (e.g. unknown charset) with false.
        return $converted === false ? '' : $converted;
    }

    return $result;
}

// Process the fetched ID-card lookup page and return the extracted fields
// ('sex' and 'idCard_space') as an array.
// NOTE(review): the `$idcard_place_end` statement below is not valid PHP as it
// stands (empty search string, unbalanced parentheses). It looks as if part of
// the original text was lost, possibly together with a further statement;
// restore it from the original file. The code is left exactly as found.
function getIDinfo($crul){
    $sex_index = strpos($crul,'别:');
    $date_index = strpos($crul,'生日期:',$sex_index);
    $idcard_place_index = strpos($crul,';地:',$date_index);
    $idcard_place_end = strpos($crul,''),4));
    $id_info['sex'] = trim(substr($crul,$sex_index+strlen('别:'),3));
    $id_info['idCard_space'] = trim(substr($crul,$idcard_place_index+strlen(';地:'),$idcard_place_end-($idcard_place_index+strlen(';地:'))));
    return $id_info;
}

// Process and return bank card information
function getBankinfo($bank_crul){
    $bank_info = [];
    // Bank card attribution info
    $back_space_index = strpos($bank_crul,'e">归属信息:');
    $back_space_end = strpos($bank_crul,'',$back_space_index);
    $bank_info['back_space'] = trim(substr($bank_crul,$back_space_index+strlen('e">归属信息:'),$back_space_end-($back_space_index+strlen('e">归属信息:'))));
    // Bank name
    $bank_name_index = strpos($bank_crul,'e">银行名称:',$back_space_end);
    $bank_name_end = strpos($bank_crul,'',$bank_name_index);
    $bank_info['bank_name'] = 
trim(substr($bank_crul,$bank_name_index+strlen('e">银行名称:'),$bank_name_end-($bank_name_index+strlen('e">银行名称:')))); //银行卡名称 $bankCard_name_index = strpos($bank_crul,'e">银行卡名:',$bank_name_end); $bankCard_name_end = strpos($bank_crul,'',$bankCard_name_index); $bank_info['bankCard_name'] = trim(substr($bank_crul,$bankCard_name_index+strlen('e">银行卡名:'),$bankCard_name_end-($bankCard_name_index+strlen('e">银行卡名:')))); //银行卡种类 $bank_info['bank_kind'] = getKeyWord($bank_crul,'