以前写的一段用正则表达式抓取安居客经纪人信息的代码,但是不确定现在还有没有用了!
// Scrapes one Anjuke (Shanghai) broker profile page and appends the result to a file.
//
// Steps performed by the visible code:
//   1. Build the profile URL for broker id $i and download the page with
//      file_get_contents() (the return value is not checked for failure).
//   2. Run one preg_match_all() per field against the page: name, registration
//      date, mobile number, service area, last login and e-mail.
//   3. Strip the field labels from the matched text with str_replace().
//   4. Assemble a double-quoted, comma-separated line and, when the name is
//      non-empty, fwrite() it to $fp and echo a confirmation mentioning $filename.
//
// NOTE(review): $i, $fp and $filename are not defined in this fragment, and the
// final line closes one more brace than the two `if`s opened here, so this is
// presumably the body of a loop over $i whose header is not shown -- confirm.
// NOTE(review): the patterns and label strings contain line breaks followed by
// "- " and stray pieces such as "]*)>" and "]*>"; this looks like HTML markup
// that was stripped when the snippet was published. They most likely need the
// original tags restored before they can match anything -- verify against the
// real page source.
// NOTE(review): the e-mail pattern places ^ and $ inside an optional group; the
// code only uses the whole match ($email[0][0]), not that group.
// NOTE(review): the text after the last closing brace repeats part of the name
// pattern and ends in an unterminated preg_match_all('/ -- appears to be paste
// residue.
$url="http://shanghai.anjuke.com/brokerinfo.php?bid={$i}";
$content= file_get_contents($url);
//preg_match_all ('/
- (.*)[^<]/i', $content, $matches);
//preg_match_all('/]*>[\s]*([A-Z\s]*)?[\s]*<\/div>/', $content, $name);
//preg_match_all ('/
- 姓 名:]*>([^\x00-\x80])?[^<]*/', $content, $name);
preg_match_all ('/
- 姓 名:]*)>([^\x00-\x80])?[^<]*/', $content, $name); preg_match_all ('/
- 注册时间:([0-9]{4}-[0-9]{1,2}-[0-9]{1,2})?[^<]*/', $content, $date); preg_match_all ('/
- 手机号码:(\d{11})?[^<]*/', $content, $cell_phone); preg_match_all ('/
- 服务区域:([^\x00-\x80])?[^<]*/', $content, $area); preg_match_all ('/
- 上次登录:([^\x00-\x80])?[^<]*/', $content, $last_logn); preg_match_all ('/
- 电子邮件:(^[_.0-9a-z-]+@[0-9a-z][0-9a-z-]+.[a-z]{2,3}$)?[^<]*/', $content, $email); $username = str_replace("
- 姓 名:",'', str_replace ($name[1][0],'', $name[0][0])); $email = str_replace ("
- 电子邮件:",'',$email[0][0]); $phone = $cell_phone[1][0]; $date = $date[1][0]; $area = str_replace ("
- 服务区域:",'',$area[0][0]); $last_logn = str_replace ("
- 上次登录:",'',$last_logn[0][0]); $user_info = array(); $user_info[$i] ="\"" .$i. "\",\"". $username ."\",\"". $phone ."\",\"". $email ."\",\"". $date ."\",\"". $area ."\",\"". $last_logn ."\"\n"; if ($username != ''){ if (fwrite($fp, $user_info[$i])) { echo "成功地将 ". $user_info[$i] ." 写入到文件 " .$filename."
"; } } } - 姓 名:]*)>([^\x00-\x80])?[^<]*/', $content, $name); preg_match_all ('/