采集源码
<?php
/**
 * Scrapes the goods list from the Juanpi U-station page with simple_html_dom
 * and dumps the next-page link followed by the collected items via print_r().
 *
 * Each collected item is an array with the keys, in this order:
 *   url, pic_url, pic_urls (only kept when no lazy-load image is present),
 *   title, conprice, price, stime, shop_type.
 *
 * A field whose node is missing from the markup is stored as null instead of
 * aborting the whole run with a "member of null" fatal error.
 */
include './simple_html_dom.php';

$url = 'http://juanpi.uz.taobao.com/';

// NOTE(review): this relies on the bundled simple_html_dom constructor loading
// a URL passed as its first argument - confirm against the library version in use.
$sdom = new simple_html_dom($url);

// find(selector, 0) yields null when nothing matches, so guard before chaining.
$ul = $sdom->find('ul[class=main-goods-list]', 0);
$lis = $ul ? $ul->find('li') : array();

// Output key => selector for every field that is read as plain text.
$textSelectors = array(
    'title' => 'a[class=title]',
    'conprice' => 'span[class=price-current]',
    'price' => 'span[class=price-old]',
    'stime' => 'span[class=state-time]',
    'shop_type' => 'div[class=btn-new] a strong',
);

$conts = array();
foreach ($lis as $k => $li) {
    $link = $li->find('a', 0);
    $img = $li->find('img', 0);

    $conts[$k]['url'] = $link ? $link->href : null;
    $conts[$k]['pic_url'] = $img ? $img->src : null;
    $conts[$k]['pic_urls'] = $img ? $img->getAttribute('data-ks-lazyload') : null;

    foreach ($textSelectors as $field => $selector) {
        $node = $li->find($selector, 0);
        $conts[$k][$field] = $node ? $node->plaintext : null;
    }

    // Lazy-loaded images carry the real address in data-ks-lazyload; when it
    // is present it replaces pic_url and the temporary key is dropped.
    if ($conts[$k]['pic_urls']) {
        $conts[$k]['pic_url'] = $conts[$k]['pic_urls'];
        unset($conts[$k]['pic_urls']);
    }
}

// Look for the next page; the link may be absent, in which case nothing is printed.
$pgnextLink = $sdom->find('a[class=pg-next]', 0);
$pgnext = $pgnextLink ? $pgnextLink->href : null;

print_r($pgnext);
print_r($conts);
?>
未经允许不得转载:开心乐窝-乐在其中 » simple_html_dom 采集实战 (对卷皮U站商品进行采集)