use LWP::UserAgent;
use HTML::TreeBuilder;
use LWP::Simple;
use URI;
use Encode;
# Start with empty work lists: @list_url receives the search-result pages
# built below; @download_url is only reset here.
@download_url = ();
# Searching Sina Vdisk for "perl" yields 16 pages of results, so one search
# URL is built per page number ("perl" is the keyword used for testing).
@list_url = map {
    my $search_url = URI->new('http://vdisk.weibo.com/search/');
    # All form pairs:
    $search_url->query_form(
        'keyword' => "perl",
        'sortby'  => "default",
        'page'    => $_,
    );
    $search_url;
} 1 .. 16;
# Crawl every search-result URL in @list_url and, for each element whose
# class attribute is "sort_name_intro", push a "href<TAB>file name" record
# onto @download_url.
my $ua = LWP::UserAgent->new;
#open fh,">aa.txt";
foreach my $page_url (@list_url)
{
    my $response = $ua->get($page_url);
    # A failed request has no result markup worth parsing; report and move on.
    unless ($response->is_success)
    {
        warn "error : cannot fetch $page_url : ", $response->status_line, "\n";
        next;
    }
    # Raw (undecoded) bytes are kept on purpose: the title attribute is
    # decoded from UTF-8 explicitly further down.
    my $html = $response->content;
    my $tree = HTML::TreeBuilder->new; # empty tree
    $tree->parse($html) or print "error : parse html ";
    # Signal end of document so the parser flushes pending content and
    # finishes building the tree before it is searched.
    $tree->eof;
    my @entries = $tree->find_by_attribute("class","sort_name_intro");
    print "error : cannot find pdf_name " unless @entries;
    foreach my $entry (@entries)
    {
        # Skip entries without a link instead of dying on an undef node.
        my $link = $entry->look_down(_tag=>'a') or next;
        my $href  = $link->attr('href');
        my $title = $link->attr('title');
        next unless defined $href && defined $title;
        # The title is re-encoded from UTF-8 to cp936 and later used as the
        # local file name.
        my $file_name = encode("cp936", decode("utf-8", $title));
        # Add the target link URL and the file name to the download list.
        push @download_url, "$href\t$file_name";
    }
    # Release the tree explicitly; older HTML::Element versions hold
    # circular references that are not freed automatically.
    $tree->delete;
}
# For every "href<TAB>file name" record in @download_url: fetch the detail
# page, extract the real file URL from the fileDown.init(...) data embedded
# in it, and store the file locally under the recorded name.
# NOTE(review): the file name comes from a remote page title and is used as
# a local path unchanged - consider sanitizing it before writing.
foreach my $record (@download_url)
{
    # Split on the TAB separator only (at most two fields) so that file
    # names containing spaces are not truncated at the first space.
    my ($page_url, $file_name) = split /\t/, $record, 2;
    next unless defined $file_name && length $file_name;

    my $html = get($page_url);
    unless (defined $html)
    {
        warn "error : cannot fetch $page_url\n";
        next;
    }

    # This is the key step: the real download URL of the file is found in
    # the "url" field that follows fileDown.init in the page source.
    unless ($html =~ /fileDown\.init.*?\"url\":\"(.*?)\",/)
    {
        warn "error : cannot find download url in $page_url\n";
        next;
    }
    my $file_url = $1;
    # Remove the backslashes that escape the URL inside the page source.
    $file_url =~ s/\\//g;
    print $file_url;

    my $status = getstore("$file_url","$file_name");
    warn "error : download of $file_url failed with status $status\n"
        unless is_success($status);
}