注册 登录  
 加关注
   显示下一条  |  关闭
温馨提示!由于新浪微博认证机制调整,您的新浪微博帐号绑定已过期,请重新绑定!立即重新绑定新浪微博》  |  关闭

Bioinformatics home

 
 
 

日志

 
 

一个简单的多socket http下载原型  

2009-11-13 00:40:07|  分类: Perl |  标签: |举报 |字号 订阅

  下载LOFTER 我的照片书  |

一个简单的多socket http下载原型
基于perl,使用IO::Select实现,并非多线程。可指定分几部分下载。
基本上没有作异常处理,没有处理redirect,甚至也没有判断对range头的响应是否为206.

还好的是它还可以工作,比wget快几倍地下载,挺好玩的.

perl module:

package HttpClient;

use strict;
use warnings;

use IO::Socket::INET;
use Data::Dumper;

my $crlf = “\r\n”;

my $buf_size = 8 * 1024;

sub new {
my $class = shift;
my %cnf = (@_);
my $self = {
state => ‘init’,
url => $cnf{url},
‘total_parts’ => $cnf{’total_parts’},
part => $cnf{part},
‘content_length’ => $cnf{’content_length’},
};
my $url = $self->{url};
my $host = $1 if $url =~ m{://([^/]*)};
my $file = $1 if $url =~ m{/([^/]*)$};
if ( defined $self->{part} ) {
$file .= “.part” . $self->{part};
}
$self->{host} = $host;
$self->{file} = $file;
my $port = 80;
$port = $1 if $host =~ /:(\d+)/;
my $sock = IO::Socket::INET->new(
PeerAddr => $host,
PeerPort => $port,
Proto => ‘tcp’,
Blocking => 0,
)
or die “can’t connect to server:$!\n”;
select($sock);
$| = 1;
select(STDOUT);
$self->{sock} = $sock;
bless $self, $class;
return $self;
}

sub sock {
return shift->{sock};
}

sub get_request_header {
my $self = shift;
return $self->{request} if defined $self->{request};
my $request =
“GET $self->{url} HTTP/1.1$crlf”
. “Host: $self->{host}$crlf”
. “Connection: close$crlf”;
if ( defined $self->{’total_parts’}
and defined $self->{part}
and defined $self->{’content_length’} )
{
my $length = $self->{’content_length’};
my $total_parts = $self->{’total_parts’};
my $part = $self->{part};
my $part_size = int( $length / $total_parts );
my $start_pos = $part_size * $part;
my $recved = 0;
if (-e $self->{file}) {
$recved = -s $self->{file};
$start_pos+=$recved;
}

my $recv_size =
( $part == $total_parts – 1 ) ? $length-$part*$part_size : $part_size;
$self->{start_pos} = $start_pos;
$self->{recv_size} = $recv_size-$recved;
print “part $self->{part} recv_size=$self->{recv_size},start_pos=$start_pos,recved=$recved,parts=$total_parts,length=$length\n”;
$request .=
“Range: bytes=$start_pos-” . ( $start_pos + $recv_size-1 ) . $crlf;
}
$request .= $crlf;
$self->{request} = $request;
return $request;
}

sub parse_header {
my ($self) = @_;
my $data = $self->{data};
return 1 if $self->{state} =~ /body/;
return 0 unless defined $data;
return 0 unless $data =~ m{^(.*?)(\r\n\r\n|\n\n)}s;
my $header_content = $1;
my $header_end = $2;
print $header_content, “\n”;
my @headers = split /\r?\n/, $header_content;
die “invalid header\n” unless scalar(@headers) > 0;
my $status_line = shift @headers;
$self->{status_line} = $status_line;
$self->{code} = $2 if $status_line =~ m{HTTP/1(.1)? (\d+)};
my $last_header;
my $header = {};

foreach my $line (@headers) {
if ( $line =~ /^\s+(.*)$/ ) {
$header->{$last_header} .= ” $1″;
}
elsif ( $line =~ /^([^:]+): (.*)$/ ) {
$last_header = $1;
my $value = $2;
$header->{$last_header} = $value;
}
else {
print “invalid header:$line\n”;
}
}
$self->{header} = $header;
$self->{’content_length’} = $header->{’Content-Length’}
unless defined $self->{’content_length’};
$self->{recv_size} = $self->{’content_length’};
$self->{data} = substr($data,length($header_content)+length($header_end));
$self->{state} = ‘body’;
return 1;
}

sub recv_data {
my ( $self, $data ) = @_;
if ( defined $self->{data} ) {
$self->{data} .= $data;
}
else {
$self->{data} = $data;
}
}

sub save_data {
my ( $self, $read_select ) = @_;
my $fh = $self->{fh};
if ( !defined $fh ) {
open $fh, “>$self->{file}” or die “can’t open file $self->{file} :$!\n”;
binmode $fh,”:bytes”;
$self->{fh} = $fh;
}
my $write_len = $self->{write_len} || 0;
my $recv_size = $self->{recv_size};
my $data = $self->{data};
my $max_len = length($data);
return unless $max_len > 0;
if ($max_len+$write_len > $recv_size) {
$max_len = $recv_size – $write_len ;
my $part = $self->{part} || 0;
print “part=$part,max_len=$max_len,write_len=$write_len\n”;
}

if ( $max_len == 0 ) {
$self->{done} = 1;
close $self->{fh};
$read_select->remove( $self->sock );
close $self->{sock};
print “$self->{file} recved $write_len bytes\n”;
$self->{parent}->child_done($self) if $self->{parent};
return;
}
my $len = syswrite( $fh, $data, $max_len )
or die “write data failed :$!\n”;
$self->{data} = substr( $data, $len );
$write_len += $len;
$self->{write_len} = $write_len;
}

sub child_done {
my ( $self, $child ) = @_;
$child->{done} = 1;
return unless $self->{done};
foreach my $c ( @{ $self->{children} } ) {
return unless $c->{done};
}

print “merge file\n”;
open FH, “>>$self->{file}”;
print “first part size:”,-s $self->{file},”\n”;
seek( FH, 0, 2 );
foreach my $c ( @{ $self->{children} } ) {
print “$c->{file} size:”,-s $c->{file},”\n”;
open CFH, “<$c->{file}”;

print “merge $c->{file}\n”;
my $buf;
for ( ; ; ) {
my $len = sysread( CFH, $buf, $buf_size );
last if !defined $len || $len == 0;
syswrite( FH, $buf, $len );
}
close CFH;
unlink $c->{file};
}
close FH;
}

sub handle_read {
my ( $self, $sock, $read_select, $write_select, $sock_client ) = @_;

my $data;
my $len = sysread( $sock, $data, $buf_size );
if ( $len == 0 ) {
print “sock $sock finished\n”;
$read_select->remove($sock);
close $sock;
print “$self->{file} size=”,-s $self->{file},”\n”;
$self->{parent}->child_done($self) if $self->{parent};
return;
}
$self->recv_data($data);
if ( $self->{state} !~ /body/ and $self->parse_header ) {
if ( !defined $self->{parent} and defined $self->{content_length} ) {
my $parts = $self->{total_parts} || 5;
$self->{children} = [];
my $length = $self->{’content_length’};
my $part_size = int( $length / $parts );
$self->{recv_size} = $part_size;
print “parent recv_size=$self->{recv_size}\n”;
foreach my $part ( 1 .. $parts – 1 ) {
my $child = HttpClient->new(
url => $self->{url},
‘total_parts’ => $parts,
part => $part,
‘content_length’ => $self->{content_length},
);
$sock_client->{ $child->sock } = $child;
$child->{parent} = $self;
push @{ $self->{children} }, $child;
$read_select->add( $child->sock );
$write_select->add( $child->sock );
}
}
}
else {
$self->save_data($read_select);
}
}

sub handle_write {
my ( $self, $sock, $read_select, $write_select, $sock_client ) = @_;

my $offset = 0;
$offset = $self->{request_offset} if defined $self->{request_offset};
my $request = $self->get_request_header;
if ( $offset == 0 ) {
print “try to send request\n”;
print $request;
}
print “offset=$offset\n”;
my $len = syswrite( $sock, $request, length($request) – $offset, $offset );
if ( !defined $len ) {
print STDERR “write failed:$!\n”;
$read_select->remove($sock);
$write_select->remove($sock);
}
else {
$offset += $len;
$self->{request_offset} = $offset;
if ( $offset == length($request) ) {
$write_select->remove($sock);
}
}
}

sub start {
my ($self) = @_;
use IO::Select;
my $r = IO::Select->new;
$r->add( $self->sock );

my $w = IO::Select->new;
$w->add( $self->sock );

my $sock_client = { $self->sock => $self };

use Time::HiRes qw(time);
my $start_time = time;
for ( ; ; ) {
last if ( $r->count == 0 );
my ( $rout, $wout, $eout ) = IO::Select->select( $r, $w, $r );
last unless defined $rout;

foreach my $sock ( @{$wout} ) {
my $c = $sock_client->{$sock};
if ( !defined $c ) {
die “oops,can’t find httpclient for $sock\n”;
}
$c->handle_write( $sock, $r, $w, $sock_client );
}
foreach my $sock ( @{$rout} ) {
my $c = $sock_client->{$sock};
if ( !defined $c ) {
die “oops,can’t find httpclient for $sock\n”;
}
$c->handle_read( $sock, $r, $w, $sock_client );
}
}
my $end_time = time;
my $used_time = $end_time – $start_time;
my $speed = $self->{content_length} / $used_time;
print “Done,spend $used_time seconds,speed:$speed bytes/seconds\n”;
}

1;


test perl script:


#!/usr/bin/perl
use strict;
use warnings;

use lib ‘.’;

use HttpClient;
use Getopt::Long;
$| = 1;
#my $url = ‘http://eclipse.cdpa.nsysu.edu.tw/downloads/drops/R-3.2.1-200609210945/eclipse-SDK-3.2.1-linux-gtk.tar.gz’;

my $url = ”;
my $total_parts = 1;
my $result = GetOptions (”url|u=s” => \$url,
                        “parts|p=i”   => \$total_parts,
                        );
unless ($result and $url=~m{://}) {
    print <<HELP
usage: perl http.pl –url=url [--parts=parts]
HELP
;
    exit;
}

my $client = HttpClient->new(url=>$url,’total_parts’=>$total_parts);
$client->start();
 

  评论这张
 
阅读(970)| 评论(0)
推荐 转载

历史上的今天

在LOFTER的更多文章

评论

<#--最新日志,群博日志--> <#--推荐日志--> <#--引用记录--> <#--博主推荐--> <#--随机阅读--> <#--首页推荐--> <#--历史上的今天--> <#--被推荐日志--> <#--上一篇,下一篇--> <#-- 热度 --> <#-- 网易新闻广告 --> <#--右边模块结构--> <#--评论模块结构--> <#--引用模块结构--> <#--博主发起的投票-->
 
 
 
 
 
 
 
 
 
 
 
 
 
 

页脚

网易公司版权所有 ©1997-2017