### ### Web 上のコンテンツを読む perl スクリプト ### A perl script to retrieve contents on the internet via http. ### (C) 2001-2004 笠井 崇文 (web@kasai.fm) 最終更新日: 2004年 3月 5日 ### (C) 2001-2004 Takafumi Kasai Last modified: 5 May, 2004 ### 本スクリプトは、自由に転載・引用・配布・使用・改変していただいて結構です。 ### 但し、その結果何らかの損害を被った場合も、責任を負いかねます。 ### あらかじめご了承ください。 ### と言いつつ虫の好い話ですが、バグや無駄な部分等を発見されましたら、上記 ### アドレスまでご一報頂ければ幸いです。 ### You can quote, distribute, use, and alter this script without ### any permission. But I never bear responsibility for any possible ### damage by this script. However, I hope to receive reports of ### bugs or wasteful part in this script from you. # ============================================================== # e 要件 / Requirements # # ・perl 5 以降 / perl 5 or later # ・以下の perl モジュール / perl modules follows: # Socket # FileHandle # MIME::Base64 # (ActivePerl 等に付属のものや、CPAN からダウンロードしたものを別途ご用意下さい) # (You can get them from ActiveStates or CPAN) # e 使用方法 / Usage # # (1) このスクリプト(getHTTP)を、@INC にパスの通った場所に置く。 # (2) 呼び出し元のプログラムで、 # require('getHTTP'); # と書く。 # (3) # ($http_response, $errorMessage) = &getHTTP('http://www.kasai.fm/'); # のように、Web 上のコンテンツを読みこめる。 # $http_response に、読み込んだコンテンツが入る。 # もしエラーが起ると、$errorMessage にエラー理由が入る。 # (1) Put this script where @INC can reach. # (2) In the caller script, put this code: # require('getHTTP'); # (3) # ($http_response, $errorMessage) = &getHTTP('http://www.kasai.fm/'); # will retrieve contents on the internet. # e 認証の必要なページを見る場合 / In case authorization required # ($http_response, $errorMessage) = &getHTTP('URL'=>'http://www.kasai.fm/', # 'UserID'=>'Mihata', 'Password'=>'Tatenashi'); # e プロクシを使う場合 / In case you use a proxy # ($http_response, $errorMessage) = &getHTTP('URL'=>'http://www.kasai.fm/', 'Proxy'=>'someproxy.com:8080'); # e 認証の必要なプロクシを使う場合 / In case you use a proxy which requires authorization # ($http_response, $errorMessage) = &getHTTP('URL'=>'http://www.kasai.fm/', 'Proxy'=>'someproxy.com:8080', # 'ProxyUserID'=>'Alladin', 'ProxyPassword'=>'open_sesami'); # ============================================================== use Socket; use FileHandle; #use MIME::Base64; require('BASE64'); sub getHTTP{ local $\ = ""; my %arg = ( @_ ); my $data; my ( $uri, $proxy_address, $proxy_port, $http_version, $user_id, $password, $proxy_user_id, $proxy_password, $referrer, $referer, $agent ) = ( $arg{URL} || $_[0], $arg{Proxy} || "", $arg{ProxyPort} || "8080", $arg{HTTP} || "1.1", $arg{UserID} || "", $arg{Password} || "", $arg{ProxyUserID} || "", $arg{ProxyPassword} || "", $arg{Referrer} || "", $arg{Referer} || "", $arg{UserAgent} || "HTTP client version 1.02 (www.kasai.fm)" ); my ( $scheme, $domain, $server_address, $server_port, $path ); my ( $target_address, $target_port, $target_path, $target_ip ); ######### URI parsing ### retrieve scheme ( "http://" ) if ( $uri =~ s!^(\w+?)://!! ){ $scheme = $1; return ("", "Can't handle '$scheme'. Only 'http' is possible" ) if ( $scheme !~ /^http$/i ); } else{ $scheme = 'http'; } ### retrieve domain, port and path ( "hogehoge:8080/foo/bar.html" ) ( $domain, $path ) = split( /\//, $uri, 2 ); ( $server_address, $server_port ) = split( /:/, $domain, 2 ); $server_address ||= "localhost"; $server_port ||= getservbyname( $scheme, "tcp" ); ### complete info about your proxy server if ( $proxy_address and !$proxy_port ){ ( $proxy_port ) = $proxy_address =~ /:(\d+)/; return ("", "Proxy port is undefined.". "Specify 'ProxyPort'=>8080 or 'Proxy'=>'${proxy_address}:8080'" ) unless ( $proxy_port ); } ### switch arguments according to if you use a proxy server ( $target_address, $target_port, $target_path ) = $proxy_address ? ( $proxy_address, $proxy_port, "${scheme}://${server_address}:${server_port}/${path}" ) : ( $server_address, $server_port, "/$path" ); ######### SOCKET (create) $target_ip = inet_aton( $target_address ) || return ("", "Can't connect to $target_address" ); $sock_address = pack_sockaddr_in( $target_port, $target_ip ); socket(SOCKET, PF_INET, SOCK_STREAM, 0) || return ("", "Can't create socket on $target_address"); ######### SOCKET (connect) connect(SOCKET, $sock_address) or return ("", "Can't connect socket on $sock_address"); autoflush SOCKET (1); ######### Send HTTP GET request if ( $http_version eq "1.1" ) { print SOCKET "GET $target_path HTTP/1.1\n"; print SOCKET "Host: $target_address\n"; print SOCKET "Connection: close\n"; } else { print SOCKET "GET $target_path HTTP/1.0\n"; } if ( $user_id ){ print SOCKET "Authorization: Basic\n "; print SOCKET &base64'b64encode("${user_id}:${password}")."\n"; } if ( $proxy_user_id ){ print SOCKET "Proxy-Authorization: Basic\n "; print SOCKET &base64'b64encode("${proxy_user_id}:${proxy_password}")."\n"; } print SOCKET "User-Agent: $agent\n" if ( $agent ); print SOCKET "Referrer: $referrer\n" if ( $referrer ); print SOCKET "Referer: $referer\n" if ( $referer ); print SOCKET "Accept: text/html; */*\n"; print SOCKET "\n"; ######### Receive HTTP response via SOCKET while ( ) { chomp; $data .= "$_\n"; } ######### SOCKET (close); take down the session close(SOCKET); ######### return return ($data, ""); } 1;