###
### Web 上のコンテンツを読む perl スクリプト
### A perl script to retrieve contents on the internet via http.

### (C) 2001-2004 笠井 崇文 (web@kasai.fm) 最終更新日: 2004年 3月 5日
### (C) 2001-2004 Takafumi Kasai           Last modified: 5 May, 2004

### 本スクリプトは、自由に転載・引用・配布・使用・改変していただいて結構です。
### 但し、その結果何らかの損害を被った場合も、責任を負いかねます。
### あらかじめご了承ください。

### と言いつつ虫の好い話ですが、バグや無駄な部分等を発見されましたら、上記
### アドレスまでご一報頂ければ幸いです。

### You can quote, distribute, use, and alter this script without 
###  any permission. But I never bear responsibility for any possible
###  damage by this script.  However, I hope to receive reports of
###  bugs or wasteful part in this script from you.

# ==============================================================

#  要件 / Requirements
#
# ・perl 5 以降 / perl 5 or later
# ・以下の perl モジュール / perl modules follows:
#      Socket
#      FileHandle
#      MIME::Base64
# (ActivePerl 等に付属のものや、CPAN からダウンロードしたものを別途ご用意下さい)
# (You can get them from ActiveStates or CPAN)

#  使用方法 / Usage
#
# (1) このスクリプト(getHTTP)を、@INC にパスの通った場所に置く。
# (2) 呼び出し元のプログラムで、
#       require('getHTTP');
#     と書く。
# (3) 
#       ($http_response, $errorMessage) = &getHTTP('http://www.kasai.fm/');
#     のように、Web 上のコンテンツを読みこめる。

#     $http_response に、読み込んだコンテンツが入る。
#     もしエラーが起ると、$errorMessage にエラー理由が入る。

# (1) Put this script where @INC can reach.
# (2) In the caller script, put this code:
#     require('getHTTP');
# (3) 
#     ($http_response, $errorMessage) = &getHTTP('http://www.kasai.fm/');
#    will retrieve contents on the internet.

#  認証の必要なページを見る場合 / In case authorization required

# ($http_response, $errorMessage) = &getHTTP('URL'=>'http://www.kasai.fm/',
#  'UserID'=>'Mihata', 'Password'=>'Tatenashi');

#  プロクシを使う場合 / In case you use a proxy 

# ($http_response, $errorMessage) = &getHTTP('URL'=>'http://www.kasai.fm/', 'Proxy'=>'someproxy.com:8080');

#  認証の必要なプロクシを使う場合 / In case you use a proxy which requires authorization

# ($http_response, $errorMessage) = &getHTTP('URL'=>'http://www.kasai.fm/', 'Proxy'=>'someproxy.com:8080',
#  'ProxyUserID'=>'Alladin', 'ProxyPassword'=>'open_sesami');

# ==============================================================

use Socket;
use FileHandle;
#use MIME::Base64;
require('BASE64');

sub getHTTP{
	local $\ = "";

	my %arg = ( @_ );
	my $data;
	my ( $uri, $proxy_address, $proxy_port, $http_version, $user_id, $password,
		$proxy_user_id, $proxy_password, $referrer, $referer, $agent ) =
	(
		$arg{URL}           || $_[0],
		$arg{Proxy}         || "",
		$arg{ProxyPort}     || "8080",
		$arg{HTTP}          || "1.1",
		$arg{UserID}        || "",
		$arg{Password}      || "",
		$arg{ProxyUserID}   || "",
		$arg{ProxyPassword} || "",
		$arg{Referrer}      || "",
		$arg{Referer}       || "",
		$arg{UserAgent}     || "HTTP client version 1.02 (www.kasai.fm)"
	);

	my ( $scheme, $domain, $server_address, $server_port, $path );
	my ( $target_address, $target_port, $target_path, $target_ip );


######### URI parsing

	### retrieve scheme ( "http://" )
	
	if ( $uri =~ s!^(\w+?)://!! ){
		$scheme = $1;
		
		return ("",
			"Can't handle '$scheme'. Only 'http' is possible"
		) if ( $scheme !~ /^http$/i );
	}
	else{
		$scheme = 'http';
	}


	### retrieve domain, port and path ( "hogehoge:8080/foo/bar.html" )
	
	( $domain, $path ) = split( /\//, $uri, 2 );
	( $server_address, $server_port ) = split( /:/, $domain, 2 );
	
	$server_address ||= "localhost";
	$server_port    ||= getservbyname( $scheme, "tcp" );


	### complete info about your proxy server

	if ( $proxy_address and !$proxy_port ){
		( $proxy_port ) = $proxy_address =~ /:(\d+)/;
		
		return ("",
			"Proxy port is undefined.".
			"Specify 'ProxyPort'=>8080 or 'Proxy'=>'${proxy_address}:8080'"
		) unless ( $proxy_port );
	}


	### switch arguments according to if you use a proxy server
	
	( $target_address, $target_port, $target_path ) = $proxy_address ?
		(
			$proxy_address,
			$proxy_port,
			"${scheme}://${server_address}:${server_port}/${path}"
		) :
		(
			$server_address,
			$server_port,
			"/$path"
		);


 ######### SOCKET (create)
 
	$target_ip    = inet_aton( $target_address ) || return ("", "Can't connect to $target_address" );
	$sock_address = pack_sockaddr_in( $target_port, $target_ip );
	
	socket(SOCKET, PF_INET, SOCK_STREAM, 0) || return ("", "Can't create socket on $target_address");


 ######### SOCKET (connect)
 
	connect(SOCKET, $sock_address) or return ("", "Can't connect socket on $sock_address");
	autoflush SOCKET (1);


 ######### Send HTTP GET request
 
	if ( $http_version eq "1.1" ) {
		print SOCKET "GET $target_path HTTP/1.1\n";
		print SOCKET "Host: $target_address\n";
		print SOCKET "Connection: close\n";
	}
	else {
		print SOCKET "GET $target_path HTTP/1.0\n";
	}

	if ( $user_id ){
		print SOCKET "Authorization: Basic\n ";
		print SOCKET &base64'b64encode("${user_id}:${password}")."\n";
	}

	if ( $proxy_user_id ){
		print SOCKET "Proxy-Authorization: Basic\n ";
		print SOCKET &base64'b64encode("${proxy_user_id}:${proxy_password}")."\n";
	}

	print SOCKET "User-Agent: $agent\n"  if ( $agent    );
	print SOCKET "Referrer: $referrer\n" if ( $referrer );
	print SOCKET "Referer: $referer\n"   if ( $referer  );

	print SOCKET "Accept: text/html; */*\n";
	print SOCKET "\n";


	######### Receive HTTP response via SOCKET

	while ( <SOCKET> ) {
		chomp;
		$data .= "$_\n";
	}


	######### SOCKET (close); take down the session
	
	close(SOCKET);


	######### return

	return ($data, "");
}

1;