#!/usr/tools/bin/perl
#
# $Id: uri.pl,v 1.2 1994/05/11 22:36:54 connolly Exp $
#

package URI;

#
# A word is a sequence of non-markup characters
#
$Word = '[^/=;?#]*';

@Schemes = ('http', 'ftp', 'wais',
	    'local-file', 'anon-ftp', 'afs',	#  MIME access-types
				# that should work just as well
	    'gopher', # @@ not sure about the syntax of this one
	    'file' # obsolete
	    );

sub relative{
    local($_, $here) = @_;
    local($scheme, $path, $hostport, $search, $fragment);

    $scheme = $1 if s*^($Word):**; # @# syntax of scheme?

    if(!$scheme){
	($scheme, $path, $hostport, $search, $fragment) = &absolute($here);
    }

    #
    # We can't assume anything about the syntax unless we recognize
    # the scheme
    #
    if(grep($scheme eq $_, @Schemes)
       || $scheme =~ /^x-/){	# we'll assume experimental schemes use
				# the same conventions
	if(s*^//($Word)**){
	    # HTParse.c says hostnames are case-insensitive,
	    # but they can't have %-style escapes
	    #$hostport = &unescape($1);
	    $hostport = $1;
	    $hostport =~ tr/A-Z/a-z/;
	}

	# HTParse.c says ID's don't have escapes
	$fragment = $1 if s*#($Word)$**;
	if(s*\?($Word)$**){
	    $search = &unescape($1);
	    $search =~ s/\+/ /g;
	}
	$path = &unescape($_);

	warn "sorry, relative paths not implemented yet"
	    unless $path =~ m#^/#;
    }

    return ($scheme, $path, $hostport, $search, $fragment);
}


sub absolute{
    local($_) = @_;
    local($scheme, $hostport, $path, $search, $fragment);

    $scheme = $1 if s*^([A-Za-z0-9\.-]+):**; # @# syntax of scheme?

    die "no scheme in url: $_" unless $scheme;

    #
    # We can't assume anything about the syntax unless we recognize
    # the scheme
    #
    if(grep($scheme eq $_, @Schemes)
       || $scheme =~ /^x-/){	# we'll assume experimental schemes use
				# the same conventions
	if(s*^//($Word)**){
	    $hostport = &unescape($1);
	    $hostport =~ tr/A-Z/a-z/; # HTParse.c says hostnames are case-insensitive
	}
	$fragment = &unescape($1) if s*#($Word)$**;
	$search = &unescape($1) if s*\?($Word)$**;
	$path = &unescape($_);
    }

    return ($scheme, $path, $hostport, $search, $fragment);
}


sub unescape{
    local($_) = @_;

    s/%(..)/pack(c,hex($1))/eg;

    return $_;
}


sub escape{
        local($_) = @_;
        s-[/?=;:]-sprintf("%%%02X", ord($&))-ge; # list of illegal chars?
        return $_;
}

1;
