# $Id: slurpItin.pl,v 1.2 2003/12/08 02:13:52 connolly Exp $

use strict;

# Namespace URIs of the vocabularies the output is written in.  The subs
# below build property and class URIs by appending a local name to these.
my $rdfNS  = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
my $rdfsNS = 'http://www.w3.org/2000/01/rdf-schema#';  # not used by the code visible here
my $dcNS   = 'http://purl.org/dc/elements/1.1/';
my $kNS    = 'http://opencyc.sourceforge.net/daml/cyc.daml#';
my $dtNS   = 'http://www.w3.org/2001/XMLSchema#';
my $tNS    = 'http://www.w3.org/2000/10/swap/pim/travelTerms#';
my $aNS    = 'http://www.daml.org/2001/10/html/airport-ont#';

# Nodes already handed out by the(), keyed by property and value.
my %Things;

# Upper-case three-letter month abbreviation => month number (1..12).
my %monthNameToNum;
@monthNameToNum{qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC)} = (1 .. 12);

# Print the @prefix declarations, in this order.
# The leading & is required: "bind" is also a Perl built-in, and a plain
# bind(...) call would not reach the sub defined in this file.
foreach my $binding (
    [ 'r',   $rdfNS ],
    [ 'dt',  $dtNS  ],   # datatypes
    [ 'k',   $kNS   ],
    [ 'dc',  $dcNS  ],
    [ 't',   $tNS   ],
    [ 'apt', $aNS   ],
){
    &bind(@$binding);
}

# Counter that genSym() bumps to keep generated labels distinct.
my $gen = 1;

# Current state of the parser in slurp().
my $state = 'start';

slurp();

# slurp -- parse an itinerary read from <> and print its facts as statements.
#
# Takes no arguments; every fact is written through makeStatement() (directly
# or via the(), theDay() and theAirport()).  Parsing is driven by the
# file-level $state variable:
#
#   start  - ignore lines until one contains a run of nine underscores,
#            then switch to 'flight'.
#   flight - ignore lines until one starts with "Flight:".  That line is
#            taken apart from the right (parenthesised stops note, flight
#            number, carrier name), the flight event is emitted as a
#            sub-event of the trip, and the state becomes 'depAr'.
#   depAr  - the line must be a "Depart:" or "Arrive:" line; the line after
#            it is read as the matching time line.  Once an Arrive time line
#            has been parsed the state goes back to 'flight'.
#
# Dies when a line does not fit the state it is read in, and when the input
# ends between a Depart/Arrive line and its time line.
sub slurp{
    my($event);    # node of the flight the current Depart/Arrive lines describe

    my($trip) = genSym('trip'); #@@traveller

    while(<>){
        if($state eq 'start'){
            $state = 'flight' if /_________/;
        }
        elsif($state eq 'flight'){
            next unless /^Flight:/;

            # Flight:  	AirTran Airways flight 438  (Non-Stop)
            my($fltNum, $carrierName);

            # The stops note must be present, but its text is not recorded;
            # it is only stripped so the flight number ends the line.
            if(s/\(([^\)]+)\)\s*$//){
                s/\s+$//;
            } else { die "no match for stops in $state" }
            if(s/\s*flight (\d+)$//){
                $fltNum = $1;
                s/\s+$//;
            } else { die "no match for flight number in $state: $_" }
            if(s/Flight:\s+(.*)//){
                $carrierName = $1;
            } else { die "no match in carrier name $state" }

            $event = genSym("flt$fltNum");
            makeStatement($trip, $kNS . "subEvents", $event);
            makeStatement($event, $tNS . "flightNumber", '', $fltNum);

            # One node per carrier name, shared by all of its flights.
            my($carrier) = the($kNS . 'nameOfAgent', $carrierName, $carrierName);
            makeStatement($carrier, $rdfNS . 'type', $kNS . 'AirlineCompany');
            makeStatement($event, $tNS . "carrier", $carrier);

            $state = 'depAr';
        }
        elsif($state eq 'depAr'){
            my($eventName, $airportName, $iataCode, $placeName);

            #Depart:  	Kansas City, MO (MCI) - TERMINAL BUILDING A
            if(/^(Depart|Arrive):\s+([^\(]+)\((\w+)\) - (.*)/){
                ($eventName, $airportName, $iataCode, $placeName) = ($1, $2, $3, $4);
                $airportName =~ s/\s*$//;
            }else { die "no match in $state: $_" }

            my $departing = $eventName eq 'Depart';

            # The terminal is where the flight leaves from on a Depart line
            # and where it ends up on an Arrive line.
            my($place) = genSym("where");
            makeStatement($event,
                          $kNS . ($departing ? 'fromLocation' : 'toLocation'),
                          $place);
            makeStatement($place, $kNS . 'nameString', '', $placeName);

            my($apt) = theAirport($iataCode, $airportName);
            makeStatement($place, $kNS . 'inRegion', $apt);

            # The time line follows immediately.  Stop here at end of input:
            # reading <> again after it has returned end-of-file would make
            # the enclosing loop start reading STDIN.
            $_ = <>;
            die "no time line after $eventName in $state" unless defined $_;

            # A time line that does not match is passed over without
            # complaint; the state then stays 'depAr'.
            if(/^\"\s+(...), (...) (..) at (\d+):(..)(..)/){
                my($dow, $mon, $dd, $hr, $mm, $ap) = ($1, $2, $3, $4, $5, $6);

                # 12-hour clock to 24-hour clock.
                $hr += 12 if $ap eq 'pm' && $hr < 12;
                $hr = 0 if ($ap eq 'am' && $hr == 12);

                my $ti = sprintf("%02d:%02d", $hr, $mm);
                my $calday = theDay($dd, $mon, '03', #@@ kludge
                                    $dow);

                if($departing){
                    makeStatement($event, $kNS . 'fromLocation', $apt);
                    makeStatement($event, $tNS . 'departureTime', '', $ti);
                    makeStatement($event, $kNS . "startingDate", $calday);
                }else{
                    makeStatement($event, $kNS . 'toLocation', $apt);
                    makeStatement($event, $tNS . 'arrivalTime', '', $ti);
                    makeStatement($event, $kNS . "endingDate", $calday);

                    # Arrival closes this flight; look for the next one.
                    $state = 'flight';
                }
            }
        }
        else{
            die "state $state what???";
        }
    }
}



# cribbed from
# Id: grokTravItin.pl,v 1.14 2003/09/16 14:57:54 connolly Exp

# fmtDate($dd, $mon, $yy) -- format a date as "YYYY-MM-DD".
#
#   $dd  - day of the month
#   $mon - month name; only its first three letters are looked at, in any
#          letter case
#   $yy  - year.  A value below 100 is taken as a year of the 2000s
#          ('03' means 2003); anything larger is used as the full year.
#
# Returns the formatted string.  Dies with "bad month: ..." when the month
# abbreviation is not a key of %monthNameToNum.
sub fmtDate{
  my($dd, $mon, $yy) = @_;

  $mon = uc(substr($mon, 0, 3));
  my $mm = $monthNameToNum{$mon};
  die "bad month: $mon" unless $mm >= 1 && $mm <= 12;

  # Two-digit years keep their old meaning; a four-digit year no longer
  # gets 2000 added on top of it.
  my $year = $yy < 100 ? 2000 + $yy : $yy;

  return sprintf("%04d-%02d-%02d", $year, $mm, $dd);
}

# theDay($dd, $mon, $yy, $dow) -- return the node for a calendar day.
#
#   $dd, $mon, $yy - day, month name and year, as accepted by fmtDate()
#   $dow           - three-letter weekday abbreviation ('Mon' .. 'Sun')
#
# The day node is obtained from the() under its formatted date, then typed
# with the weekday class; the weekday class itself is typed and named too.
# Dies with "huh? ..." on an unknown weekday abbreviation (after the day
# node has been looked up).  Returns the day node.
sub theDay{
  my($dd, $mon, $yy, $dow) = @_;

  my $isoDate = fmtDate($dd, $mon, $yy);
  my $dayNode = the($dtNS . 'date', $isoDate, "day$dow$dd");

  my %fullNameOf = (
    Mon => 'Monday',
    Tue => 'Tuesday',
    Wed => 'Wednesday',
    Thu => 'Thursday',
    Fri => 'Friday',
    Sat => 'Saturday',
    Sun => 'Sunday',
  );

  my $weekday = $fullNameOf{$dow};
  if(!$weekday){
    die "huh? $dow";
  }

  my $weekdayClass = $kNS . $weekday;
  makeStatement($dayNode, $rdfNS . 'type', $weekdayClass);
  #@@ DayOfWeekType: not yet a published part of opencyc, but mentioned in comments.
  makeStatement($weekdayClass, $rdfNS . 'type', $kNS . 'DayOfWeekType');
  makeStatement($weekdayClass, $kNS . 'nameString', '', $weekday);

  return $dayNode;
}


# makeStatement($s, $p, $or, $ol) -- print one "subject predicate object."
# line.
#
#   $s  - subject: a URI, or a blank-node label starting with "_:"
#   $p  - predicate, same conventions
#   $or - object when it is a resource (URI or "_:" label); pass a false
#         value such as '' to emit a literal instead
#   $ol - text of the literal object; only used when $or is false
#
# URIs are wrapped in <...>; "_:" labels are printed unchanged.  Literal
# text goes between double quotes with backslash, double quote, newline,
# carriage return and tab written as backslash escapes, so that such
# characters cannot break the statement apart.
sub makeStatement{
  my($s, $p, $or, $ol) = @_;

  # keep existentials existential...
  $s = "<$s>" unless $s =~ /^_:/;
  $p = "<$p>" unless $p =~ /^_:/;

  if($or){
    $or = "<$or>" unless $or =~ /^_:/;
    print "$s $p $or.\n";
  }else{
    my %escapeFor = (
      "\\" => "\\\\",
      "\"" => "\\\"",
      "\n" => "\\n",
      "\r" => "\\r",
      "\t" => "\\t",
    );

    $ol = '' unless defined $ol;
    $ol =~ s/([\\"\n\r\t])/$escapeFor{$1}/g;
    print "$s $p \"$ol\".\n";
  }
}

# bind($pfx, $ns) -- print an "@prefix" line that maps prefix $pfx to the
# namespace URI $ns.
#
# "bind" is also the name of a Perl built-in, so this sub has to be called
# with a leading ampersand: &bind(...).
sub bind{
  my($pfx, $ns) = @_;

  my $template = '@prefix %s: <%s>.' . "\n";
  printf($template, $pfx, $ns);
}

# introduce($id, $cls) -- return the local reference "#$id".
#
# When $cls is true, a statement typing that reference as $cls is emitted
# first.
sub introduce{
  my($id, $cls) = @_;

  my $node = '#' . $id;

  if($cls){
    makeStatement($node, $rdfNS . 'type', $cls);
  }

  return $node;
}

# genSym($hint) -- return a fresh blank-node label of the form
# "_:<hint>_<n>".
#
# Everything but ASCII letters and digits is dropped from $hint, and the
# file-level counter $gen is incremented before it is used as <n>.
sub genSym{
  my($hint) = @_;

  # make it a safe name
  (my $safe = $hint) =~ tr/a-zA-Z0-9//cd;

  return join('_', "_:$safe", ++$gen);
}


# the($prop, $val, $hint) -- return the one node whose $prop is $val.
#
# this assumes $prop is a daml:UniqueProperty
#
# The first request for a given ($prop, $val) pair generates a node (its
# label based on $hint), emits the statement giving it that property value
# as a literal, and remembers it in %Things; later requests for the same
# pair get the remembered node back and emit nothing.
sub the{
  my($prop, $val, $hint) = @_;

  unless($Things{$prop, $val}){
    my $node = genSym($hint);
    makeStatement($node, $prop, '', $val);
    $Things{$prop, $val} = $node;
  }

  return $Things{$prop, $val};
}

# theAirport($iata, $name) -- describe an airport and return its URI.
#
#   $iata - airport code; appended to the airport lookup URL to form the URI
#   $name - airport name; its nameString statement is emitted only when
#           this is true
#
# Always emits the type and iataCode statements for the airport.
sub theAirport{
  my($iata, $name) = @_;

  my $uri = 'http://www.daml.org/cgi-bin/airport?' . $iata;

  if($name){
    makeStatement($uri, $kNS . 'nameString', '', $name);
  }
  makeStatement($uri, $rdfNS . 'type', $kNS . 'Airport-Physical');
  makeStatement($uri, $aNS . 'iataCode', '', $iata);

  return $uri;
}
