"""markupAgenda.py -- mark up the 62nd IETF agenda

see Makefile for usage

"""
# Revision identifier string for this file (presumably maintained by the
# version-control system's keyword expansion -- do not edit by hand).
__version__ = '$Id: markupAgenda.py,v 1.10 2005/03/09 17:34:48 connolly Exp $'

import sys

# Stylesheet address that startdoc() writes as the href of the
# <link rel='transformation'> element.
TXFORM='http://www.w3.org/2005/03dc-msp/gleanAgenda.xsl'
# Address of the plain-text agenda; auditTrail() links to it as "original".
SRC='http://www.ietf.org/meetings/agenda_62.txt'
# Relative addresses of the derived versions that auditTrail() links to
# as "ics" and "RDF/XML".
ICS='agenda_62.ics'
RDF='agenda_62.rdf'


def main(argv):
    """Read the agenda text from stdin and write the marked-up version
    to stdout.

    argv -- command-line arguments; accepted but not consulted
    """
    source = lines(sys.stdin)
    emit = sys.stdout.write
    markup(source, emit)


def markup(ls, wr):
    """write the whole agenda as an XHTML document

    ls -- iterator over agenda lines (e.g. from lines()): title, date,
          version and one ignored line, then the day sections, then
          (after a "===" rule) the footer text
    wr -- callable taking one string, e.g. sys.stdout.write

    Raises StopIteration if ls has fewer than four lines.
    """
    ti = next(ls).strip()
    da = next(ls).strip()
    ver = next(ls).strip()
    next(ls)  # the line after the version line is skipped

    startdoc(wr, ti, da)
    auditTrail(wr, ver)

    for hd, daylines in days(ls):
        wr("<div><h2>%s</h2>\n" % asContent(hd))

        wr("<table border='1'>")
        wr("<thead>\n")
        tableRow(wr, ('From', 'To', None, None, 'Description', 'Place'), 1)
        wr("</thead>\n")

        for st, en, desc, where, sessions in timeSlots(daylines):
            wr("<tbody>\n")
            tableRow(wr, (st, en, None, None, desc, where))

            if sessions and len(sessions[0]) == 4:
                # working-group sessions: (place, area, name, description)
                for place, area, name, sdesc in sessions:
                    tableRow(wr, ('', '', area, name, sdesc, place))
            elif sessions:
                # plenary description lines: (None, text) pairs
                wr("<tr><td></td><td></td><td>Plenary</td><td></td><td><pre>")
                for dummy, s in sessions:
                    wr(asContent(s) + "\n")
                wr("</pre></td></tr>\n")

            wr("</tbody>\n")

        wr("</table>\n</div>\n")

    # area director footer stuff: whatever days() left unread in ls
    wr("<pre>")
    for s in ls:
        wr(asContent(s) + "\n")
    wr("</pre>")

    # closes the elements that startdoc() left open
    wr("</body>\n</html>\n")



#########
# reading/parsing utilities

def lines(fp):
    """iterate over the lines of fp, stripping trailing whitespace
    (including the newline)

    Also patches one entry that is wrapped across two lines in the source:

      OPS  opsec      Operational Security Capabilities for IP Network
      Infrastructure WG

    The two pieces are yielded as a single line, joined by one space.
    An 'OPS  opsec' line that is not followed by the continuation is
    yielded unchanged, and a stray continuation line is passed through
    as-is.
    """
    pending = None  # an 'OPS  opsec' line waiting for its continuation
    for line in fp:
        line = line.rstrip()

        if pending is not None:
            held, pending = pending, None
            if line == 'Infrastructure WG':
                # rstrip() removed the separating whitespace; restore it
                yield held + ' ' + line
                continue
            # no continuation followed: do not lose the held line
            yield held

        if line.startswith('OPS  opsec'):
            pending = line
        else:
            yield line

    if pending is not None:
        yield pending


# Upper-case day names that open a day heading, e.g. "MONDAY, ...".
Weekdays = ('SUNDAY', 'MONDAY', 'TUESDAY', 'WEDNESDAY',
            'THURSDAY', 'FRIDAY', 'SATURDAY')


def days(ls):
    """turn an iterator over lines into an iterator over days,
    where a day is pair of a heading string and
    an iterator over the lines under the day heading

    The first line of ls is taken as the first day heading.  A later line
    is a day heading when the text before its first comma is one of
    Weekdays; headings containing "Continued" are dropped and the lines
    after them stay with the current day.  Reading stops at the first line
    starting with "===" (that line is consumed; the rest of ls is left
    for the caller).  The day being collected is always yielded, whether
    the input ends or the "===" rule is reached.  Empty input yields
    nothing.
    """
    try:
        hd = next(ls)
    except StopIteration:
        # nothing to read; a bare StopIteration must not escape a generator
        return

    lines = []
    for s in ls:
        if s.startswith("==="):
            break

        head, comma, _rest = s.partition(',')
        if comma and head in Weekdays:
            if "Continued" in s:
                continue
            yield (hd, iter(lines))
            hd = s
            lines = []
        else:
            lines.append(s)

    # last day: reached on end of input and on the "===" rule alike
    yield (hd, iter(lines))


def timeSlots(lines):
    """turn an iterable of day-lines into an iterator over time slots

    Each item yielded is the tuple built by aboutSession():
    (start, end, description, place, sessions).

    A line starting with a digit opens a new time slot, e.g.

      0900-1130 Morning Sessions

    Lines after it belong to that slot.  A line starting with whitespace
    is kept as a (None, text) pair (plenary session description).  Any
    other line is read as a session: the first 11 characters are the
    place, and the rest splits into area, name and description, giving
    a (place, area, name, description) tuple.

    Empty lines are skipped.  Session lines that come before the first
    time-slot heading are discarded, and a day without any heading
    yields nothing.  Raises ValueError for a session line with fewer
    than three fields after the place column.
    """
    hd = None
    sessions = []
    for s in lines:
        if not s:
            continue

        if s[0].isdigit():
            if hd:
                yield aboutSession(hd, sessions)
            sessions = []
            hd = s
        elif s[0].isspace():
            # plenary session description. RFE: list markup
            sessions.append((None, s))
        else:
            place = s[:11].strip()
            area, name, desc = s[11:].split(None, 2)
            sessions.append((place, area, name, desc))

    # flush the last slot, if the day had one at all
    if hd:
        yield aboutSession(hd, sessions)
            

def aboutSession(hd, sessions):
    """split a time-slot heading into its parts

    hd -- heading text such as "0900-1130 Morning Sessions" or
          "2230            Late Night Session"
    sessions -- passed through unchanged as the last item of the result

    Returns (start, end, description, place, sessions).  When the heading
    has no "start-end" range, end is '2359'.  The text after the times is
    split at its last '-' into description and place; neither part is
    stripped.  Without a '-', place is ''.

    Raises ValueError if hd is empty or only whitespace.
    """
    head, dash, tail = hd.partition('-')
    fields = tail.split(None, 1)
    # Only treat the '-' as a time range when what precedes it is a single
    # token; otherwise it belongs to the description, as in
    # "2230   Late Night Session - Room".
    if dash and fields and len(head.split()) == 1:
        st, en = head, fields[0]
        rest = fields[1] if len(fields) > 1 else ''
    else:
        # 2230            Late Night Session
        fields = hd.split(None, 1)
        if not fields:
            raise ValueError("empty time slot heading: %r" % (hd,))
        st, en = fields[0], '2359' #hmm...
        rest = fields[1] if len(fields) > 1 else ''

    if '-' in rest:
        i = rest.rfind('-')
        desc, where = rest[:i], rest[i+1:]
    else:
        desc, where = rest, ''
    return (st, en, desc, where, sessions)

def asContent(s):
    """escape text s for use as XML/XHTML element content

    '&' is replaced first so that the entities produced for '<' and '>'
    are not escaped a second time.
    """
    return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


###########
# writing utilities

def startdoc(wr, ti, da):
    """write the start of the XHTML document: the head and the top of
    the body

    wr -- callable taking one string
    ti -- title text, used for both <title> and <h1>
    da -- date text, used for the <h3>

    The transformation link is written inside <head>, where link elements
    belong.  <body> and <html> are left open for the caller to close.
    """
    ti = asContent(ti)
    da = asContent(da)
    wr("""<html xmlns='http://www.w3.org/1999/xhtml'>
    <head profile='http://www.w3.org/2003/g/data-view'>
    <title>%s</title>
    <link rel='transformation' href='%s' />
    </head>
    <body>
    <h1>%s</h1>
    <h3>%s</h3>
    """ % (ti, TXFORM, ti, da) )


def auditTrail(wr, ver):
    """write an <address> element giving the agenda version and links to
    the original text, this script, the derived versions and the Makefile

    wr -- callable taking one string
    ver -- version text of the agenda; escaped before being written
    """
    wr("""<address>%s
    <br />generated from <a href='%s'>original</a> using <a href="markupAgenda.py">markupAgenda.py</a>; see derived <a href='%s'>ics</a>, <a href='%s'>RDF/XML</a> versions, and <a href="Makefile">Makefile</a> for details</address>
    """ % (asContent(ver), SRC, ICS, RDF))


def tableRow(wr, cols, hd=0):
    """write one table row

    wr -- callable taking one string; called once with the whole row
    cols -- cell texts in column order; None indicates a cell spanned by
            the next non-None text, which then gets a colspan attribute
            counting itself plus the Nones just before it.  Nones with no
            text after them produce no cell.
    hd -- true to write header cells (<th>), false for data cells (<td>)

    Cell text is escaped with asContent().
    """
    n = 'th' if hd else 'td'
    pieces = [" <tr>"]
    span = 1
    for col in cols:
        if col is None:
            span += 1
            continue
        pieces.append("<%s" % n)
        if span > 1:
            pieces.append(" colspan='%d'" % span)
        pieces.append(">%s</%s>" % (asContent(col), n))
        span = 1
    pieces.append(" </tr>\n")
    wr("".join(pieces))


#######

def _test():
    """Run the doctest examples found in this module's docstrings."""
    from doctest import testmod
    testmod()


if __name__ == '__main__':
    # "--test" anywhere on the command line runs the self-tests;
    # otherwise the agenda on stdin is converted.
    if '--test' in sys.argv:
        _test()
    else:
        main(sys.argv)


# $Log: markupAgenda.py,v $
# Revision 1.10  2005/03/09 17:34:48  connolly
# move plenary cells again
#
# Revision 1.9  2005/03/09 17:34:00  connolly
# move plenary cells around a bit
#
# Revision 1.8  2005/03/09 17:32:26  connolly
# move plenary descriptions from summary to description
#
# Revision 1.7  2005/03/08 06:42:41  connolly
# fixed amp escaping
#
# Revision 1.6  2005/03/08 06:17:36  connolly
# - updated for March 4 version of agenda_62.txt
#  - sessions have a new 1st column for place
#  - handle plenary session description
#  - area director stuff at end
# - factored out markup code: startDoc, tableRow()
# - trimmed whitespace from title, date, version
#
# Revision 1.5  2005/02/25 22:29:11  connolly
# fixed grddl markup, title
#
# Revision 1.4  2005/02/25 22:26:43  connolly
# grddl markup
# link to original, derived versions
#
# Revision 1.3  2005/02/25 06:49:16  connolly
# audit trail in the address
#
# Revision 1.2  2005/02/25 06:44:15  connolly
# schedule structure done. XML well-formed.
#
# Revision 1.1  2005/02/25 06:04:31  connolly
# separated days
#
