#!/usr/bin/python
"""Convert ldif to n3 format.

Syntax:    python ldif2n3.py  <file>

    This program is or was http://www.w3.org/2000/10/swap/pim/ldif2p3.py
    $Id: ldif2n3.py,v 1.6 2007/11/02 14:21:35 timbl Exp $
    
    -v  verbose
    -l  pure ldiff vocabulary in output, no translation to foaf etc 
    -m  hide mailbox for privacy, only generate hash
"""
import sys
import string
import os
import re
import sha, binascii, base64


from swap.notation3 import stringToN3 # http://www.w3.org/2000/10/swap/notation3.py


# RCS/CVS "Id" keyword with the surrounding '$' delimiters removed; printed
# in the header of every generated N3 document.
version = "$Id: ldif2n3.py,v 1.6 2007/11/02 14:21:35 timbl Exp $".strip("$")

# Command-line switches shared by the functions below.  A `global` statement
# at module level has no effect; it only advertises that these names are
# module-wide settings.
global verbose, hideMailbox

def macroSubstitute(line, dict):
    """Return *line* unchanged.

    Placeholder: the *dict* argument is accepted but not consulted, so no
    substitution is performed yet.
    """
    unchanged = line  # @@@@@@ substitution not implemented
    return unchanged

# map = { 'telephone:Number': 'v:work-tel' }.

# One LDIF attribute line: attribute name, a separator (':' plain text,
# '::' base64-encoded, ':<' URL), at least one space, then the value.
_LDIF_ATTRIBUTE = re.compile(r'([a-zA-Z0-9_]*)(::|:<|:) +(.*)')

# LDIF attribute names that are emitted with a FOAF property instead.
_AS_FOAF = {"cn": "foaf:name"}

_MAIL_FIELDS = ("mail", "email", "mozillaSecondEmail")  ## @@ distinguish?
_PHONE_FIELDS = ("telephoneNumber", "homePhone", "fax", "pager", "mobile")


def _unfoldLines(buf):
    """Yield the logical lines of the LDIF text *buf*.

    A physical line that begins with a single space continues the previous
    line; the newline and that one space are dropped.  Trailing carriage
    returns are removed from every physical line, so CRLF input does not
    leave a stray '\\r' inside a folded value.
    """
    physical = buf.split("\n")
    if physical and physical[-1] == "":
        physical.pop()  # a final newline does not start another line
    current = None
    for segment in physical:
        segment = segment.rstrip("\r")
        if current is not None and segment[:1] == " ":
            current += segment[1:]
            continue
        if current is not None:
            yield current
        current = segment
    if current is not None:
        yield current


def convert(path):
    """Convert the LDIF file at *path* to N3, printed on standard output.

    Each run of attribute lines becomes one blank node ``[ ... ].``; a blank
    line ends the current record.  Lines starting with '#' are skipped, and
    lines that are not recognised are reported as an N3 comment
    (``# ERROR: Unknown line format: ...``) instead of aborting the run.

    Field handling:
      * ``objectclass`` -> ``a ldif:<Capitalised value>`` ("top" is dropped).
      * mail fields -> ``foaf:mbox_sha1sum``, plus ``foaf:mbox`` unless the
        module-level ``hideMailbox`` flag is set.
      * phone fields -> ``ldif:<field> <tel:...>`` with spaces turned to '-'.
      * anything else -> ``ldif:<field>`` (or its FOAF equivalent) with the
        value as an N3 string; ``dn`` is suppressed when ``hideMailbox`` is
        set.  Values given with the ':<' separator are emitted as
        ``<url>`` symbols rather than string literals.

    Reads the module-level names ``version`` and ``hideMailbox``.

    NOTE(review): the ``-l`` option (module-level ``pureLDIF``) is not
    consulted here, so the FOAF translations are always applied.

    Raises IOError if *path* cannot be read.
    """
    print("# http://www.w3.org/DesignIssues/Notation3")
    print("# Generated from %s" % path)
    print("# Generated by   %s" % version)
    print("")
    print("@prefix foaf: <http://xmlns.com/foaf/0.1/>.")
    print("@prefix ldif: <http://www.w3.org/2007/ont/ldif#>.")
    print("")

    with open(path, "r") as source:
        buf = source.read()

    inPerson = 0
    for line in _unfoldLines(buf):
        if line.startswith("#"):
            continue

        if not line.strip():
            # Only close a record that was actually opened, so repeated or
            # leading blank lines do not produce unbalanced brackets.
            if inPerson:
                print("    ].")
                inPerson = 0
            continue

        m = _LDIF_ATTRIBUTE.match(line)
        if not m:
            print("# ERROR: Unknown line format: %s" % line)
            continue

        field, separator, value = m.groups()
        isSymbol = (separator == ":<")
        if separator == "::":
            value = base64.decodestring(value)

        if not inPerson:
            print("    [")
            inPerson = 1

        if field == "objectclass":
            if value == "top":
                continue  # Zero content info
            print('\ta ldif:%s; ' % (value[0:1].upper() + value[1:]))

        elif field in _MAIL_FIELDS:
            mboxUri = "mailto:" + value
            digest = binascii.hexlify(sha.new(mboxUri).digest())
            print('\tfoaf:mbox_sha1sum %s;' % (stringToN3(digest, singleLine=1)))
            if not hideMailbox:
                print('\tfoaf:mbox <%s>;' % (mboxUri))

        elif field in _PHONE_FIELDS:
            print('\tldif:%s <tel:%s>;' % (field, value.replace(' ', '-')))

        else:
            if field == "modifytimestamp" and value == "0Z":
                continue  # ignore

            if isSymbol:
                obj = "<%s>" % value
            else:
                obj = stringToN3(value, singleLine=0)
            pred = _AS_FOAF.get(field, '\tldif:' + field)
            # The dn usually embeds the e-mail address, so hide it too.
            if not (hideMailbox and field == "dn"):
                print('\t%s %s; ' % (pred, obj))

    # Close the last record when the file does not end with a blank line.
    if inPerson:
        print("    ].")
        

def do(path):
    """Convert one LDIF file, announcing it on stderr when verbose.

    Reads the module-level ``verbose`` flag and returns whatever
    ``convert(path)`` returns.
    """
    if verbose:
        # The progress note goes to stderr so it never mixes with the N3
        # document that convert() prints on stdout.
        sys.stderr.write("# ldif2n3: converting " + path + "\n")
    return convert(path)
        
######################################## Main program

# Module-wide settings; convert() and do() read verbose and hideMailbox.
nochange = 1
verbose = 0
hideMailbox = 0
pureLDIF = 0
doall = 0
files = []

# Arguments starting with '-' are options; everything else is an input file.
for arg in sys.argv[1:]:
    if arg[0:1] == "-":
        if arg == "-?" or arg == "--help":
            # Show the usage text and stop, rather than printing an empty
            # line and carrying on with the conversion.
            print(__doc__)
            sys.exit(0)
        elif arg == "-v": verbose = 1
        elif arg == "-m": hideMailbox = 1
        elif arg == "-l": pureLDIF = 1
        else:
            print("""Bad option argument.""" + __doc__)
            sys.exit(-1)
    else:
        files.append(arg)

# Files are converted in the order given; all output goes to stdout.
for path in files:
    do(path)
