#!/usr/bin/python
#
#
# Open source.  W3C licence.
#
r"""Convert vCard address-book data to Notation3 (N3), written to stdout.

See http://www.w3.org/2002/12/cal/vcard-notes

See also (the links in the original header were lost; titles are kept):

  - a page on the namespace and Python iCalendar (vCard?) implementations;
  - RFC 2425, http://www.ietf.org/rfc/rfc2425.txt
      s:comment "Defines text/directory, the line-folding and basic
      record type structure".
  - RFC 2426, http://www.ietf.org/rfc/rfc2426.txt
      dc:title "vCard MIME Directory Profile".
  - W3C Note, dc:title "Representing vCard Objects in RDF/XML".
  - a project used by this code, written by danc (presumably the icslex
    lexer imported below -- TODO confirm).

Usage:  vcard2n3.py [-v] [-t] file ...

The syntax of this file is valid for Python 2.6+ and Python 3; which
interpreter can actually run it depends on the imported icslex and
notation3 modules.
"""

import os
import re
import string
import sys

# export PYTHONPATH=$PYTHONPATH:/devel/WWW/2002/12/cal
import icslex  # from http://www.w3.org/2002/12/cal/icslex.py
import notation3  # http://www.w3.org/2000/10/swap/notation3.py
from notation3 import stringToN3

lineLength = 76  # A few clear of 80, as used by AB base64 wrapping

# Fields whose value is a single piece of text.
singleTextField = [
    'fn', 'title', 'bday', 'description', 'note', 'x-abuid', 'x-abadr',
    'x-aim', 'x-abrelatednames', 'x-abshowas',
]

kludgeTags = ['v:x-ablabel', 'v:x-abadr']  # These have sideeffects

# From http://www.ietf.org/rfc/rfc2426.txt adr-type
typeFields = {
    'adr': ["dom", "intl", "postal", "parcel", "home", "work",
            "pref"],  # , iana-type / x-name
    'bday': ['date'],
    'tel': ["home", "work", "pref", "voice", "fax", "msg", "cell",
            "pager", "bbs", "modem", "car", "isdn", "video", "pcs",
            'main',  # @@ Added for AB
            ],
    'email': ["internet", "x400",
              'home', 'work',  # @@ Added as they turn up in AB
              ],
    'x-aim': ['home', 'work',  # @@ Added as they turn up in AB
              ],
    'url': ['home', 'work', 'foaf'],  # unofficial
}

# These make work-adr etc.  Others are a class of phone/email/etc
relationshipModifiers = {'home': 1, 'work': 1, 'main': 1}

# Sub-field names, in order, of the structured (semicolon-separated) fields.
fieldProperties = {  # @@@ These are rather long localnames IMHO
    'n': ['family-name', 'given-name', 'additional-name', 'prefix',
          'suffix'],
    'adr': ['post-office-box', 'extended-address', 'street-address',
            'locality', 'region', 'postal-code', 'country-name'],
    'org': ['organization-name', 'organization-unit'],
}

# Header written at the top of every output document; takes the input path.
# NOTE(review): the namespace IRIs of s:, log:, v:, vc: and abl: were missing
# from the source this was restored from (leaving "@prefix s: ." which is not
# valid N3).  The values below are the conventional W3C ones -- confirm them,
# especially vc: and abl:, against a known-good copy before relying on them.
_HEADER = """# n3  http://www.w3.org/DesignIssues/Notation3.
# From vCard data in %s
# Extracted by $Id: vcard2n3.py,v 1.8 2014/09/09 13:12:23 timbl Exp $
@prefix : <#>.
@prefix loc: <#loc_>.
@prefix s: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix log: <http://www.w3.org/2000/10/swap/log#>.
@prefix v: <http://www.w3.org/2006/vcard/ns#>.
@prefix vc: <http://www.w3.org/2006/vcard/class#>.
@prefix abl: <http://www.w3.org/2006/vcard/abl#>.
@prefix user: <#>.
"""

_USAGE = """Bad option argument.
    -v  verbose
    -t  self-test
"""

# "group.rest-of-line"; RFC 2425 group names are letters, digits and "-".
_GROUP_LINE = re.compile(r'^([A-Za-z0-9-]+)\.(.*)')

_ALPHANUMERIC = ("abcdefghijklmnopqrstuvwxyz"
                 "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")


def zapOut(str, allowed):
    """Only allow the characters given.

    Strings of consecutive unallowed characters are replaced with a single
    underscore character.  (The parameter is called ``str`` for
    compatibility with existing callers, although it shadows the builtin.)

    >>> zapOut("a b--c", "abc")
    'a_b_c'
    """
    kept = []
    for ch in str:
        if ch in allowed:
            kept.append(ch)
        elif kept[-1:] != ["_"]:  # collapse a run into one underscore
            kept.append("_")
    return "".join(kept)


def munge(str):
    """Reduce *str* to ASCII letters and digits, see zapOut().

    >>> munge("Home Page!")
    'Home_Page_'
    """
    return zapOut(str, _ALPHANUMERIC)


def splitBy(stri, delim, unescape=1):
    r"""Split by unescaped delimiters.

    A backslash escapes the following character; an escaped "n" or "N"
    stands for a newline.  With *unescape* true the returned pieces have
    the escapes resolved, otherwise they are the raw substrings.  An empty
    piece after a trailing delimiter is not returned, and an empty input
    gives an empty list.

    >>> splitBy("a,b,c", ",")
    ['a', 'b', 'c']
    >>> splitBy("a,", ",")
    ['a']
    >>> splitBy(r"one\,two,three", ",")
    ['one,two', 'three']
    >>> splitBy(r"one\,two,three", ",", unescape=0)
    ['one\\,two', 'three']
    """
    result = []
    length = len(stri)
    begin = 0
    escaped = False
    while begin < length:
        i = begin
        chars = []
        while i < length:
            ch = stri[i]
            if escaped:
                escaped = False
                if ch in "nN":
                    chars.append('\n')
                else:
                    chars.append(ch)
            elif ch == delim:
                break
            elif ch == '\\':
                escaped = True
            else:
                chars.append(ch)
            i += 1
        if unescape:
            result.append("".join(chars))
        else:
            result.append(stri[begin:i])
        begin = i + 1
    return result


def wr(s):
    """Write the text *s* to standard output encoded as UTF-8."""
    data = s.encode('utf-8')
    # Python 3 text streams only take str; the bytes go to the underlying
    # binary buffer.  Python 2 file objects have no such attribute.
    binary_out = getattr(sys.stdout, 'buffer', None)
    if binary_out is None:
        sys.stdout.write(data)
    else:
        binary_out.write(data)


def lineFold(str1, str2):
    """Return str1 + str2, starting a new tab-indented line before str2
    if the last line of the result would be longer than lineLength.

    >>> lineFold("abc", "def")
    'abcdef'
    """
    x = str1.rfind('\n')
    if x < 0:
        x = 0
    if len(str1) - x + len(str2) > lineLength:
        return str1 + "\n\t" + str2
    return str1 + str2


def orderedFields(value, names):
    """Return N3 property/value text for a structured vCard value.

    *value* holds semicolon-separated components which are matched, in
    order, with the property local names in *names*.  Components beyond
    the known names are ignored, and empty components produce no output.
    The commas inside a component are replaced by spaces.
    """
    cardData = ""
    # splitBy() tracks escapes properly, so an escaped backslash directly
    # before a semicolon does not hide the semicolon.
    for name, raw in zip(names, splitBy(value, ';', unescape=0)):
        st = " ".join(splitBy(raw, ','))
        if st:
            cardData = lineFold(
                cardData,
                ' v:%s %s;' % (name, stringToN3(st, singleLine=1)))
    return cardData


def predicateObject(n, props, value, line=""):
    """Return a pair of the predicate and object as N3 strings.

    n      -- the field name, compared here against lower-case names
    props  -- iterable of (parameter name, parameter value) pairs
    value  -- the raw field value
    line   -- the source line, used only in diagnostics

    Either element of the pair may be empty: ("", "") means there is
    nothing to emit, and ("", text) means *text* is a run of naked
    "property value;" pairs (see notes).

    May write to stdout: a warning comment, or a statement giving the
    classes of a URI object.

    Raises ValueError for types, parameters, data types and tags which
    are not handled.
    """
    modifiers = ""
    classes = []
    for prop, val in props:
        if prop == 'type':
            for kind in splitBy(val.lower(), ','):
                if kind == 'internet' and n == 'email':
                    pass
                elif kind == 'pref':  # Preferred @@ - how represent?
                    pass
                elif kind in typeFields.get(n, []):
                    if relationshipModifiers.get(kind, 0):
                        if modifiers:
                            wr("# @@ multiple modifiers in: " + line + "\n")
                        modifiers = kind + '-' + modifiers
                    else:
                        classes.append('vc:' + kind[0].upper() + kind[1:])
                else:
                    raise ValueError(
                        "Unhandled type %s in: %s" % (kind, line))
        elif prop == 'value':  # This means datatype
            # Date-times from AB certainly look like w3c not iCal dates
            if val not in ('date', 'uri'):
                raise ValueError('Unimplemented data type:%s' % (val,))
        elif prop == 'base64' or (prop == 'encoding'
                                  and val.lower() == 'b'):
            # Special case: re-wrap the base64 text at lineLength columns.
            payload = value.replace(' ', '')
            wrapped = "".join([payload[i:i + lineLength] + "\n"
                               for i in range(0, len(payload), lineLength)])
            return 'v:' + n, '[ v:base64 """%s"""]\n' % (wrapped)
        else:
            raise ValueError(
                'Unknown property %s with value %s' % (prop, val))

    classSpec = ""
    if classes:
        classSpec = 'a ' + ', '.join(classes)
    names = fieldProperties.get(n, None)
    pred = 'v:%s%s' % (modifiers, n)
    if names:
        if classSpec:
            classSpec = '\n\t' + classSpec
        if n == 'n':  # Special case
            if classSpec:
                raise ValueError(
                    "Unexpected class on %s: %s" % (n, repr(classSpec)))
            return '', orderedFields(value, names)  # Naked fields - see notes
        return pred, '[' + orderedFields(value, names) + classSpec + ']'

    if n == 'version':
        if value != "3.0":
            raise ValueError("value found: " + repr(value))
        return "", ""
    if n in ('x-ablabel', 'x.ablabel'):
        return "", ""  # used elsewhere
    if n == 'categories':
        # Really should relate these to classes, but this roundtrips
        obj = ", ".join(['"' + x + '"' for x in splitBy(value, ',')])
        return pred, obj

    parts = splitBy(value, ';')
    if len(parts) > 1:
        raise ValueError("Unescaped semicolon in value: " + value)
    if parts:
        unesc = parts[0]
    else:
        unesc = ""  # an empty value is an empty string, not an error

    obj = None
    if n == 'tel':
        if not value.startswith('+'):
            wr("# @@ Warning: not international form tel: " + value + "\n")
        obj = '<tel:%s>' % (value.replace(' ', '-'))
    elif n == 'url':
        obj = '<%s>' % (value)
    elif n == 'email':
        obj = '<mailto:%s>' % (value)

    if obj:  # Any case so far is a form of URI
        if classSpec:
            wr('%s %s.\n' % (obj, classSpec))
        return pred, obj
    elif n in singleTextField:  # Single text
        if classSpec:
            raise ValueError(
                "Unexpected class on %s: %s" % (n, repr(classSpec)))
        return pred, stringToN3(unesc, singleLine=0)  # @@@ N3 escaping
    raise ValueError('Unknown tag:' + n)


def _decode(raw):
    """Decode the bytes of a vCard file.

    Returns (text, unmarked) where *unmarked* is true if the data was
    taken to be big-endian UTF-16 lacking its byte order mark.
    """
    head = raw[:2]
    if head == b"\x00B":  # UTF16 with MSB byte order unmarked
        return (b"\xfe\xff" + raw).decode('utf-16'), True
    if head in (b"\xfe\xff", b"\xff\xfe"):  # UTF-16 with byte order mark
        return raw.decode('utf-16'), False
    return raw.decode('utf-8-sig'), False  # drops a UTF-8 BOM if present


def _unfolded_lines(text):
    r"""Generate the logical lines of *text*.

    Physical lines end at "\n" and have trailing CRs stripped.  A line
    starting with a space or a tab continues the previous one (RFC 2425
    folding); that one character is dropped.  A final line without a
    newline is included.

    >>> list(_unfolded_lines("FN:Jo\r\n hn\r\nEND:VCARD\r\n"))
    ['FN:John', 'END:VCARD']
    """
    physical = text.split('\n')
    if physical[-1] == "":
        physical.pop()  # nothing follows the last newline
    logical = None
    for piece in physical:
        piece = piece.rstrip('\r')
        if logical is not None and piece[:1] in (' ', '\t'):
            logical += piece[1:]
        else:
            if logical is not None:
                yield logical
            logical = piece
    if logical is not None:
        yield logical


def _render_group(g, pairs):
    """Return the N3 text for the fields collected under group name *g*.

    *pairs* is the list of (predicate, object) strings of the group.
    Raises ValueError when the group does not fit a known pattern.
    """
    kludges = [(p, o) for p, o in pairs if p in kludgeTags]
    data = [(p, o) for p, o in pairs if p not in kludgeTags]
    if len(data) == 1:  # The AddressBook model, one data item + kludges
        pred, obj = data[0]
        for p, o in kludges:
            if p == 'v:x-ablabel':
                pred = o  # Override predicate
            if p == 'v:x-abadr':  # annotate object
                if obj[-1:] != ']':
                    raise ValueError(
                        "Cannot annotate object in group:" + repr(pairs))
                obj = obj[:-1] + ' ' + p + ' ' + o + ';]'
        return "%s %s;\n" % (pred, obj)
    if kludges:
        raise ValueError("Unknown Group pattern:" + repr(pairs))
    res = "".join([" %s %s;" % (p, o) for p, o in pairs])
    return "%s [ # %s\n%s];\n" % ("loc:%s" % munge(g), g, res)


def extract(path):
    """Read the vCard file *path* and write its N3 form to stdout.

    The output starts with a header of comments and prefix declarations,
    then has one statement per card, written when the card's END line is
    read.  Blank input lines are skipped.

    Raises whatever open() raises for an unreadable path, UnicodeError
    for undecodable data, and ValueError for unsupported content.
    """
    wr(_HEADER % path)

    with open(path, "rb") as stream:
        raw = stream.read()
    wr("# Length: " + repr(len(raw)) + "starts ")
    wr(" ".join(["%2x" % byte for byte in bytearray(raw[:8])]))
    wr("\n")
    text, unmarked = _decode(raw)
    if unmarked:
        wr(" #Warning: UTF-16 was not byte order marked.\n")

    group = None  # name of the group being collected, if any
    pairs = []  # (predicate, object) pairs of that group
    # Defined up front so that data ahead of a BEGIN line cannot hit an
    # unbound variable.
    cardData = ""
    cardID = "[]"

    for line in _unfolded_lines(text):
        if not line:
            continue
        m = _GROUP_LINE.match(line)
        if m:
            g = m.group(1)
            line = m.group(2)
            if group != g:
                if group is not None:
                    cardData += _render_group(group, pairs)
                group = g
                pairs = []
            n, props, value = icslex.parseLine(line)
            if n == 'x-ablabel':
                pred = 'v:' + n
                if value[:4] == "_$!<" and value[-4:] == ">!$_":  # [sic]
                    obj = "abl:" + munge(value[4:-4]).lower()
                else:  # User generated
                    obj = "user:" + munge(value).lower()
            else:
                pred, obj = predicateObject(n, props, value, line)
            pairs.append((pred, obj))
        else:
            if group is not None:  # End group
                cardData += _render_group(group, pairs)
                group = None
            n, props, value = icslex.parseLine(line)
            if n == 'begin':
                cardData = ""
                cardID = "[]"
            elif n == "uid":
                # NOTE(review): the URI template was missing from the
                # source this was restored from; "uid:" is assumed.
                cardID = "<uid:%s>" % value
            elif n == 'end':
                wr("%s %s." % (cardID, cardData))
            elif n == 'n':  # ugh special case
                names = fieldProperties.get(n, None)
                # Naked fields - see notes
                cardData += orderedFields(value, names) + '\n'
            else:
                p, o = predicateObject(n, props, value, line)
                if p:
                    cardData += " %s %s;\n" % (p, o)

    wr("\n\n#ends\n")


def do(path):
    """Convert one file, naming it on stderr first when verbose is set.

    NOTE: *path* is handed straight to extract(), so a directory (such as
    the default ".") makes open() fail; directories are not walked.
    """
    if verbose:
        sys.stderr.write("Doing " + path + "\n")
    extract(path)


###################################
def _test():
    """Run the doctests of this module."""
    import doctest
    doctest.testmod()


######################################## Main program

recursive = 0
nochange = 1
verbose = 0
files = []


def main(argv=None):
    """Handle the command line: options -v and -t, then files to convert.

    *argv* is the argument list without the program name; it defaults to
    sys.argv[1:].  An unknown option prints the usage text and exits with
    status -1.
    """
    global verbose, files
    if argv is None:
        argv = sys.argv[1:]
    for arg in argv:
        if arg[0:1] == "-":
            if arg == "-v":
                verbose = 1
            elif arg == '-t':  # elif, so that -v is not taken as bad
                _test()
                sys.exit(0)
            else:
                sys.stdout.write(_USAGE + "\n")
                sys.exit(-1)
        else:
            files.append(arg)

    if files == []:
        files = ["."]  # Default to this directory

    for path in files:
        do(path)


# Only act on the command line when run as a script, so that the module
# can be imported without side effects.
if __name__ == "__main__":
    main()