#!/usr/bin/env python
#
# Copyright (c) 2006-2010 Benjamin Schweizer. All rights reserved.
#
# Abstract
# ~~~~~~~~
# This module provides methods for reading and writing LDIF files as
# specified in RFC 2849.
#
# Features
# ~~~~~~~~
# - read/write ldif files
# - folding/multi-line support
# - base64-latin1 support
#
# Authors
# ~~~~~~~
# Benjamin Schweizer, http://benjamin-schweizer.de/contact
#
# Changes
# ~~~~~~~
# 2010-03-19, benjamin: fixed multiline base64 output
# 2006-10-18, benjamin: fixed comments handling and empty lines
# 2006-04-03, benjamin: made module encapsulation
# 2006-03-03, benjamin: initial release
#
# Todo
# ~~~~
# - support for base64 chars
# - input validation
# - check zero length attributes
# - check multi line line length
# - testing
#

import base64
import io
import sys


_USAGE = """Copyright (c) 2006-2010 Benjamin Schweizer. All rights reserved.

usage: %s INPUT.ldif OUTPUT.ldif
"""


def _split_pair(line):
    """Split the first physical line of an attribute into its parts.

    Returns a ``(key, value, is_base64)`` tuple.  The split happens at the
    FIRST colon only, so a plain value that itself contains ``':: '`` is not
    mistaken for base64.  Any number of spaces after the separator is
    dropped (RFC 2849 allows zero or more).  A line without a colon yields
    the whole line as key and an empty value.  URL references (``attr:<``)
    are not resolved; the value is returned verbatim including the ``<``.
    """
    key, _, rest = line.partition(':')
    is_base64 = rest.startswith(':')
    if is_base64:
        rest = rest[1:]
    return key, rest.lstrip(' '), is_base64


def _store_pair(record, key, value, is_base64, encoding):
    """Append ``value`` to ``record[key]``, decoding base64 values first."""
    if is_base64:
        # b64decode replaces base64.decodestring, which was removed in
        # Python 3.9; it accepts the ASCII text assembled by the parser.
        value = base64.b64decode(value).decode(encoding)
    record.setdefault(key, []).append(value)


def parser(filehandle, encoding='utf-8'):
    """Parse an LDIF stream, implemented as a generator.

    ``filehandle`` must provide ``readline()``.  ``encoding`` is used to
    decode base64 encoded values; plain values are passed through as read.

    Yields one dict per record, mapping each attribute name to the list of
    its values in file order.  After the last record a single ``False`` is
    yielded as end marker, so callers can loop with ``while record:``.

    Raises ``ValueError`` if a continuation line appears where there is no
    attribute value to continue.
    """
    record = {}
    key = None          # name of the attribute currently being assembled
    value = None        # its (possibly still folded) value, None if none
    is_base64 = False
    in_comment = False  # True while inside a (possibly folded) comment
    lineno = 0

    while True:
        raw = filehandle.readline()
        if not raw:
            break
        lineno += 1
        # Strip only the line terminator.  Blindly cutting the last
        # character would damage a final line that has no newline and
        # would leave the '\r' of CRLF files in place.
        line = raw.rstrip('\r\n')

        if line.startswith(' '):
            # continuation of the previous physical line
            if in_comment:
                continue
            if value is None:
                raise ValueError(
                    "line %d: continuation line without a preceding "
                    "attribute" % lineno)
            value += line[1:]
            continue

        # Any other line terminates the attribute assembled so far.
        # Pairs with an empty attribute name are dropped.
        if key:
            _store_pair(record, key, value, is_base64, encoding)
        key = None
        value = None
        is_base64 = False
        in_comment = line.startswith('#')

        if line == '':
            # end of record, yield if we have data
            if record:
                yield record
                record = {}
        elif not in_comment:
            key, value, is_base64 = _split_pair(line)

    # the stream may end without a trailing blank line
    if key:
        _store_pair(record, key, value, is_base64, encoding)
    if record:
        yield record
    yield False


class writer(object):
    """A writer for LDIF files.

    A header comment is written on construction and a trailer comment on
    ``close()``.  The writer can be used as a context manager; ``close()``
    only finishes the LDIF output, it does not close ``filehandle``.
    ``filehandle`` must accept text (``str``) in ``write()``.
    """

    def __init__(self, filehandle, encoding='utf-8'):
        self.filehandle = filehandle
        self.encoding = encoding
        self._closed = False
        self.filehandle.write("#\n# Generated with tinyparsers.ldif\n#\n\n")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def __del__(self):
        # Fallback for callers that rely on garbage collection to finish
        # the file.  Errors are suppressed because the underlying file may
        # already be closed and a destructor cannot report them anyway.
        try:
            self.close()
        except (AttributeError, ValueError, IOError):
            pass

    def close(self):
        """Write the trailer comment; further calls are no-ops."""
        if getattr(self, '_closed', False):
            return
        self._closed = True
        self.filehandle.write("\n# eof.\n")

    @staticmethod
    def _needs_base64(value):
        """Tell whether ``value`` cannot be written as a plain value."""
        # A plain value must not start with space, ':' or '<' and should
        # not end with a space (RFC 2849); otherwise it would not survive
        # being read back.
        if value.startswith((' ', ':', '<')) or value.endswith(' '):
            return True
        return any(not 32 <= ord(char) < 128 for char in value)

    def _write_pair(self, key, value):
        """Write one attribute, base64 encoded if needed, folding it."""
        if self._needs_base64(value):
            encoded = base64.b64encode(value.encode(self.encoding))
            line = "%s:: %s" % (key, encoded.decode('ascii'))
        else:
            line = "%s: %s" % (key, value)
        self.filehandle.write(line[:78] + "\n")
        # continuation lines carry one leading space plus 76 characters
        for start in range(78, len(line), 76):
            self.filehandle.write(' ' + line[start:start + 76] + "\n")

    def put(self, record):
        """Write one record followed by a blank separator line.

        ``record`` maps attribute names to lists of values, as produced by
        ``parser``.  ``dn`` is written first, then ``changetype`` if
        present (the order RFC 2849 requires), then the remaining
        attributes in the record's own order.  Empty values are skipped.

        Raises ``ValueError`` if the record has no ``dn``.
        """
        if 'dn' not in record:
            raise ValueError("record has no 'dn' attribute")
        leading = [key for key in ('dn', 'changetype') if key in record]
        remaining = [key for key in record if key not in leading]
        for key in leading + remaining:
            for element in record[key]:
                # suppress empty lines
                if element:
                    self._write_pair(key, element)
        self.filehandle.write("\n")


def _main(argv):
    """Convert a latin1 LDIF file into a utf-8 one; returns the exit code."""
    if len(argv) != 3:
        sys.stdout.write(_USAGE % argv[0])
        return 1

    # The input is decoded as latin1 so that every byte maps to a character.
    # Non-ASCII output is always base64 encoded, so text mode is sufficient
    # for the output file.
    with io.open(argv[1], 'r', encoding='latin1') as source:
        with open(argv[2], 'w') as target:
            with writer(target, encoding="utf-8") as ldifwriter:
                for record in parser(source, encoding="latin1"):
                    if not record:
                        break
                    sys.stdout.write('. ')
                    ldifwriter.put(record)
    sys.stdout.write("\n")
    return 0


if __name__ == '__main__':
    sys.exit(_main(sys.argv))

# eof.