Credit: Julius Welby
Parsers of tabular data or comma-separated values (CSV) files usually output a list of lists. Converting these into XML allows them to be manipulated with XSLT and other XML tools. Example 12-2 takes a list of equal-length lists and converts it into XML (or, optionally, into an HTML table).
Example 12-2. Converting a list of equal-length lists into XML
# LL2XML.py -- Version 0.3 -- 15 July 2001
# http://www.outwardlynormal.com/python/ll2XML.htm for the full docs
"""Convert a list of equal-length lists into XML or an HTML table."""

import string  # no longer used below; kept so the module's imports are unchanged


# Set up exceptions
class Error(Exception):
    """Raised by LL2XML when the input lists or headings are inconsistent.

    Attributes:
        errcode: the code string passed by the raiser; one of
            "Length Error - Sublists",
            "Heading Error - Empty Item" or
            "Heading Error - heading/sublist mismatch".
        message: list of message fragments.
        errmsg: the fragments joined with single spaces; this is what
            str(error) returns.
    """

    def __init__(self, errcode, heading_num=0, sublist_length=0):
        """Build the message for errcode.

        heading_num and sublist_length are only used by the
        heading/sublist mismatch message.
        """
        self.errcode = errcode
        if errcode == "Length Error - Sublists":
            self.message = ["All the sublists must be of uniform length."]
        elif errcode == "Heading Error - Empty Item":
            self.message = ["There is at least one empty heading item.\n",
                            "Please supply non-empty headings."]
        elif errcode in ("Heading Error - heading/sublist mismatch",
                         # older spelling of the same code, still accepted
                         "Length Error: heading/sublist mismatch"):
            # Report the actual counts, not the names of the variables.
            self.message = ["Number of headings =", str(heading_num), "\n",
                            "Number of elements in sublists =",
                            str(sublist_length), "\n",
                            "These numbers must be equal."]
        else:
            # Unknown code: show the code itself rather than an empty message.
            self.message = [str(errcode)]
        self.errmsg = " ".join(self.message)

    def __str__(self):
        return self.errmsg


# (character, entity name) pairs.  The ampersand must stay first so that the
# ampersands introduced by the later replacements are not escaped again.  The
# references are assembled at run time rather than written out literally, so
# the source stays correct even if it passes through something that decodes
# entity references.
_XML_ENTITIES = (
    ("&", "amp"),
    ("'", "apos"),
    ("<", "lt"),
    (">", "gt"),
    ('"', "quot"),
)


def escape(s):
    """Return s with '&', "'", '<', '>' and '"' replaced by XML entities."""
    for char, name in _XML_ENTITIES:
        s = s.replace(char, "&" + name + ";")
    return s


def cleanTag(s):
    """Turn s into a tag name: stringify, lowercase, spaces to underscores.

    The result is also passed through escape().
    NOTE(review): an entity reference is not legal inside an XML element
    name, so a heading that contains markup characters still yields
    ill-formed output -- confirm whether such headings should be rejected.
    """
    if not isinstance(s, str):
        s = str(s)
    return escape(s.lower().replace(" ", "_"))


def LL2XML(LL, headings_tuple=(), root_element="rows", row_element="row",
           xml_declared="yes"):
    """Convert a list of equal-length lists into an XML (or HTML) string.

    Arguments:
        LL: sequence of rows, each a sequence of items.  Items that are not
            strings are converted with repr(); all items are XML-escaped.
        headings_tuple: element names for the items of a row.  If empty, the
            first row of LL supplies the names and is not emitted as data.
            The special value "table" produces an HTML table instead: every
            item becomes a <td>, rows become <tr>, the root becomes <table>
            and no XML declaration is written.
        root_element: name of the document element.
        row_element: name of the element wrapping each row.
        xml_declared: "yes" to start the output with an XML declaration;
            any other value omits it.

    Returns:
        The document as a single string.  An empty LL gives a document that
        contains only the (empty) root element.

    Raises:
        Error: if the rows differ in length, if the number of headings
            differs from the row length, or if any heading is empty.
    """
    table_requested = headings_tuple == "table"
    if table_requested:
        root_element = "table"
        row_element = "tr"
        xml_declared = "no"
    root_element = cleanTag(root_element)
    row_element = cleanTag(row_element)

    if xml_declared == "yes":
        xml_declaration = '<?xml version="1.0" encoding="iso-8859-1"?>\n'
    else:
        xml_declaration = ""

    # Nothing to convert: avoid indexing LL[0] and emit an empty root element.
    if len(LL) == 0:
        return "".join([xml_declaration, "<", root_element, ">",
                        "\n</", root_element, ">"])

    sublist_length = len(LL[0])
    if table_requested:
        headings_tuple = ("td",) * sublist_length

    if not headings_tuple:
        headings = LL[0]
        rows = LL[1:]  # the heading row is not data
    else:
        headings = headings_tuple
        rows = LL

    # Sublists all of the same length?
    for sublist in LL:
        if len(sublist) != sublist_length:
            raise Error("Length Error - Sublists")

    # Check headings
    heading_num = len(headings)
    if heading_num != sublist_length:
        raise Error("Heading Error - heading/sublist mismatch",
                    heading_num, sublist_length)
    for item in headings:
        if not item:
            raise Error("Heading Error - Empty Item")

    # Do the conversion.  Tag names are the same for every row, so clean
    # them once instead of once per cell.
    tags = [cleanTag(heading) for heading in headings]
    bits = [xml_declaration, "<", root_element, ">"]
    for sublist in rows:
        bits.extend(["\n <", row_element, ">\n"])
        for tag, item in zip(tags, sublist):
            if not isinstance(item, str):
                item = repr(item)
            bits.extend([" <", tag, ">", escape(item), "</", tag, ">\n"])
        bits.extend([" </", row_element, ">"])
    bits.extend(["\n</", root_element, ">"])
    return "".join(bits)


def test():
    """Print four example conversions of a small staff table."""
    LL = [
        ['Login', 'First Name', 'Last Name', 'Job', 'Group', 'Office',
         'Permission'],
        ['auser', 'Arnold', 'Atkins', 'Partner', 'Tax', 'London', 'read'],
        ['buser', 'Bill', 'Brown', 'Partner', 'Tax', 'New York', 'read'],
        ['cuser', 'Clive', 'Cutler', 'Partner', 'Management', 'Brussels',
         'read'],
        ['duser', 'Denis', 'Davis', 'Developer', 'ISS', 'London', 'admin'],
        ['euser', 'Eric', 'Ericsson', 'Analyst', 'Analysis', 'London',
         'admin'],
        ['fuser', 'Fabian', 'Fowles', 'Partner', 'IP', 'London', 'read'],
    ]
    LL_no_heads = LL[1:]

    # Each print() gets exactly one string argument, and blank lines are
    # written as print(""), so the output is the same whether print is a
    # statement or a function.

    # Example 1
    print("Example 1: Simple case, using defaults.\n")
    print(LL2XML(LL))
    print("")

    # Example 2
    print("Example 2: LL has its headings in the first line, "
          "and we define our root and row element names.\n")
    print(LL2XML(LL, (), "people", "person"))
    print("")

    # Example 3
    print("Example 3: headings supplied using the headings argument(tuple), "
          "using default root and row element names.\n")
    print(LL2XML(LL_no_heads,
                 ("Login", "First Name", "Last Name", "Job", "Group",
                  "Office", "Permission")))
    print("")

    # Example 4
    print('Example 4: The special case where we ask for an HTML table as '
          'output by just giving the string "table" as the second '
          'argument.\n')
    print(LL2XML(LL, "table"))
    print("")


if __name__ == '__main__':
    test()
If the first sublist is a list of headings, these are used to form the element names of the rest of the data, or else the element names can be defined in the function call. Root and row elements can be named if required.
This recipe is coded for compatibility with all versions of Python, including extremely old versions, to the point of reimplementing the escape functionality rather than relying on the escaping functions supplied by Python’s standard library.
For the specific job of parsing CSV you should probably use one of the existing Python modules available at the Vaults of Parnassus (http://www.vex.net/parnassus/apyllo.py?find=csv); two such parsers are at http://tratt.net/laurie/python/asv/ and http://www.object-craft.com.au/projects/csv/; the permanent home of this module is http://www.outwardlynormal.com/python/ll2XML.htm.
Get Python Cookbook now with the O’Reilly learning platform.
O’Reilly members experience books, live events, courses curated by job role, and more from O’Reilly and nearly 200 top publishers.