From 3fd81c73cfd8bad36b2a1cf7955006e35c1d9db5 Mon Sep 17 00:00:00 2001 From: Camil Staps Date: Fri, 23 Oct 2015 16:44:24 +0200 Subject: Assignment 3: code, plots --- Assignment 3/packages/xlrd/xlsx.py | 804 +++++++++++++++++++++++++++++++++++++ 1 file changed, 804 insertions(+) create mode 100644 Assignment 3/packages/xlrd/xlsx.py (limited to 'Assignment 3/packages/xlrd/xlsx.py') diff --git a/Assignment 3/packages/xlrd/xlsx.py b/Assignment 3/packages/xlrd/xlsx.py new file mode 100644 index 0000000..89f9753 --- /dev/null +++ b/Assignment 3/packages/xlrd/xlsx.py @@ -0,0 +1,804 @@ +## +# Portions copyright (c) 2008-2012 Stephen John Machin, Lingfo Pty Ltd +# This module is part of the xlrd package, which is released under a BSD-style licence. +## + +from __future__ import print_function, unicode_literals + +DEBUG = 0 + +import sys +import re +from .timemachine import * +from .book import Book, Name +from .biffh import error_text_from_code, XLRDError, XL_CELL_BLANK, XL_CELL_TEXT, XL_CELL_BOOLEAN, XL_CELL_ERROR +from .formatting import is_date_format_string, Format, XF +from .sheet import Sheet + +DLF = sys.stdout # Default Log File + +ET = None +ET_has_iterparse = False +Element_has_iter = False + +def ensure_elementtree_imported(verbosity, logfile): + global ET, ET_has_iterparse, Element_has_iter + if ET is not None: + return + if "IronPython" in sys.version: + import xml.etree.ElementTree as ET + #### 2.7.2.1: fails later with + #### NotImplementedError: iterparse is not supported on IronPython. (CP #31923) + else: + try: import xml.etree.cElementTree as ET + except ImportError: + try: import cElementTree as ET + except ImportError: + try: import lxml.etree as ET + except ImportError: + try: import xml.etree.ElementTree as ET + except ImportError: + try: import elementtree.ElementTree as ET + except ImportError: + raise Exception("Failed to import an ElementTree implementation") + if hasattr(ET, 'iterparse'): + _dummy_stream = BYTES_IO(b'') + try: + ET.iterparse(_dummy_stream) + ET_has_iterparse = True + except NotImplementedError: + pass + Element_has_iter = hasattr(ET.ElementTree, 'iter') + if verbosity: + etree_version = repr([ + (item, getattr(ET, item)) + for item in ET.__dict__.keys() + if item.lower().replace('_', '') == 'version' + ]) + print(ET.__file__, ET.__name__, etree_version, ET_has_iterparse, file=logfile) + +def split_tag(tag): + pos = tag.rfind('}') + 1 + if pos >= 2: + return tag[:pos], tag[pos:] + return '', tag + +def augment_keys(adict, uri): + # uri must already be enclosed in {} + for x in list(adict.keys()): + adict[uri + x] = adict[x] + +_UPPERCASE_1_REL_INDEX = {} # Used in fast conversion of column names (e.g. "XFD") to indices (16383) +for _x in xrange(26): + _UPPERCASE_1_REL_INDEX["ABCDEFGHIJKLMNOPQRSTUVWXYZ"[_x]] = _x + 1 +for _x in "123456789": + _UPPERCASE_1_REL_INDEX[_x] = 0 +del _x + +def cell_name_to_rowx_colx(cell_name, letter_value=_UPPERCASE_1_REL_INDEX): + # Extract column index from cell name + # A => 0, Z =>25, AA => 26, XFD => 16383 + colx = 0 + charx = -1 + try: + for c in cell_name: + charx += 1 + lv = letter_value[c] + if lv: + colx = colx * 26 + lv + else: # start of row number; can't be '0' + colx = colx - 1 + assert 0 <= colx < X12_MAX_COLS + break + except KeyError: + raise Exception('Unexpected character %r in cell name %r' % (c, cell_name)) + rowx = int(cell_name[charx:]) - 1 + return rowx, colx + +error_code_from_text = {} +for _code, _text in error_text_from_code.items(): + error_code_from_text[_text] = _code + +# === X12 === Excel 2007 .xlsx =============================================== + +U_SSML12 = "{http://schemas.openxmlformats.org/spreadsheetml/2006/main}" +U_ODREL = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}" +U_PKGREL = "{http://schemas.openxmlformats.org/package/2006/relationships}" +U_CP = "{http://schemas.openxmlformats.org/package/2006/metadata/core-properties}" +U_DC = "{http://purl.org/dc/elements/1.1/}" +U_DCTERMS = "{http://purl.org/dc/terms/}" +XML_SPACE_ATTR = "{http://www.w3.org/XML/1998/namespace}space" +XML_WHITESPACE = "\t\n \r" +X12_MAX_ROWS = 2 ** 20 +X12_MAX_COLS = 2 ** 14 +V_TAG = U_SSML12 + 'v' # cell child: value +F_TAG = U_SSML12 + 'f' # cell child: formula +IS_TAG = U_SSML12 + 'is' # cell child: inline string + +def unescape(s, + subber=re.compile(r'_x[0-9A-Fa-f]{4,4}_', re.UNICODE).sub, + repl=lambda mobj: unichr(int(mobj.group(0)[2:6], 16)), + ): + if "_" in s: + return subber(repl, s) + return s + +def cooked_text(self, elem): + t = elem.text + if t is None: + return '' + if elem.get(XML_SPACE_ATTR) != 'preserve': + t = t.strip(XML_WHITESPACE) + return ensure_unicode(unescape(t)) + +def get_text_from_si_or_is(self, elem, r_tag=U_SSML12+'r', t_tag=U_SSML12 +'t'): + "Returns unescaped unicode" + accum = [] + for child in elem: + # self.dump_elem(child) + tag = child.tag + if tag == t_tag: + t = cooked_text(self, child) + if t: # note: .text attribute can be None + accum.append(t) + elif tag == r_tag: + for tnode in child: + if tnode.tag == t_tag: + t = cooked_text(self, tnode) + if t: + accum.append(t) + return ''.join(accum) + +def map_attributes(amap, elem, obj): + for xml_attr, obj_attr, cnv_func_or_const in amap: + if not xml_attr: + setattr(obj, obj_attr, cnv_func_or_const) + continue + if not obj_attr: continue #### FIX ME #### + raw_value = elem.get(xml_attr) + cooked_value = cnv_func_or_const(raw_value) + setattr(obj, obj_attr, cooked_value) + +def cnv_ST_Xstring(s): + if s is None: return "" + return ensure_unicode(s) + +def cnv_xsd_unsignedInt(s): + if not s: + return None + value = int(s) + assert value >= 0 + return value + +def cnv_xsd_boolean(s): + if not s: + return 0 + if s in ("1", "true", "on"): + return 1 + if s in ("0", "false", "off"): + return 0 + raise ValueError("unexpected xsd:boolean value: %r" % s) + + +_defined_name_attribute_map = ( + ("name", "name", cnv_ST_Xstring, ), + ("comment", "", cnv_ST_Xstring, ), + ("customMenu", "", cnv_ST_Xstring, ), + ("description", "", cnv_ST_Xstring, ), + ("help", "", cnv_ST_Xstring, ), + ("statusBar", "", cnv_ST_Xstring, ), + ("localSheetId", "scope", cnv_xsd_unsignedInt, ), + ("hidden", "hidden", cnv_xsd_boolean, ), + ("function", "func", cnv_xsd_boolean, ), + ("vbProcedure", "vbasic", cnv_xsd_boolean, ), + ("xlm", "macro", cnv_xsd_boolean, ), + ("functionGroupId", "funcgroup", cnv_xsd_unsignedInt, ), + ("shortcutKey", "", cnv_ST_Xstring, ), + ("publishToServer", "", cnv_xsd_boolean, ), + ("workbookParameter", "", cnv_xsd_boolean, ), + ("", "any_err", 0, ), + ("", "any_external", 0, ), + ("", "any_rel", 0, ), + ("", "basic_formula_len", 0, ), + ("", "binary", 0, ), + ("", "builtin", 0, ), + ("", "complex", 0, ), + ("", "evaluated", 0, ), + ("", "excel_sheet_index", 0, ), + ("", "excel_sheet_num", 0, ), + ("", "option_flags", 0, ), + ("", "result", None, ), + ("", "stack", None, ), + ) + +def make_name_access_maps(bk): + name_and_scope_map = {} # (name.lower(), scope): Name_object + name_map = {} # name.lower() : list of Name_objects (sorted in scope order) + num_names = len(bk.name_obj_list) + for namex in xrange(num_names): + nobj = bk.name_obj_list[namex] + name_lcase = nobj.name.lower() + key = (name_lcase, nobj.scope) + if key in name_and_scope_map: + msg = 'Duplicate entry %r in name_and_scope_map' % (key, ) + if 0: + raise XLRDError(msg) + else: + if bk.verbosity: + print(msg, file=bk.logfile) + name_and_scope_map[key] = nobj + sort_data = (nobj.scope, namex, nobj) + if name_lcase in name_map: + name_map[name_lcase].append(sort_data) + else: + name_map[name_lcase] = [sort_data] + for key in name_map.keys(): + alist = name_map[key] + alist.sort() + name_map[key] = [x[2] for x in alist] + bk.name_and_scope_map = name_and_scope_map + bk.name_map = name_map + +class X12General(object): + + def process_stream(self, stream, heading=None): + if self.verbosity >= 2 and heading is not None: + fprintf(self.logfile, "\n=== %s ===\n", heading) + self.tree = ET.parse(stream) + getmethod = self.tag2meth.get + for elem in self.tree.iter() if Element_has_iter else self.tree.getiterator(): + if self.verbosity >= 3: + self.dump_elem(elem) + meth = getmethod(elem.tag) + if meth: + meth(self, elem) + self.finish_off() + + def finish_off(self): + pass + + def dump_elem(self, elem): + fprintf(self.logfile, + "===\ntag=%r len=%d attrib=%r text=%r tail=%r\n", + split_tag(elem.tag)[1], len(elem), elem.attrib, elem.text, elem.tail) + + def dumpout(self, fmt, *vargs): + text = (12 * ' ' + fmt + '\n') % vargs + self.logfile.write(text) + +class X12Book(X12General): + + def __init__(self, bk, logfile=DLF, verbosity=False): + self.bk = bk + self.logfile = logfile + self.verbosity = verbosity + self.bk.nsheets = 0 + self.bk.props = {} + self.relid2path = {} + self.relid2reltype = {} + self.sheet_targets = [] # indexed by sheetx + self.sheetIds = [] # indexed by sheetx + + core_props_menu = { + U_CP+"lastModifiedBy": ("last_modified_by", cnv_ST_Xstring), + U_DC+"creator": ("creator", cnv_ST_Xstring), + U_DCTERMS+"modified": ("modified", cnv_ST_Xstring), + U_DCTERMS+"created": ("created", cnv_ST_Xstring), + } + + def process_coreprops(self, stream): + if self.verbosity >= 2: + fprintf(self.logfile, "\n=== coreProps ===\n") + self.tree = ET.parse(stream) + getmenu = self.core_props_menu.get + props = {} + for elem in self.tree.iter() if Element_has_iter else self.tree.getiterator(): + if self.verbosity >= 3: + self.dump_elem(elem) + menu = getmenu(elem.tag) + if menu: + attr, func = menu + value = func(elem.text) + props[attr] = value + self.bk.user_name = props.get('last_modified_by') or props.get('creator') + self.bk.props = props + if self.verbosity >= 2: + fprintf(self.logfile, "props: %r\n", props) + self.finish_off() + + def process_rels(self, stream): + if self.verbosity >= 2: + fprintf(self.logfile, "\n=== Relationships ===\n") + tree = ET.parse(stream) + r_tag = U_PKGREL + 'Relationship' + for elem in tree.findall(r_tag): + rid = elem.get('Id') + target = elem.get('Target') + reltype = elem.get('Type').split('/')[-1] + if self.verbosity >= 2: + self.dumpout('Id=%r Type=%r Target=%r', rid, reltype, target) + self.relid2reltype[rid] = reltype + # self.relid2path[rid] = 'xl/' + target + if target.startswith('/'): + self.relid2path[rid] = target[1:] # drop the / + else: + self.relid2path[rid] = 'xl/' + target + + def do_defined_name(self, elem): + #### UNDER CONSTRUCTION #### + if 0 and self.verbosity >= 3: + self.dump_elem(elem) + nobj = Name() + bk = self.bk + nobj.bk = bk + nobj.name_index = len(bk.name_obj_list) + bk.name_obj_list.append(nobj) + nobj.name = elem.get('name') + nobj.raw_formula = None # compiled bytecode formula -- not in XLSX + nobj.formula_text = cooked_text(self, elem) + map_attributes(_defined_name_attribute_map, elem, nobj) + if nobj.scope is None: + nobj.scope = -1 # global + if nobj.name.startswith("_xlnm."): + nobj.builtin = 1 + if self.verbosity >= 2: + nobj.dump(header='=== Name object ===') + + def do_defined_names(self, elem): + for child in elem: + self.do_defined_name(child) + make_name_access_maps(self.bk) + + def do_sheet(self, elem): + bk = self.bk + sheetx = bk.nsheets + # print elem.attrib + rid = elem.get(U_ODREL + 'id') + sheetId = int(elem.get('sheetId')) + name = unescape(ensure_unicode(elem.get('name'))) + reltype = self.relid2reltype[rid] + target = self.relid2path[rid] + if self.verbosity >= 2: + self.dumpout( + 'sheetx=%d sheetId=%r rid=%r type=%r name=%r', + sheetx, sheetId, rid, reltype, name) + if reltype != 'worksheet': + if self.verbosity >= 2: + self.dumpout('Ignoring sheet of type %r (name=%r)', reltype, name) + return + state = elem.get('state') + visibility_map = { + None: 0, + 'visible': 0, + 'hidden': 1, + 'veryHidden': 2 + } + bk._sheet_visibility.append(visibility_map[state]) + sheet = Sheet(bk, position=None, name=name, number=sheetx) + sheet.utter_max_rows = X12_MAX_ROWS + sheet.utter_max_cols = X12_MAX_COLS + bk._sheet_list.append(sheet) + bk._sheet_names.append(name) + bk.nsheets += 1 + self.sheet_targets.append(target) + self.sheetIds.append(sheetId) + + + def do_workbookpr(self, elem): + datemode = cnv_xsd_boolean(elem.get('date1904')) + if self.verbosity >= 2: + self.dumpout('datemode=%r', datemode) + self.bk.datemode = datemode + + tag2meth = { + 'definedNames': do_defined_names, + 'workbookPr': do_workbookpr, + 'sheet': do_sheet, + } + augment_keys(tag2meth, U_SSML12) + +class X12SST(X12General): + + def __init__(self, bk, logfile=DLF, verbosity=0): + self.bk = bk + self.logfile = logfile + self.verbosity = verbosity + if ET_has_iterparse: + self.process_stream = self.process_stream_iterparse + else: + self.process_stream = self.process_stream_findall + + def process_stream_iterparse(self, stream, heading=None): + if self.verbosity >= 2 and heading is not None: + fprintf(self.logfile, "\n=== %s ===\n", heading) + si_tag = U_SSML12 + 'si' + elemno = -1 + sst = self.bk._sharedstrings + for event, elem in ET.iterparse(stream): + if elem.tag != si_tag: continue + elemno = elemno + 1 + if self.verbosity >= 3: + fprintf(self.logfile, "element #%d\n", elemno) + self.dump_elem(elem) + result = get_text_from_si_or_is(self, elem) + sst.append(result) + elem.clear() # destroy all child elements + if self.verbosity >= 2: + self.dumpout('Entries in SST: %d', len(sst)) + if self.verbosity >= 3: + for x, s in enumerate(sst): + fprintf(self.logfile, "SST x=%d s=%r\n", x, s) + + def process_stream_findall(self, stream, heading=None): + if self.verbosity >= 2 and heading is not None: + fprintf(self.logfile, "\n=== %s ===\n", heading) + self.tree = ET.parse(stream) + si_tag = U_SSML12 + 'si' + elemno = -1 + sst = self.bk._sharedstrings + for elem in self.tree.findall(si_tag): + elemno = elemno + 1 + if self.verbosity >= 3: + fprintf(self.logfile, "element #%d\n", elemno) + self.dump_elem(elem) + result = get_text_from_si_or_is(self, elem) + sst.append(result) + if self.verbosity >= 2: + self.dumpout('Entries in SST: %d', len(sst)) + +class X12Styles(X12General): + + def __init__(self, bk, logfile=DLF, verbosity=0): + self.bk = bk + self.logfile = logfile + self.verbosity = verbosity + self.xf_counts = [0, 0] + self.xf_type = None + self.fmt_is_date = {} + for x in list(range(14, 23)) + list(range(45, 48)): #### hard-coding FIX ME #### + self.fmt_is_date[x] = 1 + # dummy entry for XF 0 in case no Styles section + self.bk._xf_index_to_xl_type_map[0] = 2 + # fill_in_standard_formats(bk) #### pre-integration kludge + + def do_cellstylexfs(self, elem): + self.xf_type = 0 + + def do_cellxfs(self, elem): + self.xf_type = 1 + + def do_numfmt(self, elem): + formatCode = ensure_unicode(elem.get('formatCode')) + numFmtId = int(elem.get('numFmtId')) + is_date = is_date_format_string(self.bk, formatCode) + self.fmt_is_date[numFmtId] = is_date + fmt_obj = Format(numFmtId, is_date + 2, formatCode) + self.bk.format_map[numFmtId] = fmt_obj + if self.verbosity >= 3: + self.dumpout('numFmtId=%d formatCode=%r is_date=%d', numFmtId, formatCode, is_date) + + def do_xf(self, elem): + if self.xf_type != 1: + #### ignoring style XFs for the moment + return + xfx = self.xf_counts[self.xf_type] + self.xf_counts[self.xf_type] = xfx + 1 + xf = XF() + self.bk.xf_list.append(xf) + self.bk.xfcount += 1 + numFmtId = int(elem.get('numFmtId', '0')) + xf.format_key = numFmtId + is_date = self.fmt_is_date.get(numFmtId, 0) + self.bk._xf_index_to_xl_type_map[xfx] = is_date + 2 + if self.verbosity >= 3: + self.dumpout( + 'xfx=%d numFmtId=%d', + xfx, numFmtId, + ) + self.dumpout(repr(self.bk._xf_index_to_xl_type_map)) + + tag2meth = { + 'cellStyleXfs': do_cellstylexfs, + 'cellXfs': do_cellxfs, + 'numFmt': do_numfmt, + 'xf': do_xf, + } + augment_keys(tag2meth, U_SSML12) + +class X12Sheet(X12General): + + def __init__(self, sheet, logfile=DLF, verbosity=0): + self.sheet = sheet + self.logfile = logfile + self.verbosity = verbosity + self.rowx = -1 # We may need to count them. + self.bk = sheet.book + self.sst = self.bk._sharedstrings + self.merged_cells = sheet.merged_cells + self.warned_no_cell_name = 0 + self.warned_no_row_num = 0 + if ET_has_iterparse: + self.process_stream = self.own_process_stream + + def own_process_stream(self, stream, heading=None): + if self.verbosity >= 2 and heading is not None: + fprintf(self.logfile, "\n=== %s ===\n", heading) + getmethod = self.tag2meth.get + row_tag = U_SSML12 + "row" + self_do_row = self.do_row + for event, elem in ET.iterparse(stream): + if elem.tag == row_tag: + self_do_row(elem) + elem.clear() # destroy all child elements (cells) + elif elem.tag == U_SSML12 + "dimension": + self.do_dimension(elem) + elif elem.tag == U_SSML12 + "mergeCell": + self.do_merge_cell(elem) + self.finish_off() + + def process_comments_stream(self, stream): + root = ET.parse(stream).getroot() + author_list = root[0] + assert author_list.tag == U_SSML12 + 'authors' + authors = [elem.text for elem in author_list] + comment_list = root[1] + assert comment_list.tag == U_SSML12 + 'commentList' + cell_note_map = self.sheet.cell_note_map + from .sheet import Note + text_tag = U_SSML12 + 'text' + r_tag = U_SSML12 + 'r' + t_tag = U_SSML12 + 't' + for elem in comment_list.findall(U_SSML12 + 'comment'): + ts = elem.findall('./' + text_tag + '/' + t_tag) + ts += elem.findall('./' + text_tag + '/' + r_tag + '/' + t_tag) + ref = elem.get('ref') + note = Note() + note.author = authors[int(elem.get('authorId'))] + note.rowx, note.colx = coords = cell_name_to_rowx_colx(ref) + note.text = '' + for t in ts: + note.text += cooked_text(self, t) + cell_note_map[coords] = note + + def do_dimension(self, elem): + ref = elem.get('ref') # example: "A1:Z99" or just "A1" + if ref: + # print >> self.logfile, "dimension: ref=%r" % ref + last_cell_ref = ref.split(':')[-1] # example: "Z99" + rowx, colx = cell_name_to_rowx_colx(last_cell_ref) + self.sheet._dimnrows = rowx + 1 + self.sheet._dimncols = colx + 1 + + def do_merge_cell(self, elem): + # The ref attribute should be a cell range like "B1:D5". + ref = elem.get('ref') + if ref: + first_cell_ref, last_cell_ref = ref.split(':') + first_rowx, first_colx = cell_name_to_rowx_colx(first_cell_ref) + last_rowx, last_colx = cell_name_to_rowx_colx(last_cell_ref) + self.merged_cells.append((first_rowx, last_rowx + 1, + first_colx, last_colx + 1)) + + def do_row(self, row_elem): + + def bad_child_tag(child_tag): + raise Exception('cell type %s has unexpected child <%s> at rowx=%r colx=%r' % (cell_type, child_tag, rowx, colx)) + + row_number = row_elem.get('r') + if row_number is None: # Yes, it's optional. + self.rowx += 1 + explicit_row_number = 0 + if self.verbosity and not self.warned_no_row_num: + self.dumpout("no row number; assuming rowx=%d", self.rowx) + self.warned_no_row_num = 1 + else: + self.rowx = int(row_number) - 1 + explicit_row_number = 1 + assert 0 <= self.rowx < X12_MAX_ROWS + rowx = self.rowx + colx = -1 + if self.verbosity >= 3: + self.dumpout(" row_number=%r rowx=%d explicit=%d", + row_number, self.rowx, explicit_row_number) + letter_value = _UPPERCASE_1_REL_INDEX + for cell_elem in row_elem: + cell_name = cell_elem.get('r') + if cell_name is None: # Yes, it's optional. + colx += 1 + if self.verbosity and not self.warned_no_cell_name: + self.dumpout("no cellname; assuming rowx=%d colx=%d", rowx, colx) + self.warned_no_cell_name = 1 + else: + # Extract column index from cell name + # A => 0, Z =>25, AA => 26, XFD => 16383 + colx = 0 + charx = -1 + try: + for c in cell_name: + charx += 1 + if c == '$': + continue + lv = letter_value[c] + if lv: + colx = colx * 26 + lv + else: # start of row number; can't be '0' + colx = colx - 1 + assert 0 <= colx < X12_MAX_COLS + break + except KeyError: + raise Exception('Unexpected character %r in cell name %r' % (c, cell_name)) + if explicit_row_number and cell_name[charx:] != row_number: + raise Exception('cell name %r but row number is %r' % (cell_name, row_number)) + xf_index = int(cell_elem.get('s', '0')) + cell_type = cell_elem.get('t', 'n') + tvalue = None + formula = None + if cell_type == 'n': + # n = number. Most frequent type. + # child contains plain text which can go straight into float() + # OR there's no text in which case it's a BLANK cell + for child in cell_elem: + child_tag = child.tag + if child_tag == V_TAG: + tvalue = child.text + elif child_tag == F_TAG: + formula = cooked_text(self, child) + else: + raise Exception('unexpected tag %r' % child_tag) + if not tvalue: + if self.bk.formatting_info: + self.sheet.put_cell(rowx, colx, XL_CELL_BLANK, '', xf_index) + else: + self.sheet.put_cell(rowx, colx, None, float(tvalue), xf_index) + elif cell_type == "s": + # s = index into shared string table. 2nd most frequent type + # child contains plain text which can go straight into int() + for child in cell_elem: + child_tag = child.tag + if child_tag == V_TAG: + tvalue = child.text + elif child_tag == F_TAG: + # formula not expected here, but gnumeric does it. + formula = child.text + else: + bad_child_tag(child_tag) + if not tvalue: + # + if self.bk.formatting_info: + self.sheet.put_cell(rowx, colx, XL_CELL_BLANK, '', xf_index) + else: + value = self.sst[int(tvalue)] + self.sheet.put_cell(rowx, colx, XL_CELL_TEXT, value, xf_index) + elif cell_type == "str": + # str = string result from formula. + # Should have (formula) child; however in one file, all text cells are str with no formula. + # child can contain escapes + for child in cell_elem: + child_tag = child.tag + if child_tag == V_TAG: + tvalue = cooked_text(self, child) + elif child_tag == F_TAG: + formula = cooked_text(self, child) + else: + bad_child_tag(child_tag) + # assert tvalue is not None and formula is not None + # Yuk. Fails with file created by gnumeric -- no tvalue! + self.sheet.put_cell(rowx, colx, XL_CELL_TEXT, tvalue, xf_index) + elif cell_type == "b": + # b = boolean + # child contains "0" or "1" + # Maybe the data should be converted with cnv_xsd_boolean; + # ECMA standard is silent; Excel 2007 writes 0 or 1 + for child in cell_elem: + child_tag = child.tag + if child_tag == V_TAG: + tvalue = child.text + elif child_tag == F_TAG: + formula = cooked_text(self, child) + else: + bad_child_tag(child_tag) + self.sheet.put_cell(rowx, colx, XL_CELL_BOOLEAN, int(tvalue), xf_index) + elif cell_type == "e": + # e = error + # child contains e.g. "#REF!" + for child in cell_elem: + child_tag = child.tag + if child_tag == V_TAG: + tvalue = child.text + elif child_tag == F_TAG: + formula = cooked_text(self, child) + else: + bad_child_tag(child_tag) + value = error_code_from_text[tvalue] + self.sheet.put_cell(rowx, colx, XL_CELL_ERROR, value, xf_index) + elif cell_type == "inlineStr": + # Not expected in files produced by Excel. + # Only possible child is . + # It's a way of allowing 3rd party s/w to write text (including rich text) cells + # without having to build a shared string table + for child in cell_elem: + child_tag = child.tag + if child_tag == IS_TAG: + tvalue = get_text_from_si_or_is(self, child) + else: + bad_child_tag(child_tag) + assert tvalue is not None + self.sheet.put_cell(rowx, colx, XL_CELL_TEXT, tvalue, xf_index) + else: + raise Exception("Unknown cell type %r in rowx=%d colx=%d" % (cell_type, rowx, colx)) + + tag2meth = { + 'row': do_row, + } + augment_keys(tag2meth, U_SSML12) + +def open_workbook_2007_xml( + zf, + component_names, + logfile=sys.stdout, + verbosity=0, + use_mmap=0, + formatting_info=0, + on_demand=0, + ragged_rows=0, + ): + ensure_elementtree_imported(verbosity, logfile) + bk = Book() + bk.logfile = logfile + bk.verbosity = verbosity + bk.formatting_info = formatting_info + if formatting_info: + raise NotImplementedError("formatting_info=True not yet implemented") + bk.use_mmap = False #### Not supported initially + bk.on_demand = on_demand + if on_demand: + if verbosity: + print("WARNING *** on_demand=True not yet implemented; falling back to False", file=bk.logfile) + bk.on_demand = False + bk.ragged_rows = ragged_rows + + x12book = X12Book(bk, logfile, verbosity) + zflo = zf.open(component_names['xl/_rels/workbook.xml.rels']) + x12book.process_rels(zflo) + del zflo + zflo = zf.open(component_names['xl/workbook.xml']) + x12book.process_stream(zflo, 'Workbook') + del zflo + props_name = 'docprops/core.xml' + if props_name in component_names: + zflo = zf.open(component_names[props_name]) + x12book.process_coreprops(zflo) + + x12sty = X12Styles(bk, logfile, verbosity) + if 'xl/styles.xml' in component_names: + zflo = zf.open(component_names['xl/styles.xml']) + x12sty.process_stream(zflo, 'styles') + del zflo + else: + # seen in MS sample file MergedCells.xlsx + pass + + sst_fname = 'xl/sharedstrings.xml' + x12sst = X12SST(bk, logfile, verbosity) + if sst_fname in component_names: + zflo = zf.open(component_names[sst_fname]) + x12sst.process_stream(zflo, 'SST') + del zflo + + for sheetx in range(bk.nsheets): + fname = x12book.sheet_targets[sheetx] + zflo = zf.open(component_names[fname]) + sheet = bk._sheet_list[sheetx] + x12sheet = X12Sheet(sheet, logfile, verbosity) + heading = "Sheet %r (sheetx=%d) from %r" % (sheet.name, sheetx, fname) + x12sheet.process_stream(zflo, heading) + del zflo + comments_fname = 'xl/comments%d.xml' % (sheetx + 1) + if comments_fname in component_names: + comments_stream = zf.open(component_names[comments_fname]) + x12sheet.process_comments_stream(comments_stream) + del comments_stream + + sheet.tidy_dimensions() + + return bk -- cgit v1.2.3