diff options
author | Camil Staps | 2015-10-23 16:44:24 +0200 |
---|---|---|
committer | Camil Staps | 2015-10-23 16:51:10 +0200 |
commit | 3fd81c73cfd8bad36b2a1cf7955006e35c1d9db5 (patch) | |
tree | 30f99545bbcdd74d8965219faa6bfc7db70e8bc1 /Assignment 3/packages/xlrd/book.py | |
parent | Assignment 2 finished (diff) |
Assignment 3: code, plots
Diffstat (limited to 'Assignment 3/packages/xlrd/book.py')
-rw-r--r-- | Assignment 3/packages/xlrd/book.py | 1420 |
1 files changed, 1420 insertions, 0 deletions
diff --git a/Assignment 3/packages/xlrd/book.py b/Assignment 3/packages/xlrd/book.py new file mode 100644 index 0000000..7bb01b4 --- /dev/null +++ b/Assignment 3/packages/xlrd/book.py @@ -0,0 +1,1420 @@ +# Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd +# This module is part of the xlrd package, which is released under a +# BSD-style licence. + +from __future__ import print_function + +from .timemachine import * +from .biffh import * +import struct; unpack = struct.unpack +import sys +import time +from . import sheet +from . import compdoc +from .formula import * +from . import formatting +if sys.version.startswith("IronPython"): + # print >> sys.stderr, "...importing encodings" + import encodings + +empty_cell = sheet.empty_cell # for exposure to the world ... + +DEBUG = 0 + +USE_FANCY_CD = 1 + +TOGGLE_GC = 0 +import gc +# gc.set_debug(gc.DEBUG_STATS) + +try: + import mmap + MMAP_AVAILABLE = 1 +except ImportError: + MMAP_AVAILABLE = 0 +USE_MMAP = MMAP_AVAILABLE + +MY_EOF = 0xF00BAAA # not a 16-bit number + +SUPBOOK_UNK, SUPBOOK_INTERNAL, SUPBOOK_EXTERNAL, SUPBOOK_ADDIN, SUPBOOK_DDEOLE = range(5) + +SUPPORTED_VERSIONS = (80, 70, 50, 45, 40, 30, 21, 20) + +_code_from_builtin_name = { + "Consolidate_Area": "\x00", + "Auto_Open": "\x01", + "Auto_Close": "\x02", + "Extract": "\x03", + "Database": "\x04", + "Criteria": "\x05", + "Print_Area": "\x06", + "Print_Titles": "\x07", + "Recorder": "\x08", + "Data_Form": "\x09", + "Auto_Activate": "\x0A", + "Auto_Deactivate": "\x0B", + "Sheet_Title": "\x0C", + "_FilterDatabase": "\x0D", + } +builtin_name_from_code = {} +code_from_builtin_name = {} +for _bin, _bic in _code_from_builtin_name.items(): + _bin = UNICODE_LITERAL(_bin) + _bic = UNICODE_LITERAL(_bic) + code_from_builtin_name[_bin] = _bic + builtin_name_from_code[_bic] = _bin +del _bin, _bic, _code_from_builtin_name + +def open_workbook_xls(filename=None, + logfile=sys.stdout, verbosity=0, use_mmap=USE_MMAP, + file_contents=None, + encoding_override=None, + 
formatting_info=False, on_demand=False, ragged_rows=False, + ): + t0 = time.clock() + if TOGGLE_GC: + orig_gc_enabled = gc.isenabled() + if orig_gc_enabled: + gc.disable() + bk = Book() + try: + bk.biff2_8_load( + filename=filename, file_contents=file_contents, + logfile=logfile, verbosity=verbosity, use_mmap=use_mmap, + encoding_override=encoding_override, + formatting_info=formatting_info, + on_demand=on_demand, + ragged_rows=ragged_rows, + ) + t1 = time.clock() + bk.load_time_stage_1 = t1 - t0 + biff_version = bk.getbof(XL_WORKBOOK_GLOBALS) + if not biff_version: + raise XLRDError("Can't determine file's BIFF version") + if biff_version not in SUPPORTED_VERSIONS: + raise XLRDError( + "BIFF version %s is not supported" + % biff_text_from_num[biff_version] + ) + bk.biff_version = biff_version + if biff_version <= 40: + # no workbook globals, only 1 worksheet + if on_demand: + fprintf(bk.logfile, + "*** WARNING: on_demand is not supported for this Excel version.\n" + "*** Setting on_demand to False.\n") + bk.on_demand = on_demand = False + bk.fake_globals_get_sheet() + elif biff_version == 45: + # worksheet(s) embedded in global stream + bk.parse_globals() + if on_demand: + fprintf(bk.logfile, "*** WARNING: on_demand is not supported for this Excel version.\n" + "*** Setting on_demand to False.\n") + bk.on_demand = on_demand = False + else: + bk.parse_globals() + bk._sheet_list = [None for sh in bk._sheet_names] + if not on_demand: + bk.get_sheets() + bk.nsheets = len(bk._sheet_list) + if biff_version == 45 and bk.nsheets > 1: + fprintf(bk.logfile, + "*** WARNING: Excel 4.0 workbook (.XLW) file contains %d worksheets.\n" + "*** Book-level data will be that of the last worksheet.\n", + bk.nsheets + ) + if TOGGLE_GC: + if orig_gc_enabled: + gc.enable() + t2 = time.clock() + bk.load_time_stage_2 = t2 - t1 + except: + bk.release_resources() + raise + # normal exit + if not on_demand: + bk.release_resources() + return bk + +## +# For debugging: dump the file's BIFF 
records in char & hex. +# @param filename The path to the file to be dumped. +# @param outfile An open file, to which the dump is written. +# @param unnumbered If true, omit offsets (for meaningful diffs). + +def dump(filename, outfile=sys.stdout, unnumbered=False): + bk = Book() + bk.biff2_8_load(filename=filename, logfile=outfile, ) + biff_dump(bk.mem, bk.base, bk.stream_len, 0, outfile, unnumbered) + +## +# For debugging and analysis: summarise the file's BIFF records. +# I.e. produce a sorted file of (record_name, count). +# @param filename The path to the file to be summarised. +# @param outfile An open file, to which the summary is written. + +def count_records(filename, outfile=sys.stdout): + bk = Book() + bk.biff2_8_load(filename=filename, logfile=outfile, ) + biff_count_records(bk.mem, bk.base, bk.stream_len, outfile) + +## +# Information relating to a named reference, formula, macro, etc. +# <br /> -- New in version 0.6.0 +# <br /> -- <i>Name information is <b>not</b> extracted from files older than +# Excel 5.0 (Book.biff_version < 50)</i> + +class Name(BaseObject): + + _repr_these = ['stack'] + book = None # parent + + ## + # 0 = Visible; 1 = Hidden + hidden = 0 + + ## + # 0 = Command macro; 1 = Function macro. Relevant only if macro == 1 + func = 0 + + ## + # 0 = Sheet macro; 1 = VisualBasic macro. Relevant only if macro == 1 + vbasic = 0 + + ## + # 0 = Standard name; 1 = Macro name + macro = 0 + + ## + # 0 = Simple formula; 1 = Complex formula (array formula or user defined)<br /> + # <i>No examples have been sighted.</i> + complex = 0 + + ## + # 0 = User-defined name; 1 = Built-in name + # (common examples: Print_Area, Print_Titles; see OOo docs for full list) + builtin = 0 + + ## + # Function group. Relevant only if macro == 1; see OOo docs for values. 
+ funcgroup = 0 + + ## + # 0 = Formula definition; 1 = Binary data<br /> <i>No examples have been sighted.</i> + binary = 0 + + ## + # The index of this object in book.name_obj_list + name_index = 0 + + ## + # A Unicode string. If builtin, decoded as per OOo docs. + name = UNICODE_LITERAL("") + + ## + # An 8-bit string. + raw_formula = b'' + + ## + # -1: The name is global (visible in all calculation sheets).<br /> + # -2: The name belongs to a macro sheet or VBA sheet.<br /> + # -3: The name is invalid.<br /> + # 0 <= scope < book.nsheets: The name is local to the sheet whose index is scope. + scope = -1 + + ## + # The result of evaluating the formula, if any. + # If no formula, or evaluation of the formula encountered problems, + # the result is None. Otherwise the result is a single instance of the + # Operand class. + # + result = None + + ## + # This is a convenience method for the frequent use case where the name + # refers to a single cell. + # @return An instance of the Cell class. + # @throws XLRDError The name is not a constant absolute reference + # to a single cell. + def cell(self): + res = self.result + if res: + # result should be an instance of the Operand class + kind = res.kind + value = res.value + if kind == oREF and len(value) == 1: + ref3d = value[0] + if (0 <= ref3d.shtxlo == ref3d.shtxhi - 1 + and ref3d.rowxlo == ref3d.rowxhi - 1 + and ref3d.colxlo == ref3d.colxhi - 1): + sh = self.book.sheet_by_index(ref3d.shtxlo) + return sh.cell(ref3d.rowxlo, ref3d.colxlo) + self.dump(self.book.logfile, + header="=== Dump of Name object ===", + footer="======= End of dump =======", + ) + raise XLRDError("Not a constant absolute reference to a single cell") + + ## + # This is a convenience method for the use case where the name + # refers to one rectangular area in one worksheet. 
+ # @param clipped If true (the default), the returned rectangle is clipped + # to fit in (0, sheet.nrows, 0, sheet.ncols) -- it is guaranteed that + # 0 <= rowxlo <= rowxhi <= sheet.nrows and that the number of usable rows + # in the area (which may be zero) is rowxhi - rowxlo; likewise for columns. + # @return a tuple (sheet_object, rowxlo, rowxhi, colxlo, colxhi). + # @throws XLRDError The name is not a constant absolute reference + # to a single area in a single sheet. + def area2d(self, clipped=True): + res = self.result + if res: + # result should be an instance of the Operand class + kind = res.kind + value = res.value + if kind == oREF and len(value) == 1: # only 1 reference + ref3d = value[0] + if 0 <= ref3d.shtxlo == ref3d.shtxhi - 1: # only 1 usable sheet + sh = self.book.sheet_by_index(ref3d.shtxlo) + if not clipped: + return sh, ref3d.rowxlo, ref3d.rowxhi, ref3d.colxlo, ref3d.colxhi + rowxlo = min(ref3d.rowxlo, sh.nrows) + rowxhi = max(rowxlo, min(ref3d.rowxhi, sh.nrows)) + colxlo = min(ref3d.colxlo, sh.ncols) + colxhi = max(colxlo, min(ref3d.colxhi, sh.ncols)) + assert 0 <= rowxlo <= rowxhi <= sh.nrows + assert 0 <= colxlo <= colxhi <= sh.ncols + return sh, rowxlo, rowxhi, colxlo, colxhi + self.dump(self.book.logfile, + header="=== Dump of Name object ===", + footer="======= End of dump =======", + ) + raise XLRDError("Not a constant absolute reference to a single area in a single sheet") + +## +# Contents of a "workbook". +# <p>WARNING: You don't call this class yourself. You use the Book object that +# was returned when you called xlrd.open_workbook("myfile.xls").</p> + +class Book(BaseObject): + + ## + # The number of worksheets present in the workbook file. + # This information is available even when no sheets have yet been loaded. 
+ nsheets = 0 + + ## + # Which date system was in force when this file was last saved.<br /> + # 0 => 1900 system (the Excel for Windows default).<br /> + # 1 => 1904 system (the Excel for Macintosh default).<br /> + datemode = 0 # In case it's not specified in the file. + + ## + # Version of BIFF (Binary Interchange File Format) used to create the file. + # Latest is 8.0 (represented here as 80), introduced with Excel 97. + # Earliest supported by this module: 2.0 (represented as 20). + biff_version = 0 + + ## + # List containing a Name object for each NAME record in the workbook. + # <br /> -- New in version 0.6.0 + name_obj_list = [] + + ## + # An integer denoting the character set used for strings in this file. + # For BIFF 8 and later, this will be 1200, meaning Unicode; more precisely, UTF_16_LE. + # For earlier versions, this is used to derive the appropriate Python encoding + # to be used to convert to Unicode. + # Examples: 1252 -> 'cp1252', 10000 -> 'mac_roman' + codepage = None + + ## + # The encoding that was derived from the codepage. + encoding = None + + ## + # A tuple containing the (telephone system) country code for:<br /> + # [0]: the user-interface setting when the file was created.<br /> + # [1]: the regional settings.<br /> + # Example: (1, 61) meaning (USA, Australia). + # This information may give a clue to the correct encoding for an unknown codepage. + # For a long list of observed values, refer to the OpenOffice.org documentation for + # the COUNTRY record. + countries = (0, 0) + + ## + # What (if anything) is recorded as the name of the last user to save the file. + user_name = UNICODE_LITERAL('') + + ## + # A list of Font class instances, each corresponding to a FONT record. + # <br /> -- New in version 0.6.1 + font_list = [] + + ## + # A list of XF class instances, each corresponding to an XF record. 
+ # <br /> -- New in version 0.6.1 + xf_list = [] + + ## + # A list of Format objects, each corresponding to a FORMAT record, in + # the order that they appear in the input file. + # It does <i>not</i> contain builtin formats. + # If you are creating an output file using (for example) pyExcelerator, + # use this list. + # The collection to be used for all visual rendering purposes is format_map. + # <br /> -- New in version 0.6.1 + format_list = [] + + ## + # The mapping from XF.format_key to Format object. + # <br /> -- New in version 0.6.1 + format_map = {} + + ## + # This provides access via name to the extended format information for + # both built-in styles and user-defined styles.<br /> + # It maps <i>name</i> to (<i>built_in</i>, <i>xf_index</i>), where:<br /> + # <i>name</i> is either the name of a user-defined style, + # or the name of one of the built-in styles. Known built-in names are + # Normal, RowLevel_1 to RowLevel_7, + # ColLevel_1 to ColLevel_7, Comma, Currency, Percent, "Comma [0]", + # "Currency [0]", Hyperlink, and "Followed Hyperlink".<br /> + # <i>built_in</i> 1 = built-in style, 0 = user-defined<br /> + # <i>xf_index</i> is an index into Book.xf_list.<br /> + # References: OOo docs s6.99 (STYLE record); Excel UI Format/Style + # <br /> -- New in version 0.6.1; since 0.7.4, extracted only if + # open_workbook(..., formatting_info=True) + style_name_map = {} + + ## + # This provides definitions for colour indexes. Please refer to the + # above section "The Palette; Colour Indexes" for an explanation + # of how colours are represented in Excel.<br /> + # Colour indexes into the palette map into (red, green, blue) tuples. + # "Magic" indexes e.g. 0x7FFF map to None. + # <i>colour_map</i> is what you need if you want to render cells on screen or in a PDF + # file. If you are writing an output XLS file, use <i>palette_record</i>. + # <br /> -- New in version 0.6.1. 
Extracted only if open_workbook(..., formatting_info=True) + colour_map = {} + + ## + # If the user has changed any of the colours in the standard palette, the XLS + # file will contain a PALETTE record with 56 (16 for Excel 4.0 and earlier) + # RGB values in it, and this list will be e.g. [(r0, b0, g0), ..., (r55, b55, g55)]. + # Otherwise this list will be empty. This is what you need if you are + # writing an output XLS file. If you want to render cells on screen or in a PDF + # file, use colour_map. + # <br /> -- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True) + palette_record = [] + + ## + # Time in seconds to extract the XLS image as a contiguous string (or mmap equivalent). + load_time_stage_1 = -1.0 + + ## + # Time in seconds to parse the data from the contiguous string (or mmap equivalent). + load_time_stage_2 = -1.0 + + ## + # @return A list of all sheets in the book. + # All sheets not already loaded will be loaded. + def sheets(self): + for sheetx in xrange(self.nsheets): + if not self._sheet_list[sheetx]: + self.get_sheet(sheetx) + return self._sheet_list[:] + + ## + # @param sheetx Sheet index in range(nsheets) + # @return An object of the Sheet class + def sheet_by_index(self, sheetx): + return self._sheet_list[sheetx] or self.get_sheet(sheetx) + + ## + # @param sheet_name Name of sheet required + # @return An object of the Sheet class + def sheet_by_name(self, sheet_name): + try: + sheetx = self._sheet_names.index(sheet_name) + except ValueError: + raise XLRDError('No sheet named <%r>' % sheet_name) + return self.sheet_by_index(sheetx) + + ## + # @return A list of the names of all the worksheets in the workbook file. + # This information is available even when no sheets have yet been loaded. 
+ def sheet_names(self): + return self._sheet_names[:] + + ## + # @param sheet_name_or_index Name or index of sheet enquired upon + # @return true if sheet is loaded, false otherwise + # <br /> -- New in version 0.7.1 + def sheet_loaded(self, sheet_name_or_index): + if isinstance(sheet_name_or_index, int): + sheetx = sheet_name_or_index + else: + try: + sheetx = self._sheet_names.index(sheet_name_or_index) + except ValueError: + raise XLRDError('No sheet named <%r>' % sheet_name_or_index) + return bool(self._sheet_list[sheetx]) + + ## + # @param sheet_name_or_index Name or index of sheet to be unloaded. + # <br /> -- New in version 0.7.1 + def unload_sheet(self, sheet_name_or_index): + if isinstance(sheet_name_or_index, int): + sheetx = sheet_name_or_index + else: + try: + sheetx = self._sheet_names.index(sheet_name_or_index) + except ValueError: + raise XLRDError('No sheet named <%r>' % sheet_name_or_index) + self._sheet_list[sheetx] = None + + ## + # This method has a dual purpose. You can call it to release + # memory-consuming objects and (possibly) a memory-mapped file + # (mmap.mmap object) when you have finished loading sheets in + # on_demand mode, but still require the Book object to examine the + # loaded sheets. It is also called automatically (a) when open_workbook + # raises an exception and (b) if you are using a "with" statement, when + # the "with" block is exited. Calling this method multiple times on the + # same object has no ill effect. 
+ def release_resources(self): + self._resources_released = 1 + if hasattr(self.mem, "close"): + # must be a mmap.mmap object + self.mem.close() + self.mem = None + if hasattr(self.filestr, "close"): + self.filestr.close() + self.filestr = None + self._sharedstrings = None + self._rich_text_runlist_map = None + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, exc_tb): + self.release_resources() + # return false + + ## + # A mapping from (lower_case_name, scope) to a single Name object. + # <br /> -- New in version 0.6.0 + name_and_scope_map = {} + + ## + # A mapping from lower_case_name to a list of Name objects. The list is + # sorted in scope order. Typically there will be one item (of global scope) + # in the list. + # <br /> -- New in version 0.6.0 + name_map = {} + + def __init__(self): + self._sheet_list = [] + self._sheet_names = [] + self._sheet_visibility = [] # from BOUNDSHEET record + self.nsheets = 0 + self._sh_abs_posn = [] # sheet's absolute position in the stream + self._sharedstrings = [] + self._rich_text_runlist_map = {} + self.raw_user_name = False + self._sheethdr_count = 0 # BIFF 4W only + self.builtinfmtcount = -1 # unknown as yet. 
BIFF 3, 4S, 4W + self.initialise_format_info() + self._all_sheets_count = 0 # includes macro & VBA sheets + self._supbook_count = 0 + self._supbook_locals_inx = None + self._supbook_addins_inx = None + self._all_sheets_map = [] # maps an all_sheets index to a calc-sheets index (or -1) + self._externsheet_info = [] + self._externsheet_type_b57 = [] + self._extnsht_name_from_num = {} + self._sheet_num_from_name = {} + self._extnsht_count = 0 + self._supbook_types = [] + self._resources_released = 0 + self.addin_func_names = [] + self.name_obj_list = [] + self.colour_map = {} + self.palette_record = [] + self.xf_list = [] + self.style_name_map = {} + self.mem = b'' + self.filestr = b'' + + def biff2_8_load(self, filename=None, file_contents=None, + logfile=sys.stdout, verbosity=0, use_mmap=USE_MMAP, + encoding_override=None, + formatting_info=False, + on_demand=False, + ragged_rows=False, + ): + # DEBUG = 0 + self.logfile = logfile + self.verbosity = verbosity + self.use_mmap = use_mmap and MMAP_AVAILABLE + self.encoding_override = encoding_override + self.formatting_info = formatting_info + self.on_demand = on_demand + self.ragged_rows = ragged_rows + + if not file_contents: + with open(filename, "rb") as f: + f.seek(0, 2) # EOF + size = f.tell() + f.seek(0, 0) # BOF + if size == 0: + raise XLRDError("File size is 0 bytes") + if self.use_mmap: + self.filestr = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ) + self.stream_len = size + else: + self.filestr = f.read() + self.stream_len = len(self.filestr) + else: + self.filestr = file_contents + self.stream_len = len(file_contents) + + self.base = 0 + if self.filestr[:8] != compdoc.SIGNATURE: + # got this one at the antique store + self.mem = self.filestr + else: + cd = compdoc.CompDoc(self.filestr, logfile=self.logfile) + if USE_FANCY_CD: + for qname in ['Workbook', 'Book']: + self.mem, self.base, self.stream_len = \ + cd.locate_named_stream(UNICODE_LITERAL(qname)) + if self.mem: break + else: + raise 
XLRDError("Can't find workbook in OLE2 compound document") + else: + for qname in ['Workbook', 'Book']: + self.mem = cd.get_named_stream(UNICODE_LITERAL(qname)) + if self.mem: break + else: + raise XLRDError("Can't find workbook in OLE2 compound document") + self.stream_len = len(self.mem) + del cd + if self.mem is not self.filestr: + if hasattr(self.filestr, "close"): + self.filestr.close() + self.filestr = b'' + self._position = self.base + if DEBUG: + print("mem: %s, base: %d, len: %d" % (type(self.mem), self.base, self.stream_len), file=self.logfile) + + def initialise_format_info(self): + # needs to be done once per sheet for BIFF 4W :-( + self.format_map = {} + self.format_list = [] + self.xfcount = 0 + self.actualfmtcount = 0 # number of FORMAT records seen so far + self._xf_index_to_xl_type_map = {0: XL_CELL_NUMBER} + self._xf_epilogue_done = 0 + self.xf_list = [] + self.font_list = [] + + def get2bytes(self): + pos = self._position + buff_two = self.mem[pos:pos+2] + lenbuff = len(buff_two) + self._position += lenbuff + if lenbuff < 2: + return MY_EOF + lo, hi = buff_two + return (BYTES_ORD(hi) << 8) | BYTES_ORD(lo) + + def get_record_parts(self): + pos = self._position + mem = self.mem + code, length = unpack('<HH', mem[pos:pos+4]) + pos += 4 + data = mem[pos:pos+length] + self._position = pos + length + return (code, length, data) + + def get_record_parts_conditional(self, reqd_record): + pos = self._position + mem = self.mem + code, length = unpack('<HH', mem[pos:pos+4]) + if code != reqd_record: + return (None, 0, b'') + pos += 4 + data = mem[pos:pos+length] + self._position = pos + length + return (code, length, data) + + def get_sheet(self, sh_number, update_pos=True): + if self._resources_released: + raise XLRDError("Can't load sheets after releasing resources.") + if update_pos: + self._position = self._sh_abs_posn[sh_number] + _unused_biff_version = self.getbof(XL_WORKSHEET) + # assert biff_version == self.biff_version ### FAILS + # Have an example 
where book is v7 but sheet reports v8!!! + # It appears to work OK if the sheet version is ignored. + # Confirmed by Daniel Rentz: happens when Excel does "save as" + # creating an old version file; ignore version details on sheet BOF. + sh = sheet.Sheet(self, + self._position, + self._sheet_names[sh_number], + sh_number, + ) + sh.read(self) + self._sheet_list[sh_number] = sh + return sh + + def get_sheets(self): + # DEBUG = 0 + if DEBUG: print("GET_SHEETS:", self._sheet_names, self._sh_abs_posn, file=self.logfile) + for sheetno in xrange(len(self._sheet_names)): + if DEBUG: print("GET_SHEETS: sheetno =", sheetno, self._sheet_names, self._sh_abs_posn, file=self.logfile) + self.get_sheet(sheetno) + + def fake_globals_get_sheet(self): # for BIFF 4.0 and earlier + formatting.initialise_book(self) + fake_sheet_name = UNICODE_LITERAL('Sheet 1') + self._sheet_names = [fake_sheet_name] + self._sh_abs_posn = [0] + self._sheet_visibility = [0] # one sheet, visible + self._sheet_list.append(None) # get_sheet updates _sheet_list but needs a None beforehand + self.get_sheets() + + def handle_boundsheet(self, data): + # DEBUG = 1 + bv = self.biff_version + self.derive_encoding() + if DEBUG: + fprintf(self.logfile, "BOUNDSHEET: bv=%d data %r\n", bv, data); + if bv == 45: # BIFF4W + #### Not documented in OOo docs ... + # In fact, the *only* data is the name of the sheet. 
+ sheet_name = unpack_string(data, 0, self.encoding, lenlen=1) + visibility = 0 + sheet_type = XL_BOUNDSHEET_WORKSHEET # guess, patch later + if len(self._sh_abs_posn) == 0: + abs_posn = self._sheetsoffset + self.base + # Note (a) this won't be used + # (b) it's the position of the SHEETHDR record + # (c) add 11 to get to the worksheet BOF record + else: + abs_posn = -1 # unknown + else: + offset, visibility, sheet_type = unpack('<iBB', data[0:6]) + abs_posn = offset + self.base # because global BOF is always at posn 0 in the stream + if bv < BIFF_FIRST_UNICODE: + sheet_name = unpack_string(data, 6, self.encoding, lenlen=1) + else: + sheet_name = unpack_unicode(data, 6, lenlen=1) + + if DEBUG or self.verbosity >= 2: + fprintf(self.logfile, + "BOUNDSHEET: inx=%d vis=%r sheet_name=%r abs_posn=%d sheet_type=0x%02x\n", + self._all_sheets_count, visibility, sheet_name, abs_posn, sheet_type) + self._all_sheets_count += 1 + if sheet_type != XL_BOUNDSHEET_WORKSHEET: + self._all_sheets_map.append(-1) + descr = { + 1: 'Macro sheet', + 2: 'Chart', + 6: 'Visual Basic module', + }.get(sheet_type, 'UNKNOWN') + + if DEBUG or self.verbosity >= 1: + fprintf(self.logfile, + "NOTE *** Ignoring non-worksheet data named %r (type 0x%02x = %s)\n", + sheet_name, sheet_type, descr) + else: + snum = len(self._sheet_names) + self._all_sheets_map.append(snum) + self._sheet_names.append(sheet_name) + self._sh_abs_posn.append(abs_posn) + self._sheet_visibility.append(visibility) + self._sheet_num_from_name[sheet_name] = snum + + def handle_builtinfmtcount(self, data): + ### N.B. This count appears to be utterly useless. 
+ # DEBUG = 1 + builtinfmtcount = unpack('<H', data[0:2])[0] + if DEBUG: fprintf(self.logfile, "BUILTINFMTCOUNT: %r\n", builtinfmtcount) + self.builtinfmtcount = builtinfmtcount + + def derive_encoding(self): + if self.encoding_override: + self.encoding = self.encoding_override + elif self.codepage is None: + if self.biff_version < 80: + fprintf(self.logfile, + "*** No CODEPAGE record, no encoding_override: will use 'ascii'\n") + self.encoding = 'ascii' + else: + self.codepage = 1200 # utf16le + if self.verbosity >= 2: + fprintf(self.logfile, "*** No CODEPAGE record; assuming 1200 (utf_16_le)\n") + else: + codepage = self.codepage + if codepage in encoding_from_codepage: + encoding = encoding_from_codepage[codepage] + elif 300 <= codepage <= 1999: + encoding = 'cp' + str(codepage) + else: + encoding = 'unknown_codepage_' + str(codepage) + if DEBUG or (self.verbosity and encoding != self.encoding) : + fprintf(self.logfile, "CODEPAGE: codepage %r -> encoding %r\n", codepage, encoding) + self.encoding = encoding + if self.codepage != 1200: # utf_16_le + # If we don't have a codec that can decode ASCII into Unicode, + # we're well & truly stuffed -- let the punter know ASAP. 
+ try: + _unused = unicode(b'trial', self.encoding) + except BaseException as e: + fprintf(self.logfile, + "ERROR *** codepage %r -> encoding %r -> %s: %s\n", + self.codepage, self.encoding, type(e).__name__.split(".")[-1], e) + raise + if self.raw_user_name: + strg = unpack_string(self.user_name, 0, self.encoding, lenlen=1) + strg = strg.rstrip() + # if DEBUG: + # print "CODEPAGE: user name decoded from %r to %r" % (self.user_name, strg) + self.user_name = strg + self.raw_user_name = False + return self.encoding + + def handle_codepage(self, data): + # DEBUG = 0 + codepage = unpack('<H', data[0:2])[0] + self.codepage = codepage + self.derive_encoding() + + def handle_country(self, data): + countries = unpack('<HH', data[0:4]) + if self.verbosity: print("Countries:", countries, file=self.logfile) + # Note: in BIFF7 and earlier, country record was put (redundantly?) in each worksheet. + assert self.countries == (0, 0) or self.countries == countries + self.countries = countries + + def handle_datemode(self, data): + datemode = unpack('<H', data[0:2])[0] + if DEBUG or self.verbosity: + fprintf(self.logfile, "DATEMODE: datemode %r\n", datemode) + assert datemode in (0, 1) + self.datemode = datemode + + def handle_externname(self, data): + blah = DEBUG or self.verbosity >= 2 + if self.biff_version >= 80: + option_flags, other_info =unpack("<HI", data[:6]) + pos = 6 + name, pos = unpack_unicode_update_pos(data, pos, lenlen=1) + extra = data[pos:] + if self._supbook_types[-1] == SUPBOOK_ADDIN: + self.addin_func_names.append(name) + if blah: + fprintf(self.logfile, + "EXTERNNAME: sbktype=%d oflags=0x%04x oinfo=0x%08x name=%r extra=%r\n", + self._supbook_types[-1], option_flags, other_info, name, extra) + + def handle_externsheet(self, data): + self.derive_encoding() # in case CODEPAGE record missing/out of order/wrong + self._extnsht_count += 1 # for use as a 1-based index + blah1 = DEBUG or self.verbosity >= 1 + blah2 = DEBUG or self.verbosity >= 2 + if self.biff_version 
>= 80: + num_refs = unpack("<H", data[0:2])[0] + bytes_reqd = num_refs * 6 + 2 + while len(data) < bytes_reqd: + if blah1: + fprintf( + self.logfile, + "INFO: EXTERNSHEET needs %d bytes, have %d\n", + bytes_reqd, len(data), + ) + code2, length2, data2 = self.get_record_parts() + if code2 != XL_CONTINUE: + raise XLRDError("Missing CONTINUE after EXTERNSHEET record") + data += data2 + pos = 2 + for k in xrange(num_refs): + info = unpack("<HHH", data[pos:pos+6]) + ref_recordx, ref_first_sheetx, ref_last_sheetx = info + self._externsheet_info.append(info) + pos += 6 + if blah2: + fprintf( + self.logfile, + "EXTERNSHEET(b8): k = %2d, record = %2d, first_sheet = %5d, last sheet = %5d\n", + k, ref_recordx, ref_first_sheetx, ref_last_sheetx, + ) + else: + nc, ty = unpack("<BB", data[:2]) + if blah2: + print("EXTERNSHEET(b7-):", file=self.logfile) + hex_char_dump(data, 0, len(data), fout=self.logfile) + msg = { + 1: "Encoded URL", + 2: "Current sheet!!", + 3: "Specific sheet in own doc't", + 4: "Nonspecific sheet in own doc't!!", + }.get(ty, "Not encoded") + print(" %3d chars, type is %d (%s)" % (nc, ty, msg), file=self.logfile) + if ty == 3: + sheet_name = unicode(data[2:nc+2], self.encoding) + self._extnsht_name_from_num[self._extnsht_count] = sheet_name + if blah2: print(self._extnsht_name_from_num, file=self.logfile) + if not (1 <= ty <= 4): + ty = 0 + self._externsheet_type_b57.append(ty) + + def handle_filepass(self, data): + if self.verbosity >= 2: + logf = self.logfile + fprintf(logf, "FILEPASS:\n") + hex_char_dump(data, 0, len(data), base=0, fout=logf) + if self.biff_version >= 80: + kind1, = unpack('<H', data[:2]) + if kind1 == 0: # weak XOR encryption + key, hash_value = unpack('<HH', data[2:]) + fprintf(logf, + 'weak XOR: key=0x%04x hash=0x%04x\n', + key, hash_value) + elif kind1 == 1: + kind2, = unpack('<H', data[4:6]) + if kind2 == 1: # BIFF8 standard encryption + caption = "BIFF8 std" + elif kind2 == 2: + caption = "BIFF8 strong" + else: + caption = "** 
UNKNOWN ENCRYPTION METHOD **" + fprintf(logf, "%s\n", caption) + raise XLRDError("Workbook is encrypted") + + def handle_name(self, data): + blah = DEBUG or self.verbosity >= 2 + bv = self.biff_version + if bv < 50: + return + self.derive_encoding() + # print + # hex_char_dump(data, 0, len(data), fout=self.logfile) + ( + option_flags, kb_shortcut, name_len, fmla_len, extsht_index, sheet_index, + menu_text_len, description_text_len, help_topic_text_len, status_bar_text_len, + ) = unpack("<HBBHHH4B", data[0:14]) + nobj = Name() + nobj.book = self ### CIRCULAR ### + name_index = len(self.name_obj_list) + nobj.name_index = name_index + self.name_obj_list.append(nobj) + nobj.option_flags = option_flags + for attr, mask, nshift in ( + ('hidden', 1, 0), + ('func', 2, 1), + ('vbasic', 4, 2), + ('macro', 8, 3), + ('complex', 0x10, 4), + ('builtin', 0x20, 5), + ('funcgroup', 0xFC0, 6), + ('binary', 0x1000, 12), + ): + setattr(nobj, attr, (option_flags & mask) >> nshift) + + macro_flag = " M"[nobj.macro] + if bv < 80: + internal_name, pos = unpack_string_update_pos(data, 14, self.encoding, known_len=name_len) + else: + internal_name, pos = unpack_unicode_update_pos(data, 14, known_len=name_len) + nobj.extn_sheet_num = extsht_index + nobj.excel_sheet_index = sheet_index + nobj.scope = None # patched up in the names_epilogue() method + if blah: + fprintf( + self.logfile, + "NAME[%d]:%s oflags=%d, name_len=%d, fmla_len=%d, extsht_index=%d, sheet_index=%d, name=%r\n", + name_index, macro_flag, option_flags, name_len, + fmla_len, extsht_index, sheet_index, internal_name) + name = internal_name + if nobj.builtin: + name = builtin_name_from_code.get(name, "??Unknown??") + if blah: print(" builtin: %s" % name, file=self.logfile) + nobj.name = name + nobj.raw_formula = data[pos:] + nobj.basic_formula_len = fmla_len + nobj.evaluated = 0 + if blah: + nobj.dump( + self.logfile, + header="--- handle_name: name[%d] ---" % name_index, + footer="-------------------", + ) + + def 
    def names_epilogue(self):
        """Post-process all Name objects once the globals stream is read.

        Three passes: (1) convert each name's raw sheet index into a
        'scope' (-1 global, -2 valid-but-unusable, -3 invalid, else an
        internal sheet index); (2) evaluate the stored formulas; (3) build
        the name_and_scope_map and name_map lookup dicts.
        """
        blah = self.verbosity >= 2
        f = self.logfile
        if blah:
            print("+++++ names_epilogue +++++", file=f)
            print("_all_sheets_map", REPR(self._all_sheets_map), file=f)
            print("_extnsht_name_from_num", REPR(self._extnsht_name_from_num), file=f)
            print("_sheet_num_from_name", REPR(self._sheet_num_from_name), file=f)
        num_names = len(self.name_obj_list)
        for namex in range(num_names):
            nobj = self.name_obj_list[namex]
            # Convert from excel_sheet_index to scope.
            # This is done here because in BIFF7 and earlier, the
            # BOUNDSHEET records (from which _all_sheets_map is derived)
            # come after the NAME records.
            if self.biff_version >= 80:
                sheet_index = nobj.excel_sheet_index
                if sheet_index == 0:
                    intl_sheet_index = -1 # global
                elif 1 <= sheet_index <= len(self._all_sheets_map):
                    intl_sheet_index = self._all_sheets_map[sheet_index-1]
                    if intl_sheet_index == -1: # maps to a macro or VBA sheet
                        intl_sheet_index = -2 # valid sheet reference but not useful
                else:
                    # huh?
                    intl_sheet_index = -3 # invalid
            elif 50 <= self.biff_version <= 70:
                sheet_index = nobj.extn_sheet_num
                if sheet_index == 0:
                    intl_sheet_index = -1 # global
                else:
                    sheet_name = self._extnsht_name_from_num[sheet_index]
                    intl_sheet_index = self._sheet_num_from_name.get(sheet_name, -2)
            nobj.scope = intl_sheet_index

        for namex in range(num_names):
            nobj = self.name_obj_list[namex]
            # Parse the formula ...
            if nobj.macro or nobj.binary: continue
            if nobj.evaluated: continue
            evaluate_name_formula(self, nobj, namex, blah=blah)

        if self.verbosity >= 2:
            print("---------- name object dump ----------", file=f)
            for namex in range(num_names):
                nobj = self.name_obj_list[namex]
                nobj.dump(f, header="--- name[%d] ---" % namex)
            print("--------------------------------------", file=f)
        #
        # Build some dicts for access to the name objects
        #
        name_and_scope_map = {} # (name.lower(), scope): Name_object
        name_map = {}           # name.lower() : list of Name_objects (sorted in scope order)
        for namex in range(num_names):
            nobj = self.name_obj_list[namex]
            name_lcase = nobj.name.lower()
            key = (name_lcase, nobj.scope)
            if key in name_and_scope_map and self.verbosity:
                fprintf(f, 'Duplicate entry %r in name_and_scope_map\n', key)
            name_and_scope_map[key] = nobj
            sort_data = (nobj.scope, namex, nobj)
            # namex (a temp unique ID) ensures the Name objects will not
            # be compared (fatal in py3)
            if name_lcase in name_map:
                name_map[name_lcase].append(sort_data)
            else:
                name_map[name_lcase] = [sort_data]
        for key in name_map.keys():
            alist = name_map[key]
            alist.sort()
            name_map[key] = [x[2] for x in alist]
        self.name_and_scope_map = name_and_scope_map
        self.name_map = name_map

    def handle_obj(self, data):
        """Handle an OBJ record (embedded object header).

        Not doing much handling at all.
        Worrying about embedded (BOF ... EOF) substreams is done elsewhere.
        """
        # DEBUG = 1
        obj_type, obj_id = unpack('<HI', data[4:10])
        # if DEBUG: print "---> handle_obj type=%d id=0x%08x" % (obj_type, obj_id)
    def handle_supbook(self, data):
        """Handle a SUPBOOK record (aka EXTERNALBOOK in OOo docs).

        Classifies the record as internal 3-D refs, add-in functions,
        DDE/OLE document, or an external workbook (with its sheet names),
        recording the type in self._supbook_types.
        """
        self._supbook_types.append(None)
        blah = DEBUG or self.verbosity >= 2
        if blah:
            print("SUPBOOK:", file=self.logfile)
            hex_char_dump(data, 0, len(data), fout=self.logfile)
        num_sheets = unpack("<H", data[0:2])[0]
        if blah: print("num_sheets = %d" % num_sheets, file=self.logfile)
        sbn = self._supbook_count
        self._supbook_count += 1
        # Magic byte signatures distinguish the SUPBOOK flavours.
        if data[2:4] == b"\x01\x04":
            self._supbook_types[-1] = SUPBOOK_INTERNAL
            self._supbook_locals_inx = self._supbook_count - 1
            if blah:
                print("SUPBOOK[%d]: internal 3D refs; %d sheets" % (sbn, num_sheets), file=self.logfile)
                print(" _all_sheets_map", self._all_sheets_map, file=self.logfile)
            return
        if data[0:4] == b"\x01\x00\x01\x3A":
            self._supbook_types[-1] = SUPBOOK_ADDIN
            self._supbook_addins_inx = self._supbook_count - 1
            if blah: print("SUPBOOK[%d]: add-in functions" % sbn, file=self.logfile)
            return
        url, pos = unpack_unicode_update_pos(data, 2, lenlen=2)
        if num_sheets == 0:
            self._supbook_types[-1] = SUPBOOK_DDEOLE
            if blah: fprintf(self.logfile, "SUPBOOK[%d]: DDE/OLE document = %r\n", sbn, url)
            return
        self._supbook_types[-1] = SUPBOOK_EXTERNAL
        if blah: fprintf(self.logfile, "SUPBOOK[%d]: url = %r\n", sbn, url)
        sheet_names = []
        for x in range(num_sheets):
            try:
                shname, pos = unpack_unicode_update_pos(data, pos, lenlen=2)
            except struct.error:
                # #### FIX ME ####
                # Should implement handling of CONTINUE record(s) ...
                # Until then, bail out of the loop with a warning rather
                # than crashing on a truncated record.
                if self.verbosity:
                    print((
                        "*** WARNING: unpack failure in sheet %d of %d in SUPBOOK record for file %r"
                        % (x, num_sheets, url)
                        ), file=self.logfile)
                break
            sheet_names.append(shname)
            if blah: fprintf(self.logfile, " sheetx=%d namelen=%d name=%r (next pos=%d)\n", x, len(shname), shname, pos)
    def handle_sheethdr(self, data):
        """Handle a SHEETHDR record (a BIFF 4W special).

        The SHEETHDR record is followed by a (BOF ... EOF) substream
        containing a worksheet; that substream is parsed immediately and
        the stream position is then advanced past it.
        """
        # DEBUG = 1
        self.derive_encoding()
        sheet_len = unpack('<i', data[:4])[0]
        sheet_name = unpack_string(data, 4, self.encoding, lenlen=1)
        sheetno = self._sheethdr_count
        # Name must agree with the one announced earlier in the globals.
        assert sheet_name == self._sheet_names[sheetno]
        self._sheethdr_count += 1
        BOF_posn = self._position
        posn = BOF_posn - 4 - len(data)
        if DEBUG: fprintf(self.logfile, 'SHEETHDR %d at posn %d: len=%d name=%r\n', sheetno, posn, sheet_len, sheet_name)
        self.initialise_format_info()
        if DEBUG: print('SHEETHDR: xf epilogue flag is %d' % self._xf_epilogue_done, file=self.logfile)
        self._sheet_list.append(None) # get_sheet updates _sheet_list but needs a None beforehand
        self.get_sheet(sheetno, update_pos=False)
        if DEBUG: print('SHEETHDR: posn after get_sheet() =', self._position, file=self.logfile)
        # Skip to the end of this sheet's substream.
        self._position = BOF_posn + sheet_len

    def handle_sheetsoffset(self, data):
        """Handle a SHEETSOFFSET record (BIFF 4W): remember where sheets start."""
        # DEBUG = 0
        posn = unpack('<i', data)[0]
        if DEBUG: print('SHEETSOFFSET:', posn, file=self.logfile)
        self._sheetsoffset = posn

    def handle_sst(self, data):
        """Handle the SST (shared string table) record.

        Collects the SST record plus any following CONTINUE records into a
        list of raw chunks, then decodes them all via unpack_SST_table().
        Rich-text run info is kept only when formatting_info is requested.
        """
        # DEBUG = 1
        if DEBUG:
            print("SST Processing", file=self.logfile)
            t0 = time.time()
        nbt = len(data)
        strlist = [data]
        uniquestrings = unpack('<i', data[4:8])[0]
        if DEBUG or self.verbosity >= 2:
            fprintf(self.logfile, "SST: unique strings: %d\n", uniquestrings)
        while 1:
            code, nb, data = self.get_record_parts_conditional(XL_CONTINUE)
            if code is None:
                break
            nbt += nb
            if DEBUG >= 2:
                fprintf(self.logfile, "CONTINUE: adding %d bytes to SST -> %d\n", nb, nbt)
            strlist.append(data)
        self._sharedstrings, rt_runlist = unpack_SST_table(strlist, uniquestrings)
        if self.formatting_info:
            self._rich_text_runlist_map = rt_runlist
        if DEBUG:
            t1 = time.time()
            print("SST processing took %.2f seconds" % (t1 - t0, ), file=self.logfile)
    def handle_writeaccess(self, data):
        """Handle a WRITEACCESS record: the name of the last user to save.

        Pre-BIFF8 with no known encoding yet: keep the raw bytes and set
        raw_user_name so it can be decoded later.
        """
        DEBUG = 0
        if self.biff_version < 80:
            if not self.encoding:
                self.raw_user_name = True
                self.user_name = data
                return
            strg = unpack_string(data, 0, self.encoding, lenlen=1)
        else:
            strg = unpack_unicode(data, 0, lenlen=2)
        if DEBUG: fprintf(self.logfile, "WRITEACCESS: %d bytes; raw=%s %r\n", len(data), self.raw_user_name, strg)
        # The record is blank-padded to a fixed width; strip the padding.
        strg = strg.rstrip()
        self.user_name = strg

    def parse_globals(self):
        """Read the workbook-globals substream, dispatching each record
        to its handler until the stream's EOF record is met.
        """
        # DEBUG = 0
        # no need to position, just start reading (after the BOF)
        formatting.initialise_book(self)
        while 1:
            rc, length, data = self.get_record_parts()
            if DEBUG: print("parse_globals: record code is 0x%04x" % rc, file=self.logfile)
            if rc == XL_SST:
                self.handle_sst(data)
            elif rc == XL_FONT or rc == XL_FONT_B3B4:
                self.handle_font(data)
            elif rc == XL_FORMAT: # XL_FORMAT2 is BIFF <= 3.0, can't appear in globals
                self.handle_format(data)
            elif rc == XL_XF:
                self.handle_xf(data)
            elif rc == XL_BOUNDSHEET:
                self.handle_boundsheet(data)
            elif rc == XL_DATEMODE:
                self.handle_datemode(data)
            elif rc == XL_CODEPAGE:
                self.handle_codepage(data)
            elif rc == XL_COUNTRY:
                self.handle_country(data)
            elif rc == XL_EXTERNNAME:
                self.handle_externname(data)
            elif rc == XL_EXTERNSHEET:
                self.handle_externsheet(data)
            elif rc == XL_FILEPASS:
                self.handle_filepass(data)
            elif rc == XL_WRITEACCESS:
                self.handle_writeaccess(data)
            elif rc == XL_SHEETSOFFSET:
                self.handle_sheetsoffset(data)
            elif rc == XL_SHEETHDR:
                self.handle_sheethdr(data)
            elif rc == XL_SUPBOOK:
                self.handle_supbook(data)
            elif rc == XL_NAME:
                self.handle_name(data)
            elif rc == XL_PALETTE:
                self.handle_palette(data)
            elif rc == XL_STYLE:
                self.handle_style(data)
            elif rc & 0xff == 9 and self.verbosity:
                # Low byte 9 is a BOF opcode — an unexpected nested substream.
                fprintf(self.logfile, "*** Unexpected BOF at posn %d: 0x%04x len=%d data=%r\n",
                    self._position - length - 4, rc, length, data)
            elif rc == XL_EOF:
                # End of globals: run the deferred fix-up passes.
                self.xf_epilogue()
                self.names_epilogue()
                self.palette_epilogue()
                if not self.encoding:
                    self.derive_encoding()
                if self.biff_version == 45:
                    # DEBUG = 0
                    if DEBUG: print("global EOF: position", self._position, file=self.logfile)
                    # if DEBUG:
                    #     pos = self._position - 4
                    #     print repr(self.mem[pos:pos+40])
                return
            else:
                # if DEBUG:
                #     print >> self.logfile, "parse_globals: ignoring record code 0x%04x" % rc
                pass

    def read(self, pos, length):
        """Return up to `length` bytes at `pos` and advance the stream position."""
        data = self.mem[pos:pos+length]
        self._position = pos + len(data)
        return data

    def getbof(self, rqd_stream):
        """Read and validate a BOF record; return the deduced BIFF version.

        `rqd_stream` is the expected stream type (e.g. workbook globals or
        worksheet). Raises XLRDError on malformed, unsupported or
        wrong-stream BOF records.
        """
        # DEBUG = 1
        # if DEBUG: print >> self.logfile, "getbof(): position", self._position
        if DEBUG: print("reqd: 0x%04x" % rqd_stream, file=self.logfile)
        def bof_error(msg):
            raise XLRDError('Unsupported format, or corrupt file: ' + msg)
        savpos = self._position
        opcode = self.get2bytes()
        if opcode == MY_EOF:
            bof_error('Expected BOF record; met end of file')
        if opcode not in bofcodes:
            bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8])
        length = self.get2bytes()
        if length == MY_EOF:
            bof_error('Incomplete BOF record[1]; met end of file')
        if not (4 <= length <= 20):
            bof_error(
                'Invalid length (%d) for BOF record type 0x%04x'
                % (length, opcode))
        # Pad short records out to the expected length for this opcode.
        padding = b'\0' * max(0, boflen[opcode] - length)
        data = self.read(self._position, length);
        if DEBUG: fprintf(self.logfile, "\ngetbof(): data=%r\n", data)
        if len(data) < length:
            bof_error('Incomplete BOF record[2]; met end of file')
        data += padding
        version1 = opcode >> 8
        version2, streamtype = unpack('<HH', data[0:4])
        if DEBUG:
            print("getbof(): op=0x%04x version2=0x%04x streamtype=0x%04x" \
                % (opcode, version2, streamtype), file=self.logfile)
        bof_offset = self._position - 4 - length
        if DEBUG:
            print("getbof(): BOF found at offset %d; savpos=%d" \
                % (bof_offset, savpos), file=self.logfile)
        version = build = year = 0
        if version1 == 0x08:
            build, year = unpack('<HH', data[4:8])
            if version2 == 0x0600:
                version = 80
            elif version2 == 0x0500:
                # Excel stored 0x0500 for both BIFF5 and BIFF7; use the
                # build/year fields to tell them apart.
                if year < 1994 or build in (2412, 3218, 3321):
                    version = 50
                else:
                    version = 70
            else:
                # dodgy one, created by a 3rd-party tool
                version = {
                    0x0000: 21,
                    0x0007: 21,
                    0x0200: 21,
                    0x0300: 30,
                    0x0400: 40,
                    }.get(version2, 0)
        elif version1 in (0x04, 0x02, 0x00):
            version = {0x04: 40, 0x02: 30, 0x00: 21}[version1]

        if version == 40 and streamtype == XL_WORKBOOK_GLOBALS_4W:
            version = 45 # i.e. 4W

        if DEBUG or self.verbosity >= 2:
            print("BOF: op=0x%04x vers=0x%04x stream=0x%04x buildid=%d buildyr=%d -> BIFF%d" \
                % (opcode, version2, streamtype, build, year, version), file=self.logfile)
        got_globals = streamtype == XL_WORKBOOK_GLOBALS or (
            version == 45 and streamtype == XL_WORKBOOK_GLOBALS_4W)
        if (rqd_stream == XL_WORKBOOK_GLOBALS and got_globals) or streamtype == rqd_stream:
            return version
        if version < 50 and streamtype == XL_WORKSHEET:
            return version
        if version >= 50 and streamtype == 0x0100:
            bof_error("Workspace file -- no spreadsheet data")
        bof_error(
            'BOF not workbook/worksheet: op=0x%04x vers=0x%04x strm=0x%04x build=%d year=%d -> BIFF%d'
            % (opcode, version2, streamtype, build, year, version)
            )
def expand_cell_address(inrow, incol):
    """Decode a BIFF8 cell address into (row, col, relrow, relcol).

    Ref : OOo docs, "4.3.4 Cell Addresses in BIFF8". Bit 15 of `incol`
    flags a relative row, bit 14 a relative column; relative components
    are sign-extended (row from 16 bits, column from 8 bits).
    """
    relrow = 1 if incol & 0x8000 else 0
    relcol = 1 if incol & 0x4000 else 0
    row = inrow
    if relrow and row >= 32768:
        row -= 65536
    col = incol & 0xFF
    if relcol and col >= 128:
        col -= 256
    return row, col, relrow, relcol

def colname(colx, _A2Z="ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
    """Return the Excel column label for 0-based index `colx` (0 -> A, 26 -> AA)."""
    assert colx >= 0
    rev = []
    while True:
        colx, rem = divmod(colx, 26)
        rev.append(_A2Z[rem])
        if not colx:
            return UNICODE_LITERAL('').join(reversed(rev))
        # Bijective base-26: shift so that e.g. 26 maps to 'AA', not 'BA'.
        colx -= 1

def display_cell_address(rowx, colx, relrow, relcol):
    """Format a (possibly relative) cell address for debug display,
    e.g. '$B$3' or '(*+1)(*-2)' for relative offsets."""
    if relcol:
        colpart = "(*%s%d)" % ("+-"[colx < 0], abs(colx))
    else:
        colpart = "$" + colname(colx)
    rowpart = ("(*%s%d)" % ("+-"[rowx < 0], abs(rowx))) if relrow else ("$%d" % (rowx+1,))
    return colpart + rowpart
def unpack_SST_table(datatab, nstrings):
    """Return (list of strings, dict of rich-text runs) decoded from the
    SST record chunks in `datatab` (the SST record plus its CONTINUE
    records). Strings may span chunk boundaries; each chunk restates the
    options byte, so the compressed/UTF-16 flag can change mid-string.
    """
    datainx = 0
    ndatas = len(datatab)
    data = datatab[0]
    datalen = len(data)
    pos = 8          # skip the 8-byte SST header (total/unique counts)
    strings = []
    strappend = strings.append
    richtext_runs = {}
    # Hoist frequently-used globals to locals for the hot loop.
    local_unpack = unpack
    local_min = min
    local_BYTES_ORD = BYTES_ORD
    latin_1 = "latin_1"
    for _unused_i in xrange(nstrings):
        nchars = local_unpack('<H', data[pos:pos+2])[0]
        pos += 2
        options = local_BYTES_ORD(data[pos])
        pos += 1
        rtcount = 0
        phosz = 0
        if options & 0x08: # richtext
            rtcount = local_unpack('<H', data[pos:pos+2])[0]
            pos += 2
        if options & 0x04: # phonetic
            phosz = local_unpack('<i', data[pos:pos+4])[0]
            pos += 4
        accstrg = UNICODE_LITERAL('')
        charsgot = 0
        # Accumulate the string, hopping to the next CONTINUE chunk
        # whenever the current chunk runs out of character data.
        while 1:
            charsneed = nchars - charsgot
            if options & 0x01:
                # Uncompressed UTF-16
                charsavail = local_min((datalen - pos) >> 1, charsneed)
                rawstrg = data[pos:pos+2*charsavail]
                # if DEBUG: print "SST U16: nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg)
                try:
                    accstrg += unicode(rawstrg, "utf_16_le")
                except:
                    # print "SST U16: nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg)
                    # Probable cause: dodgy data e.g. unfinished surrogate pair.
                    # E.g. file unicode2.xls in pyExcelerator's examples has cells containing
                    # unichr(i) for i in range(0x100000)
                    # so this will include 0xD800 etc
                    raise
                pos += 2*charsavail
            else:
                # Note: this is COMPRESSED (not ASCII!) encoding!!!
                # Each character is one byte of the low half of Unicode.
                charsavail = local_min(datalen - pos, charsneed)
                rawstrg = data[pos:pos+charsavail]
                # if DEBUG: print "SST CMPRSD: nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg)
                accstrg += unicode(rawstrg, latin_1)
                pos += charsavail
            charsgot += charsavail
            if charsgot == nchars:
                break
            # String continues in the next chunk; its first byte is a
            # fresh options byte (the compression flag may differ).
            datainx += 1
            data = datatab[datainx]
            datalen = len(data)
            options = local_BYTES_ORD(data[0])
            pos = 1

        if rtcount:
            # Collect (first_char_index, font_index) rich-text run pairs,
            # which may themselves spill into following chunks.
            runs = []
            for runindex in xrange(rtcount):
                if pos == datalen:
                    pos = 0
                    datainx += 1
                    data = datatab[datainx]
                    datalen = len(data)
                runs.append(local_unpack("<HH", data[pos:pos+4]))
                pos += 4
            richtext_runs[len(strings)] = runs

        pos += phosz # size of the phonetic stuff to skip
        if pos >= datalen:
            # adjust to correct position in next record
            pos = pos - datalen
            datainx += 1
            if datainx < ndatas:
                data = datatab[datainx]
                datalen = len(data)
            else:
                assert _unused_i == nstrings - 1
        strappend(accstrg)
    return strings, richtext_runs