From 3fd81c73cfd8bad36b2a1cf7955006e35c1d9db5 Mon Sep 17 00:00:00 2001
From: Camil Staps
Date: Fri, 23 Oct 2015 16:44:24 +0200
Subject: Assignment 3: code, plots
---
Assignment 3/packages/pydot.py | 2037 ++++++++++++++++
Assignment 3/packages/treeViewer.py | 17 +
Assignment 3/packages/xlrd/__init__.py | 467 ++++
Assignment 3/packages/xlrd/biffh.py | 663 ++++++
Assignment 3/packages/xlrd/book.py | 1420 ++++++++++++
Assignment 3/packages/xlrd/compdoc.py | 473 ++++
Assignment 3/packages/xlrd/doc/compdoc.html | 69 +
Assignment 3/packages/xlrd/doc/xlrd.html | 2064 +++++++++++++++++
Assignment 3/packages/xlrd/examples/namesdemo.xls | Bin 0 -> 22528 bytes
.../packages/xlrd/examples/xlrdnameAPIdemo.py | 179 ++
Assignment 3/packages/xlrd/formatting.py | 1262 ++++++++++
Assignment 3/packages/xlrd/formula.py | 2179 ++++++++++++++++++
Assignment 3/packages/xlrd/info.py | 1 +
Assignment 3/packages/xlrd/licences.py | 77 +
Assignment 3/packages/xlrd/sheet.py | 2424 ++++++++++++++++++++
Assignment 3/packages/xlrd/timemachine.py | 52 +
Assignment 3/packages/xlrd/xldate.py | 213 ++
Assignment 3/packages/xlrd/xlsx.py | 804 +++++++
18 files changed, 14401 insertions(+)
create mode 100755 Assignment 3/packages/pydot.py
create mode 100755 Assignment 3/packages/treeViewer.py
create mode 100644 Assignment 3/packages/xlrd/__init__.py
create mode 100644 Assignment 3/packages/xlrd/biffh.py
create mode 100644 Assignment 3/packages/xlrd/book.py
create mode 100644 Assignment 3/packages/xlrd/compdoc.py
create mode 100644 Assignment 3/packages/xlrd/doc/compdoc.html
create mode 100644 Assignment 3/packages/xlrd/doc/xlrd.html
create mode 100644 Assignment 3/packages/xlrd/examples/namesdemo.xls
create mode 100644 Assignment 3/packages/xlrd/examples/xlrdnameAPIdemo.py
create mode 100644 Assignment 3/packages/xlrd/formatting.py
create mode 100644 Assignment 3/packages/xlrd/formula.py
create mode 100644 Assignment 3/packages/xlrd/info.py
create mode 100644 Assignment 3/packages/xlrd/licences.py
create mode 100644 Assignment 3/packages/xlrd/sheet.py
create mode 100644 Assignment 3/packages/xlrd/timemachine.py
create mode 100644 Assignment 3/packages/xlrd/xldate.py
create mode 100644 Assignment 3/packages/xlrd/xlsx.py
(limited to 'Assignment 3/packages')
diff --git a/Assignment 3/packages/pydot.py b/Assignment 3/packages/pydot.py
new file mode 100755
index 0000000..104c817
--- /dev/null
+++ b/Assignment 3/packages/pydot.py
@@ -0,0 +1,2037 @@
+# -*- coding: utf-8 -*-
+"""Graphviz's dot language Python interface.
+
+This module provides with a full interface to create handle modify
+and process graphs in Graphviz's dot language.
+
+References:
+
+pydot Homepage: http://code.google.com/p/pydot/
+Graphviz: http://www.graphviz.org/
+DOT Language: http://www.graphviz.org/doc/info/lang.html
+
+Programmed and tested with Graphviz 2.26.3 and Python 2.6 on OSX 10.6.4
+
+Copyright (c) 2005-2011 Ero Carrera Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd This module is part of the xlrd package, which is released under a
+# BSD-style licence. A Python module for extracting data from MS Excel (TM) spreadsheet files.
+#
+# Development of this module would not have been possible without the document
+# "OpenOffice.org's Documentation of the Microsoft Excel File Format"
+# ("OOo docs" for short).
+# The latest version is available from OpenOffice.org in
+# PDF format
+# and
+# ODT format.
+# Small portions of the OOo docs are reproduced in this
+# document. A study of the OOo docs is recommended for those who wish a
+# deeper understanding of the Excel file layout than the xlrd docs can provide.
+# Backporting to Python 2.1 was partially funded by
+#
+# Journyx - provider of timesheet and project accounting solutions.
+#
+# Provision of formatting information in version 0.6.1 was funded by
+#
+# Simplistix Ltd.
+#
+# This module presents all text strings as Python unicode objects.
+# From Excel 97 onwards, text in Excel spreadsheets has been stored as Unicode.
+# Older files (Excel 95 and earlier) don't keep strings in Unicode;
+# a CODEPAGE record provides a codepage number (for example, 1252) which is
+# used by xlrd to derive the encoding (for same example: "cp1252") which is
+# used to translate to Unicode. If the CODEPAGE record is missing (possible if the file was created
+# by third-party software), xlrd will assume that the encoding is ascii, and keep going.
+# If the actual encoding is not ascii, a UnicodeDecodeError exception will be raised and
+# you will need to determine the encoding yourself, and tell xlrd:
+#
+# Version 0.7.4 -- April 2012
+# General information
+#
+# Acknowledgements
+#
+# Unicode
+#
+#
+# book = xlrd.open_workbook(..., encoding_override="cp1252")
+#
If the CODEPAGE record exists but is wrong (for example, the codepage +# number is 1251, but the strings are actually encoded in koi8_r), +# it can be overridden using the same mechanism. +# The supplied runxlrd.py has a corresponding command-line argument, which +# may be used for experimentation: +#
+# runxlrd.py -e koi8_r 3rows myfile.xls +#+#
The first place to look for an encoding ("codec name") is +# +# the Python documentation. +#
+# +# +#In reality, there are no such things. What you have are floating point +# numbers and pious hope. +# There are several problems with Excel dates:
+# +#(1) Dates are not stored as a separate data type; they are stored as +# floating point numbers and you have to rely on +# (a) the "number format" applied to them in Excel and/or +# (b) knowing which cells are supposed to have dates in them. +# This module helps with (a) by inspecting the +# format that has been applied to each number cell; +# if it appears to be a date format, the cell +# is classified as a date rather than a number. Feedback on this feature, +# especially from non-English-speaking locales, would be appreciated.
+# +#(2) Excel for Windows stores dates by default as the number of +# days (or fraction thereof) since 1899-12-31T00:00:00. Excel for +# Macintosh uses a default start date of 1904-01-01T00:00:00. The date +# system can be changed in Excel on a per-workbook basis (for example: +# Tools -> Options -> Calculation, tick the "1904 date system" box). +# This is of course a bad idea if there are already dates in the +# workbook. There is no good reason to change it even if there are no +# dates in the workbook. Which date system is in use is recorded in the +# workbook. A workbook transported from Windows to Macintosh (or vice +# versa) will work correctly with the host Excel. When using this +# module's xldate_as_tuple function to convert numbers from a workbook, +# you must use the datemode attribute of the Book object. If you guess, +# or make a judgement depending on where you believe the workbook was +# created, you run the risk of being 1462 days out of kilter.
+# +#Reference: +# http://support.microsoft.com/default.aspx?scid=KB;EN-US;q180162
+# +# +#(3) The Excel implementation of the Windows-default 1900-based date system works on the +# incorrect premise that 1900 was a leap year. It interprets the number 60 as meaning 1900-02-29, +# which is not a valid date. Consequently any number less than 61 is ambiguous. Example: is 59 the +# result of 1900-02-28 entered directly, or is it 1900-03-01 minus 2 days? The OpenOffice.org Calc +# program "corrects" the Microsoft problem; entering 1900-02-27 causes the number 59 to be stored. +# Save as an XLS file, then open the file with Excel -- you'll see 1900-02-28 displayed.
+# +#Reference: http://support.microsoft.com/default.aspx?scid=kb;en-us;214326
+# +#(4) The Macintosh-default 1904-based date system counts 1904-01-02 as day 1 and 1904-01-01 as day zero. +# Thus any number such that (0.0 <= number < 1.0) is ambiguous. Is 0.625 a time of day (15:00:00), +# independent of the calendar, +# or should it be interpreted as an instant on a particular day (1904-01-01T15:00:00)? +# The xldate_* functions in this module +# take the view that such a number is a calendar-independent time of day (like Python's datetime.time type) for both +# date systems. This is consistent with more recent Microsoft documentation +# (for example, the help file for Excel 2002 which says that the first day +# in the 1904 date system is 1904-01-02). +# +#
(5) Usage of the Excel DATE() function may leave strange dates in a spreadsheet. Quoting the help file, +# in respect of the 1900 date system: "If year is between 0 (zero) and 1899 (inclusive), +# Excel adds that value to 1900 to calculate the year. For example, DATE(108,1,2) returns January 2, 2008 (1900+108)." +# This gimmick, semi-defensible only for arguments up to 99 and only in the pre-Y2K-awareness era, +# means that DATE(1899, 12, 31) is interpreted as 3799-12-31.
+# +#For further information, please refer to the documentation for the xldate_* functions.
+# +#+# A name is used to refer to a cell, a group of cells, a constant +# value, a formula, or a macro. Usually the scope of a name is global +# across the whole workbook. However it can be local to a worksheet. +# For example, if the sales figures are in different cells in +# different sheets, the user may define the name "Sales" in each +# sheet. There are built-in names, like "Print_Area" and +# "Print_Titles"; these two are naturally local to a sheet. +#
+# To inspect the names with a user interface like MS Excel, OOo Calc, +# or Gnumeric, click on Insert/Names/Define. This will show the global +# names, plus those local to the currently selected sheet. +#
+# A Book object provides two dictionaries (name_map and +# name_and_scope_map) and a list (name_obj_list) which allow various +# ways of accessing the Name objects. There is one Name object for +# each NAME record found in the workbook. Name objects have many +# attributes, several of which are relevant only when obj.macro is 1. +#
+# In the examples directory you will find namesdemo.xls which +# showcases the many different ways that names can be used, and +# xlrdnamesAPIdemo.py which offers 3 different queries for inspecting +# the names in your files, and shows how to extract whatever a name is +# referring to. There is currently one "convenience method", +# Name.cell(), which extracts the value in the case where the name +# refers to a single cell. More convenience methods are planned. The +# source code for Name.cell (in __init__.py) is an extra source of +# information on how the Name attributes hang together. +#
+# +#Name information is not extracted from files older than +# Excel 5.0 (Book.biff_version < 50)
+# +#This collection of features, new in xlrd version 0.6.1, is intended +# to provide the information needed to (1) display/render spreadsheet contents +# (say) on a screen or in a PDF file, and (2) copy spreadsheet data to another +# file without losing the ability to display/render it.
+# +#A colour is represented in Excel as a (red, green, blue) ("RGB") tuple +# with each component in range(256). However it is not possible to access an +# unlimited number of colours; each spreadsheet is limited to a palette of 64 different +# colours (24 in Excel 3.0 and 4.0, 8 in Excel 2.0). Colours are referenced by an index +# ("colour index") into this palette. +# +# Colour indexes 0 to 7 represent 8 fixed built-in colours: black, white, red, green, blue, +# yellow, magenta, and cyan.
+#
+# The remaining colours in the palette (8 to 63 in Excel 5.0 and later)
+# can be changed by the user. In the Excel 2003 UI, Tools/Options/Color presents a palette
+# of 7 rows of 8 colours. The last two rows are reserved for use in charts.
+# The correspondence between this grid and the assigned
+# colour indexes is NOT left-to-right top-to-bottom.
+# Indexes 8 to 15 correspond to changeable
+# parallels of the 8 fixed colours -- for example, index 7 is forever cyan;
+# index 15 starts off being cyan but can be changed by the user.
+#
+# The default colour for each index depends on the file version; tables of the defaults
+# are available in the source code. If the user changes one or more colours,
+# a PALETTE record appears in the XLS file -- it gives the RGB values for *all* changeable
+# indexes.
+# Note that colours can be used in "number formats": "[CYAN]...." and "[COLOR8]...." refer
+# to colour index 7; "[COLOR16]...." will produce cyan
+# unless the user changes colour index 15 to something else.
+#
+#
In addition, there are several "magic" colour indexes used by Excel:
+# 0x18 (BIFF3-BIFF4), 0x40 (BIFF5-BIFF8): System window text colour for border lines
+# (used in XF, CF, and WINDOW2 records)
+# 0x19 (BIFF3-BIFF4), 0x41 (BIFF5-BIFF8): System window background colour for pattern background
+# (used in XF and CF records )
+# 0x43: System face colour (dialogue background colour)
+# 0x4D: System window text colour for chart border lines
+# 0x4E: System window background colour for chart areas
+# 0x4F: Automatic colour for chart border lines (seems to be always Black)
+# 0x50: System ToolTip background colour (used in note objects)
+# 0x51: System ToolTip text colour (used in note objects)
+# 0x7FFF: System window text colour for fonts (used in FONT and CF records)
+# Note 0x7FFF appears to be the *default* colour index. It appears quite often in FONT
+# records.
+#
+#
This feature, new in version 0.7.1, is governed by the on_demand argument +# to the open_workbook() function and allows saving memory and time by loading +# only those sheets that the caller is interested in, and releasing sheets +# when no longer required.
+# +#on_demand=False (default): No change. open_workbook() loads global data +# and all sheets, releases resources no longer required (principally the +# str or mmap object containing the Workbook stream), and returns.
+# +#on_demand=True and BIFF version < 5.0: A warning message is emitted, +# on_demand is recorded as False, and the old process is followed.
+# +#on_demand=True and BIFF version >= 5.0: open_workbook() loads global +# data and returns without releasing resources. At this stage, the only +# information available about sheets is Book.nsheets and Book.sheet_names().
+# +#Book.sheet_by_name() and Book.sheet_by_index() will load the requested +# sheet if it is not already loaded.
+# +#Book.sheets() will load all/any unloaded sheets.
+# +#The caller may save memory by calling +# Book.unload_sheet(sheet_name_or_index) when finished with the sheet. +# This applies irrespective of the state of on_demand.
+# +#The caller may re-load an unloaded sheet by calling Book.sheet_by_xxxx() +# -- except if those required resources have been released (which will +# have happened automatically when on_demand is false). This is the only +# case where an exception will be raised.
+# +#The caller may query the state of a sheet: +# Book.sheet_loaded(sheet_name_or_index) -> a bool
+# +#Book.release_resources() may used to save memory and close +# any memory-mapped file before proceding to examine already-loaded +# sheets. Once resources are released, no further sheets can be loaded.
+# +#When using on-demand, it is advisable to ensure that +# Book.release_resources() is always called even if an exception +# is raised in your own code; otherwise if the input file has been +# memory-mapped, the mmap.mmap object will not be closed and you will +# not be able to access the physical file until your Python process +# terminates. This can be done by calling Book.release_resources() +# explicitly in the finally suite of a try/finally block. +# New in xlrd 0.7.2: the Book object is a "context manager", so if +# using Python 2.5 or later, you can wrap your code in a "with" +# statement.
+## + +import sys, zipfile, pprint +from . import timemachine +from .biffh import ( + XLRDError, + biff_text_from_num, + error_text_from_code, + XL_CELL_BLANK, + XL_CELL_TEXT, + XL_CELL_BOOLEAN, + XL_CELL_ERROR, + XL_CELL_EMPTY, + XL_CELL_DATE, + XL_CELL_NUMBER + ) +from .formula import * # is constrained by __all__ +from .book import Book, colname #### TODO #### formula also has `colname` (restricted to 256 cols) +from .sheet import empty_cell +from .xldate import XLDateError, xldate_as_tuple + +if sys.version.startswith("IronPython"): + # print >> sys.stderr, "...importing encodings" + import encodings + +try: + import mmap + MMAP_AVAILABLE = 1 +except ImportError: + MMAP_AVAILABLE = 0 +USE_MMAP = MMAP_AVAILABLE + +## +# +# Open a spreadsheet file for data extraction. +# +# @param filename The path to the spreadsheet file to be opened. +# +# @param logfile An open file to which messages and diagnostics are written. +# +# @param verbosity Increases the volume of trace material written to the logfile. +# +# @param use_mmap Whether to use the mmap module is determined heuristically. +# Use this arg to override the result. Current heuristic: mmap is used if it exists. +# +# @param file_contents ... as a string or an mmap.mmap object or some other behave-alike object. +# If file_contents is supplied, filename will not be used, except (possibly) in messages. +# +# @param encoding_override Used to overcome missing or bad codepage information +# in older-version files. Refer to discussion in the Unicode section above. +#Portions copyright © 2005-2010 Stephen John Machin, Lingfo Pty Ltd
+#This module is part of the xlrd package, which is released under a BSD-style licence.
+## + +# 2010-03-01 SJM Reading SCL record +# 2010-03-01 SJM Added more record IDs for biff_dump & biff_count +# 2008-02-10 SJM BIFF2 BLANK record +# 2008-02-08 SJM Preparation for Excel 2.0 support +# 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for biff_dump & biff_count +# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files. +# 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte. +# 2007-04-22 SJM Remove experimental "trimming" facility. + +from __future__ import print_function + +DEBUG = 0 + +from struct import unpack +import sys +from .timemachine import * + +class XLRDError(Exception): + pass + +## +# Parent of almost all other classes in the package. Defines a common "dump" method +# for debugging. + +class BaseObject(object): + + _repr_these = [] + + ## + # @param f open file object, to which the dump is written + # @param header text to write before the dump + # @param footer text to write after the dump + # @param indent number of leading spaces (for recursive calls) + + def dump(self, f=None, header=None, footer=None, indent=0): + if f is None: + f = sys.stderr + if hasattr(self, "__slots__"): + alist = [] + for attr in self.__slots__: + alist.append((attr, getattr(self, attr))) + else: + alist = self.__dict__.items() + alist = sorted(alist) + pad = " " * indent + if header is not None: print(header, file=f) + list_type = type([]) + dict_type = type({}) + for attr, value in alist: + if getattr(value, 'dump', None) and attr != 'book': + value.dump(f, + header="%s%s (%s object):" % (pad, attr, value.__class__.__name__), + indent=indent+4) + elif attr not in self._repr_these and ( + isinstance(value, list_type) or isinstance(value, dict_type) + ): + print("%s%s: %s, len = %d" % (pad, attr, type(value), len(value)), file=f) + else: + fprintf(f, "%s%s: %r\n", pad, attr, value) + if footer is not None: print(footer, file=f) + +FUN, FDT, FNU, FGE, FTX = range(5) # unknown, date, number, general, text +DATEFORMAT = FDT +NUMBERFORMAT = FNU + +( + XL_CELL_EMPTY, + XL_CELL_TEXT, + XL_CELL_NUMBER, + XL_CELL_DATE, + XL_CELL_BOOLEAN, + XL_CELL_ERROR, + XL_CELL_BLANK, # for use in debugging, gathering stats, etc +) = range(7) + +biff_text_from_num = { + 0: "(not BIFF)", + 20: "2.0", + 21: "2.1", + 30: "3", + 40: "4S", + 45: "4W", + 50: "5", + 70: "7", + 80: "8", + 85: "8X", + } + +## +#This dictionary can be used to produce a text version of the internal codes +# that Excel uses for error cells. Here are its contents: +#
+# 0x00: '#NULL!', # Intersection of two cell ranges is empty +# 0x07: '#DIV/0!', # Division by zero +# 0x0F: '#VALUE!', # Wrong type of operand +# 0x17: '#REF!', # Illegal or deleted cell reference +# 0x1D: '#NAME?', # Wrong function or range name +# 0x24: '#NUM!', # Value range overflow +# 0x2A: '#N/A', # Argument or function not available +#+ +error_text_from_code = { + 0x00: '#NULL!', # Intersection of two cell ranges is empty + 0x07: '#DIV/0!', # Division by zero + 0x0F: '#VALUE!', # Wrong type of operand + 0x17: '#REF!', # Illegal or deleted cell reference + 0x1D: '#NAME?', # Wrong function or range name + 0x24: '#NUM!', # Value range overflow + 0x2A: '#N/A', # Argument or function not available +} + +BIFF_FIRST_UNICODE = 80 + +XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5 +XL_WORKBOOK_GLOBALS_4W = 0x100 +XL_WORKSHEET = WRKSHEET = 0x10 + +XL_BOUNDSHEET_WORKSHEET = 0x00 +XL_BOUNDSHEET_CHART = 0x02 +XL_BOUNDSHEET_VB_MODULE = 0x06 + +# XL_RK2 = 0x7e +XL_ARRAY = 0x0221 +XL_ARRAY2 = 0x0021 +XL_BLANK = 0x0201 +XL_BLANK_B2 = 0x01 +XL_BOF = 0x809 +XL_BOOLERR = 0x205 +XL_BOOLERR_B2 = 0x5 +XL_BOUNDSHEET = 0x85 +XL_BUILTINFMTCOUNT = 0x56 +XL_CF = 0x01B1 +XL_CODEPAGE = 0x42 +XL_COLINFO = 0x7D +XL_COLUMNDEFAULT = 0x20 # BIFF2 only +XL_COLWIDTH = 0x24 # BIFF2 only +XL_CONDFMT = 0x01B0 +XL_CONTINUE = 0x3c +XL_COUNTRY = 0x8C +XL_DATEMODE = 0x22 +XL_DEFAULTROWHEIGHT = 0x0225 +XL_DEFCOLWIDTH = 0x55 +XL_DIMENSION = 0x200 +XL_DIMENSION2 = 0x0 +XL_EFONT = 0x45 +XL_EOF = 0x0a +XL_EXTERNNAME = 0x23 +XL_EXTERNSHEET = 0x17 +XL_EXTSST = 0xff +XL_FEAT11 = 0x872 +XL_FILEPASS = 0x2f +XL_FONT = 0x31 +XL_FONT_B3B4 = 0x231 +XL_FORMAT = 0x41e +XL_FORMAT2 = 0x1E # BIFF2, BIFF3 +XL_FORMULA = 0x6 +XL_FORMULA3 = 0x206 +XL_FORMULA4 = 0x406 +XL_GCW = 0xab +XL_HLINK = 0x01B8 +XL_QUICKTIP = 0x0800 +XL_HORIZONTALPAGEBREAKS = 0x1b +XL_INDEX = 0x20b +XL_INTEGER = 0x2 # BIFF2 only +XL_IXFE = 0x44 # BIFF2 only +XL_LABEL = 0x204 +XL_LABEL_B2 = 0x04 +XL_LABELRANGES = 0x15f +XL_LABELSST = 0xfd +XL_LEFTMARGIN = 0x26 +XL_TOPMARGIN = 0x28 +XL_RIGHTMARGIN = 0x27 +XL_BOTTOMMARGIN = 0x29 +XL_HEADER = 0x14 +XL_FOOTER = 0x15 +XL_HCENTER = 0x83 +XL_VCENTER = 0x84 +XL_MERGEDCELLS = 0xE5 +XL_MSO_DRAWING = 0x00EC +XL_MSO_DRAWING_GROUP = 0x00EB +XL_MSO_DRAWING_SELECTION = 0x00ED +XL_MULRK = 0xbd +XL_MULBLANK = 0xbe +XL_NAME = 0x18 +XL_NOTE = 0x1c +XL_NUMBER = 0x203 +XL_NUMBER_B2 = 0x3 +XL_OBJ = 0x5D +XL_PAGESETUP = 0xA1 +XL_PALETTE = 0x92 +XL_PANE = 0x41 +XL_PRINTGRIDLINES = 0x2B +XL_PRINTHEADERS = 0x2A +XL_RK = 0x27e +XL_ROW = 0x208 +XL_ROW_B2 = 0x08 +XL_RSTRING = 0xd6 +XL_SCL = 0x00A0 +XL_SHEETHDR = 0x8F # BIFF4W only +XL_SHEETPR = 0x81 +XL_SHEETSOFFSET = 0x8E # BIFF4W only +XL_SHRFMLA = 0x04bc +XL_SST = 0xfc +XL_STANDARDWIDTH = 0x99 +XL_STRING = 0x207 +XL_STRING_B2 = 0x7 +XL_STYLE = 0x293 +XL_SUPBOOK = 0x1AE # aka EXTERNALBOOK in OOo docs +XL_TABLEOP = 0x236 +XL_TABLEOP2 = 0x37 +XL_TABLEOP_B2 = 0x36 +XL_TXO = 0x1b6 +XL_UNCALCED = 0x5e +XL_UNKNOWN = 0xffff +XL_VERTICALPAGEBREAKS = 0x1a +XL_WINDOW2 = 0x023E +XL_WINDOW2_B2 = 0x003E +XL_WRITEACCESS = 0x5C +XL_WSBOOL = XL_SHEETPR +XL_XF = 0xe0 +XL_XF2 = 0x0043 # BIFF2 version of XF record +XL_XF3 = 0x0243 # BIFF3 version of XF record +XL_XF4 = 0x0443 # BIFF4 version of XF record + +boflen = {0x0809: 8, 0x0409: 6, 0x0209: 6, 0x0009: 4} +bofcodes = (0x0809, 0x0409, 0x0209, 0x0009) + +XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206) + +_cell_opcode_list = [ + XL_BOOLERR, + XL_FORMULA, + XL_FORMULA3, + XL_FORMULA4, + XL_LABEL, + XL_LABELSST, + XL_MULRK, + XL_NUMBER, + XL_RK, + XL_RSTRING, + ] +_cell_opcode_dict = {} +for _cell_opcode in _cell_opcode_list: + _cell_opcode_dict[_cell_opcode] = 1 + +def is_cell_opcode(c): + return c in _cell_opcode_dict + +def upkbits(tgt_obj, src, manifest, local_setattr=setattr): + for n, mask, attr in manifest: + local_setattr(tgt_obj, attr, (src & mask) >> n) + +def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int): + for n, mask, attr in manifest: + local_setattr(tgt_obj, attr, local_int((src & mask) >> n)) + +def unpack_string(data, pos, encoding, lenlen=1): + nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] + pos += lenlen + return unicode(data[pos:pos+nchars], encoding) + +def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None): + if known_len is not None: + # On a NAME record, the length byte is detached from the front of the string. + nchars = known_len + else: + nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] + pos += lenlen + newpos = pos + nchars + return (unicode(data[pos:newpos], encoding), newpos) + +def unpack_unicode(data, pos, lenlen=2): + "Return unicode_strg" + nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] + if not nchars: + # Ambiguous whether 0-length string should have an "options" byte. + # Avoid crash if missing. + return UNICODE_LITERAL("") + pos += lenlen + options = BYTES_ORD(data[pos]) + pos += 1 + # phonetic = options & 0x04 + # richtext = options & 0x08 + if options & 0x08: + # rt = unpack('
WARNING: You don't call this class yourself. You use the Book object that +# was returned when you called xlrd.open_workbook("myfile.xls").
+ +class Book(BaseObject): + + ## + # The number of worksheets present in the workbook file. + # This information is available even when no sheets have yet been loaded. + nsheets = 0 + + ## + # Which date system was in force when this file was last saved.Copyright � 2005-2012 Stephen John Machin, Lingfo Pty Ltd
+#This module is part of the xlrd package, which is released under a BSD-style licence.
+## + +# No part of the content of this file was derived from the works of David Giffin. + +# 2008-11-04 SJM Avoid assertion error when -1 used instead of -2 for first_SID of empty SCSS [Frank Hoffsuemmer] +# 2007-09-08 SJM Warning message if sector sizes are extremely large. +# 2007-05-07 SJM Meaningful exception instead of IndexError if a SAT (sector allocation table) is corrupted. +# 2007-04-22 SJM Missing "<" in a struct.unpack call => can't open files on bigendian platforms. + +from __future__ import print_function +import sys +from struct import unpack +from .timemachine import * +import array + +## +# Magic cookie that should appear in the first 8 bytes of the file. +SIGNATURE = b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" + +EOCSID = -2 +FREESID = -1 +SATSID = -3 +MSATSID = -4 +EVILSID = -5 + +class CompDocError(Exception): + pass + +class DirNode(object): + + def __init__(self, DID, dent, DEBUG=0, logfile=sys.stdout): + # dent is the 128-byte directory entry + self.DID = DID + self.logfile = logfile + (cbufsize, self.etype, self.colour, self.left_DID, self.right_DID, + self.root_DID) = \ + unpack('Implements the minimal functionality required +to extract a "Workbook" or "Book" stream (as one big string) +from an OLE2 Compound Document file. +
Copyright © 2005-2012 Stephen John Machin, Lingfo Pty Ltd
+This module is part of the xlrd package, which is released under a BSD-style licence.
+Compound document handler.
+For more information about this class, see The CompDoc Class.
+Magic cookie that should appear in the first 8 bytes of the file.
+Compound document handler.
+Interrogate the compound document's directory; return the stream as a string if found, otherwise +return None.
+Interrogate the compound document's directory. +If the named stream is not found, (None, 0, 0) will be returned. +If the named stream is found and is contiguous within the original byte sequence ("mem") +used when the document was opened, +then (mem, offset_to_start_of_stream, length_of_stream) is returned. +Otherwise a new string is built from the fragments and (new_string, 0, length_of_stream) is returned.
+A Python module for extracting data from MS Excel (TM) spreadsheet files.
+
+Version 0.7.4 -- April 2012
+
+Development of this module would not have been possible without the document +"OpenOffice.org's Documentation of the Microsoft Excel File Format" +("OOo docs" for short). +The latest version is available from OpenOffice.org in + PDF format +and + ODT format. +Small portions of the OOo docs are reproduced in this +document. A study of the OOo docs is recommended for those who wish a +deeper understanding of the Excel file layout than the xlrd docs can provide. +
+ +Backporting to Python 2.1 was partially funded by + + Journyx - provider of timesheet and project accounting solutions. + +
+ +Provision of formatting information in version 0.6.1 was funded by + + Simplistix Ltd. + +
+ +This module presents all text strings as Python unicode objects. +From Excel 97 onwards, text in Excel spreadsheets has been stored as Unicode. +Older files (Excel 95 and earlier) don't keep strings in Unicode; +a CODEPAGE record provides a codepage number (for example, 1252) which is +used by xlrd to derive the encoding (for same example: "cp1252") which is +used to translate to Unicode.
+ +If the CODEPAGE record is missing (possible if the file was created +by third-party software), xlrd will assume that the encoding is ascii, and keep going. +If the actual encoding is not ascii, a UnicodeDecodeError exception will be raised and +you will need to determine the encoding yourself, and tell xlrd: +
+ book = xlrd.open_workbook(..., encoding_override="cp1252") ++
If the CODEPAGE record exists but is wrong (for example, the codepage +number is 1251, but the strings are actually encoded in koi8_r), +it can be overridden using the same mechanism. +The supplied runxlrd.py has a corresponding command-line argument, which +may be used for experimentation: +
+ runxlrd.py -e koi8_r 3rows myfile.xls ++
The first place to look for an encoding ("codec name") is + +the Python documentation. +
+ + +In reality, there are no such things. What you have are floating point +numbers and pious hope. +There are several problems with Excel dates:
+ +(1) Dates are not stored as a separate data type; they are stored as +floating point numbers and you have to rely on +(a) the "number format" applied to them in Excel and/or +(b) knowing which cells are supposed to have dates in them. +This module helps with (a) by inspecting the +format that has been applied to each number cell; +if it appears to be a date format, the cell +is classified as a date rather than a number. Feedback on this feature, +especially from non-English-speaking locales, would be appreciated.
+ +(2) Excel for Windows stores dates by default as the number of +days (or fraction thereof) since 1899-12-31T00:00:00. Excel for +Macintosh uses a default start date of 1904-01-01T00:00:00. The date +system can be changed in Excel on a per-workbook basis (for example: +Tools -> Options -> Calculation, tick the "1904 date system" box). +This is of course a bad idea if there are already dates in the +workbook. There is no good reason to change it even if there are no +dates in the workbook. Which date system is in use is recorded in the +workbook. A workbook transported from Windows to Macintosh (or vice +versa) will work correctly with the host Excel. When using this +module's xldate_as_tuple function to convert numbers from a workbook, +you must use the datemode attribute of the Book object. If you guess, +or make a judgement depending on where you believe the workbook was +created, you run the risk of being 1462 days out of kilter.
+ +Reference: +http://support.microsoft.com/default.aspx?scid=KB;EN-US;q180162
+ + +(3) The Excel implementation of the Windows-default 1900-based date system works on the +incorrect premise that 1900 was a leap year. It interprets the number 60 as meaning 1900-02-29, +which is not a valid date. Consequently any number less than 61 is ambiguous. Example: is 59 the +result of 1900-02-28 entered directly, or is it 1900-03-01 minus 2 days? The OpenOffice.org Calc +program "corrects" the Microsoft problem; entering 1900-02-27 causes the number 59 to be stored. +Save as an XLS file, then open the file with Excel -- you'll see 1900-02-28 displayed.
+ +Reference: http://support.microsoft.com/default.aspx?scid=kb;en-us;214326
+ +(4) The Macintosh-default 1904-based date system counts 1904-01-02 as day 1 and 1904-01-01 as day zero. +Thus any number such that (0.0 <= number < 1.0) is ambiguous. Is 0.625 a time of day (15:00:00), +independent of the calendar, +or should it be interpreted as an instant on a particular day (1904-01-01T15:00:00)? +The xldate_* functions in this module +take the view that such a number is a calendar-independent time of day (like Python's datetime.time type) for both +date systems. This is consistent with more recent Microsoft documentation +(for example, the help file for Excel 2002 which says that the first day +in the 1904 date system is 1904-01-02). + +
(5) Usage of the Excel DATE() function may leave strange dates in a spreadsheet. Quoting the help file, +in respect of the 1900 date system: "If year is between 0 (zero) and 1899 (inclusive), +Excel adds that value to 1900 to calculate the year. For example, DATE(108,1,2) returns January 2, 2008 (1900+108)." +This gimmick, semi-defensible only for arguments up to 99 and only in the pre-Y2K-awareness era, +means that DATE(1899, 12, 31) is interpreted as 3799-12-31.
+ +For further information, please refer to the documentation for the xldate_* functions.
+ ++A name is used to refer to a cell, a group of cells, a constant +value, a formula, or a macro. Usually the scope of a name is global +across the whole workbook. However it can be local to a worksheet. +For example, if the sales figures are in different cells in +different sheets, the user may define the name "Sales" in each +sheet. There are built-in names, like "Print_Area" and +"Print_Titles"; these two are naturally local to a sheet. +
+To inspect the names with a user interface like MS Excel, OOo Calc, +or Gnumeric, click on Insert/Names/Define. This will show the global +names, plus those local to the currently selected sheet. +
+A Book object provides two dictionaries (name_map and +name_and_scope_map) and a list (name_obj_list) which allow various +ways of accessing the Name objects. There is one Name object for +each NAME record found in the workbook. Name objects have many +attributes, several of which are relevant only when obj.macro is 1. +
+In the examples directory you will find namesdemo.xls which +showcases the many different ways that names can be used, and +xlrdnamesAPIdemo.py which offers 3 different queries for inspecting +the names in your files, and shows how to extract whatever a name is +referring to. There is currently one "convenience method", +Name.cell(), which extracts the value in the case where the name +refers to a single cell. More convenience methods are planned. The +source code for Name.cell (in __init__.py) is an extra source of +information on how the Name attributes hang together. +
+ +Name information is not extracted from files older than +Excel 5.0 (Book.biff_version < 50)
+ +This collection of features, new in xlrd version 0.6.1, is intended +to provide the information needed to (1) display/render spreadsheet contents +(say) on a screen or in a PDF file, and (2) copy spreadsheet data to another +file without losing the ability to display/render it.
+ +A colour is represented in Excel as a (red, green, blue) ("RGB") tuple +with each component in range(256). However it is not possible to access an +unlimited number of colours; each spreadsheet is limited to a palette of 64 different +colours (24 in Excel 3.0 and 4.0, 8 in Excel 2.0). Colours are referenced by an index +("colour index") into this palette. + +Colour indexes 0 to 7 represent 8 fixed built-in colours: black, white, red, green, blue, +yellow, magenta, and cyan.
+
+The remaining colours in the palette (8 to 63 in Excel 5.0 and later)
+can be changed by the user. In the Excel 2003 UI, Tools/Options/Color presents a palette
+of 7 rows of 8 colours. The last two rows are reserved for use in charts.
+The correspondence between this grid and the assigned
+colour indexes is NOT left-to-right top-to-bottom.
+Indexes 8 to 15 correspond to changeable
+parallels of the 8 fixed colours -- for example, index 7 is forever cyan;
+index 15 starts off being cyan but can be changed by the user.
+
+The default colour for each index depends on the file version; tables of the defaults
+are available in the source code. If the user changes one or more colours,
+a PALETTE record appears in the XLS file -- it gives the RGB values for *all* changeable
+indexes.
+Note that colours can be used in "number formats": "[CYAN]...." and "[COLOR8]...." refer
+to colour index 7; "[COLOR16]...." will produce cyan
+unless the user changes colour index 15 to something else.
+
+
In addition, there are several "magic" colour indexes used by Excel:
+0x18 (BIFF3-BIFF4), 0x40 (BIFF5-BIFF8): System window text colour for border lines
+(used in XF, CF, and WINDOW2 records)
+0x19 (BIFF3-BIFF4), 0x41 (BIFF5-BIFF8): System window background colour for pattern background
+(used in XF and CF records )
+0x43: System face colour (dialogue background colour)
+0x4D: System window text colour for chart border lines
+0x4E: System window background colour for chart areas
+0x4F: Automatic colour for chart border lines (seems to be always Black)
+0x50: System ToolTip background colour (used in note objects)
+0x51: System ToolTip text colour (used in note objects)
+0x7FFF: System window text colour for fonts (used in FONT and CF records)
+Note 0x7FFF appears to be the *default* colour index. It appears quite often in FONT
+records.
+
+
This feature, new in version 0.7.1, is governed by the on_demand argument +to the open_workbook() function and allows saving memory and time by loading +only those sheets that the caller is interested in, and releasing sheets +when no longer required.
+ +on_demand=False (default): No change. open_workbook() loads global data +and all sheets, releases resources no longer required (principally the +str or mmap object containing the Workbook stream), and returns.
+ +on_demand=True and BIFF version < 5.0: A warning message is emitted, +on_demand is recorded as False, and the old process is followed.
+ +on_demand=True and BIFF version >= 5.0: open_workbook() loads global +data and returns without releasing resources. At this stage, the only +information available about sheets is Book.nsheets and Book.sheet_names().
+ +Book.sheet_by_name() and Book.sheet_by_index() will load the requested +sheet if it is not already loaded.
+ +Book.sheets() will load all/any unloaded sheets.
+ +The caller may save memory by calling +Book.unload_sheet(sheet_name_or_index) when finished with the sheet. +This applies irrespective of the state of on_demand.
+ +The caller may re-load an unloaded sheet by calling Book.sheet_by_xxxx() + -- except if those required resources have been released (which will +have happened automatically when on_demand is false). This is the only +case where an exception will be raised.
+ +The caller may query the state of a sheet: +Book.sheet_loaded(sheet_name_or_index) -> a bool
+ +Book.release_resources() may used to save memory and close +any memory-mapped file before proceding to examine already-loaded +sheets. Once resources are released, no further sheets can be loaded.
+ +When using on-demand, it is advisable to ensure that +Book.release_resources() is always called even if an exception +is raised in your own code; otherwise if the input file has been +memory-mapped, the mmap.mmap object will not be closed and you will +not be able to access the physical file until your Python process +terminates. This can be done by calling Book.release_resources() +explicitly in the finally suite of a try/finally block. +New in xlrd 0.7.2: the Book object is a "context manager", so if +using Python 2.5 or later, you can wrap your code in a "with" +statement.
+Parent of almost all other classes in the package.
+For more information about this class, see The BaseObject Class.
+Contents of a "workbook".
+For more information about this class, see The Book Class.
+Contains the data for one cell.
+For more information about this class, see The Cell Class.
+Utility function: (5, 7) => 'H6'
+Utility function: (5, 7) => '$H$6'
+Width and default formatting information that applies to one or +more columns in a sheet.
+For more information about this class, see The Colinfo Class.
+Utility function: 7 => 'H', 27 => 'AB'
+For debugging and analysis: summarise the file's BIFF records. +I.e. produce a sorted file of (record_name, count).
+For debugging: dump the file's BIFF records in char & hex. +
There is one and only one instance of an empty cell -- it's a singleton. This is it. +You may use a test like "acell is empty_cell".
+This mixin class exists solely so that Format, Font, and XF....
+For more information about this class, see The EqNeAttrs Class.
+This dictionary can be used to produce a text version of the internal codes +that Excel uses for error cells. Here are its contents: +
+0x00: '#NULL!', # Intersection of two cell ranges is empty +0x07: '#DIV/0!', # Division by zero +0x0F: '#VALUE!', # Wrong type of operand +0x17: '#REF!', # Illegal or deleted cell reference +0x1D: '#NAME?', # Wrong function or range name +0x24: '#NUM!', # Value range overflow +0x2A: '#N/A', # Argument or function not available ++
An Excel "font" contains the details of not only what is normally +considered a font, but also several other display attributes.
+For more information about this class, see The Font Class.
+"Number format" information from a FORMAT record.
+For more information about this class, see The Format Class.
+Contains the attributes of a hyperlink.
+For more information about this class, see The Hyperlink Class.
+Information relating to a named reference, formula, macro, etc.
+For more information about this class, see The Name Class.
+Represents a user "comment" or "note".
+For more information about this class, see The Note Class.
+Open a spreadsheet file for data extraction.
+Used in evaluating formulas.
+For more information about this class, see The Operand Class.
+Utility function:
+
Ref3D((1, 4, 5, 20, 7, 10)) => 'Sheet2:Sheet3!$H$6:$J$20'
+
Utility function:
+
Ref3D(coords=(0, 1, -32, -22, -13, 13), relflags=(0, 0, 1, 1, 1, 1))
+R1C1 mode => 'Sheet1!R[-32]C[-13]:R[-23]C[12]'
+A1 mode => depends on base cell (browx, bcolx)
+
Represents an absolute or relative 3-dimensional reference to a box +of one or more cells.
+For more information about this class, see The Ref3D Class.
+Height and default formatting information that applies to a row in a sheet.
+For more information about this class, see The Rowinfo Class.
+Contains the data for one worksheet.
+For more information about this class, see The Sheet Class.
+eXtended Formatting information for cells, rows, columns and styles.
+For more information about this class, see The XF Class.
+A collection of the alignment and similar attributes of an XF record.
+For more information about this class, see The XFAlignment Class.
+A collection of the background-related attributes of an XF record.
+For more information about this class, see The XFBackground Class.
+A collection of the border-related attributes of an XF record.
+For more information about this class, see The XFBorder Class.
+A collection of the protection-related attributes of an XF record.
+For more information about this class, see The XFProtection Class.
+Convert an Excel number (presumed to represent a date, a datetime or a time) into +a tuple suitable for feeding to datetime or mx.DateTime constructors.
+Convert a date tuple (year, month, day) to an Excel date.
+Convert a datetime tuple (year, month, day, hour, minute, second) to an Excel date value. +For more details, refer to other xldate_from_*_tuple functions.
+Convert a time tuple (hour, minute, second) to an Excel "date" value (fraction of a day).
+Parent of almost all other classes in the package. Defines a common "dump" method +for debugging.
+Contents of a "workbook". +
WARNING: You don't call this class yourself. You use the Book object that +was returned when you called xlrd.open_workbook("myfile.xls").
+Version of BIFF (Binary Interchange File Format) used to create the file. +Latest is 8.0 (represented here as 80), introduced with Excel 97. +Earliest supported by this module: 2.0 (represented as 20).
+An integer denoting the character set used for strings in this file. +For BIFF 8 and later, this will be 1200, meaning Unicode; more precisely, UTF_16_LE. +For earlier versions, this is used to derive the appropriate Python encoding +to be used to convert to Unicode. +Examples: 1252 -> 'cp1252', 10000 -> 'mac_roman'
+This provides definitions for colour indexes. Please refer to the
+above section "The Palette; Colour Indexes" for an explanation
+of how colours are represented in Excel.
+Colour indexes into the palette map into (red, green, blue) tuples.
+"Magic" indexes e.g. 0x7FFF map to None.
+colour_map is what you need if you want to render cells on screen or in a PDF
+file. If you are writing an output XLS file, use palette_record.
+
-- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
+
A tuple containing the (telephone system) country code for:
+ [0]: the user-interface setting when the file was created.
+ [1]: the regional settings.
+Example: (1, 61) meaning (USA, Australia).
+This information may give a clue to the correct encoding for an unknown codepage.
+For a long list of observed values, refer to the OpenOffice.org documentation for
+the COUNTRY record.
+
Which date system was in force when this file was last saved.
+ 0 => 1900 system (the Excel for Windows default).
+ 1 => 1904 system (the Excel for Macintosh default).
+
The encoding that was derived from the codepage.
+A list of Font class instances, each corresponding to a FONT record.
+
-- New in version 0.6.1
+
A list of Format objects, each corresponding to a FORMAT record, in
+the order that they appear in the input file.
+It does not contain builtin formats.
+If you are creating an output file using (for example) pyExcelerator,
+use this list.
+The collection to be used for all visual rendering purposes is format_map.
+
-- New in version 0.6.1
+
The mapping from XF.format_key to Format object.
+
-- New in version 0.6.1
+
Time in seconds to extract the XLS image as a contiguous string (or mmap equivalent).
+Time in seconds to parse the data from the contiguous string (or mmap equivalent).
+A mapping from (lower_case_name, scope) to a single Name object.
+
-- New in version 0.6.0
+
A mapping from lower_case_name to a list of Name objects. The list is
+sorted in scope order. Typically there will be one item (of global scope)
+in the list.
+
-- New in version 0.6.0
+
List containing a Name object for each NAME record in the workbook.
+
-- New in version 0.6.0
+
The number of worksheets present in the workbook file. +This information is available even when no sheets have yet been loaded.
+If the user has changed any of the colours in the standard palette, the XLS
+file will contain a PALETTE record with 56 (16 for Excel 4.0 and earlier)
+RGB values in it, and this list will be e.g. [(r0, b0, g0), ..., (r55, b55, g55)].
+Otherwise this list will be empty. This is what you need if you are
+writing an output XLS file. If you want to render cells on screen or in a PDF
+file, use colour_map.
+
-- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
+
This method has a dual purpose. You can call it to release +memory-consuming objects and (possibly) a memory-mapped file +(mmap.mmap object) when you have finished loading sheets in +on_demand mode, but still require the Book object to examine the +loaded sheets. It is also called automatically (a) when open_workbook +raises an exception and (b) if you are using a "with" statement, when +the "with" block is exited. Calling this method multiple times on the +same object has no ill effect.
+This provides access via name to the extended format information for
+both built-in styles and user-defined styles.
+It maps name to (built_in, xf_index), where:
+name is either the name of a user-defined style,
+or the name of one of the built-in styles. Known built-in names are
+Normal, RowLevel_1 to RowLevel_7,
+ColLevel_1 to ColLevel_7, Comma, Currency, Percent, "Comma [0]",
+"Currency [0]", Hyperlink, and "Followed Hyperlink".
+built_in 1 = built-in style, 0 = user-defined
+xf_index is an index into Book.xf_list.
+References: OOo docs s6.99 (STYLE record); Excel UI Format/Style
+
-- New in version 0.6.1; since 0.7.4, extracted only if
+open_workbook(..., formatting_info=True)
+
What (if anything) is recorded as the name of the last user to save the file.
+A list of XF class instances, each corresponding to an XF record.
+
-- New in version 0.6.1
+
Contains the data for one cell.
+ +WARNING: You don't call this class yourself. You access Cell objects +via methods of the Sheet object(s) that you found in the Book object that +was returned when you called xlrd.open_workbook("myfile.xls").
+Cell objects have three attributes: ctype is an int, value +(which depends on ctype) and xf_index. +If "formatting_info" is not enabled when the workbook is opened, xf_index will be None. +The following table describes the types of cells and how their values +are represented in Python.
+ +Type symbol | +Type number | +Python value | +
---|---|---|
XL_CELL_EMPTY | +0 | +empty string u'' | +
XL_CELL_TEXT | +1 | +a Unicode string | +
XL_CELL_NUMBER | +2 | +float | +
XL_CELL_DATE | +3 | +float | +
XL_CELL_BOOLEAN | +4 | +int; 1 means TRUE, 0 means FALSE | +
XL_CELL_ERROR | +5 | +int representing internal Excel codes; for a text representation, +refer to the supplied dictionary error_text_from_code | +
XL_CELL_BLANK | +6 | +empty string u''. Note: this type will appear only when +open_workbook(..., formatting_info=True) is used. | +
Width and default formatting information that applies to one or +more columns in a sheet. Derived from COLINFO records. + +
Here is the default hierarchy for width, according to the OOo docs:
+
+
"""In BIFF3, if a COLINFO record is missing for a column,
+the width specified in the record DEFCOLWIDTH is used instead.
+
+
In BIFF4-BIFF7, the width set in this [COLINFO] record is only used,
+if the corresponding bit for this column is cleared in the GCW
+record, otherwise the column width set in the DEFCOLWIDTH record
+is used (the STANDARDWIDTH record is always ignored in this case [see footnote!]).
+
+
In BIFF8, if a COLINFO record is missing for a column,
+the width specified in the record STANDARDWIDTH is used.
+If this [STANDARDWIDTH] record is also missing,
+the column width of the record DEFCOLWIDTH is used instead."""
+
+
+Footnote: The docs on the GCW record say this:
+"""
+If a bit is set, the corresponding column uses the width set in the STANDARDWIDTH
+record. If a bit is cleared, the corresponding column uses the width set in the
+COLINFO record for this column.
+
If a bit is set, and the worksheet does not contain the STANDARDWIDTH record, or if
+the bit is cleared, and the worksheet does not contain the COLINFO record, the DEFCOLWIDTH
+record of the worksheet will be used instead.
+
"""
+At the moment (2007-01-17) xlrd is going with the GCW version of the story.
+Reference to the source may be useful: see the computed_column_width(colx) method
+of the Sheet class.
+
-- New in version 0.6.1
+
Value of a 1-bit flag whose purpose is unknown +but is often seen set to 1
+1 = column is collapsed
+1 = column is hidden
+Outline level of the column, in range(7). +(0 = no outline)
+Width of the column in 1/256 of the width of the zero character, +using default font (first FONT record in the file).
+XF index to be used for formatting empty cells.
+This mixin class exists solely so that Format, Font, and XF.... objects +can be compared by value of their attributes.
+An Excel "font" contains the details of not only what is normally
+considered a font, but also several other display attributes.
+Items correspond to those in the Excel UI's Format/Cells/Font tab.
+
-- New in version 0.6.1
+
1 = Characters are bold. Redundant; see "weight" attribute.
+Values: 0 = ANSI Latin, 1 = System default, 2 = Symbol, +77 = Apple Roman, +128 = ANSI Japanese Shift-JIS, +129 = ANSI Korean (Hangul), +130 = ANSI Korean (Johab), +134 = ANSI Chinese Simplified GBK, +136 = ANSI Chinese Traditional BIG5, +161 = ANSI Greek, +162 = ANSI Turkish, +163 = ANSI Vietnamese, +177 = ANSI Hebrew, +178 = ANSI Arabic, +186 = ANSI Baltic, +204 = ANSI Cyrillic, +222 = ANSI Thai, +238 = ANSI Latin II (Central European), +255 = OEM Latin I
+An explanation of "colour index" is given in the Formatting +section at the start of this document.
+1 = Superscript, 2 = Subscript.
+0 = None (unknown or don't care)
+1 = Roman (variable width, serifed)
+2 = Swiss (variable width, sans-serifed)
+3 = Modern (fixed width, serifed or sans-serifed)
+4 = Script (cursive)
+5 = Decorative (specialised, for example Old English, Fraktur)
+
The 0-based index used to refer to this Font() instance. +Note that index 4 is never used; xlrd supplies a dummy place-holder.
+Height of the font (in twips). A twip = 1/20 of a point.
+1 = Characters are italic.
+The name of the font. Example: u"Arial"
+1 = Font is outline style (Macintosh only)
+1 = Font is shadow style (Macintosh only)
+1 = Characters are struck out.
+0 = None
+1 = Single; 0x21 (33) = Single accounting
+2 = Double; 0x22 (34) = Double accounting
+
1 = Characters are underlined. Redundant; see "underline_type" attribute.
+Font weight (100-1000). Standard values are 400 for normal text +and 700 for bold text.
+"Number format" information from a FORMAT record.
+
-- New in version 0.6.1
+
The key into Book.format_map
+The format string
+A classification that has been inferred from the format string.
+Currently, this is used only to distinguish between numbers and dates.
+
Values:
+
FUN = 0 # unknown
+
FDT = 1 # date
+
FNU = 2 # number
+
FGE = 3 # general
+
FTX = 4 # text
+
Contains the attributes of a hyperlink.
+Hyperlink objects are accessible through Sheet.hyperlink_list
+and Sheet.hyperlink_map.
+
-- New in version 0.7.2
+
Description ... this is displayed in the cell, +and should be identical to the cell value. Unicode string, or None. It seems +impossible NOT to have a description created by the Excel UI.
+Index of first column
+Index of first row
+Index of last column
+Index of last row
+The text of the "quick tip" displayed when the cursor +hovers over the hyperlink.
+Target frame. Unicode string. Note: I have not seen a case of this. +It seems impossible to create one in the Excel UI.
+"Textmark": the piece after the "#" in +"http://docs.python.org/library#struct_module", or the Sheet1!A1:Z99 +part when type is "workbook".
+Type of hyperlink. Unicode string, one of 'url', 'unc', +'local file', 'workbook', 'unknown'
+The URL or file-path, depending in the type. Unicode string, except +in the rare case of a local but non-existent file with non-ASCII +characters in the name, in which case only the "8.3" filename is available, +as a bytes (3.x) or str (2.x) string, with unknown encoding. +
Information relating to a named reference, formula, macro, etc.
+
-- New in version 0.6.0
+
-- Name information is not extracted from files older than
+Excel 5.0 (Book.biff_version < 50)
+
This is a convenience method for the use case where the name +refers to one rectangular area in one worksheet.
+0 = Formula definition; 1 = Binary data
No examples have been sighted.
+
0 = User-defined name; 1 = Built-in name +(common examples: Print_Area, Print_Titles; see OOo docs for full list)
+This is a convenience method for the frequent use case where the name +refers to a single cell.
+0 = Simple formula; 1 = Complex formula (array formula or user defined)
+No examples have been sighted.
+
0 = Command macro; 1 = Function macro. Relevant only if macro == 1
+Function group. Relevant only if macro == 1; see OOo docs for values.
+0 = Visible; 1 = Hidden
+0 = Standard name; 1 = Macro name
+A Unicode string. If builtin, decoded as per OOo docs.
+The index of this object in book.name_obj_list
+An 8-bit string.
+The result of evaluating the formula, if any. +If no formula, or evaluation of the formula encountered problems, +the result is None. Otherwise the result is a single instance of the +Operand class.
+-1: The name is global (visible in all calculation sheets).
+-2: The name belongs to a macro sheet or VBA sheet.
+-3: The name is invalid.
+0 <= scope < book.nsheets: The name is local to the sheet whose index is scope.
+
0 = Sheet macro; 1 = VisualBasic macro. Relevant only if macro == 1
+ Represents a user "comment" or "note".
+Note objects are accessible through Sheet.cell_note_map.
+
-- New in version 0.7.2
+
Author of note
+True if the containing column is hidden
+Column index
+List of (offset_in_string, font_index) tuples. +Unlike Sheet.rich_text_runlist_map, the first offset should always be 0. +
True if the containing row is hidden
+Row index
+True if note is always shown
+Text of the note
+Used in evaluating formulas. +The following table describes the kinds and how their values +are represented.
+ +Kind symbol | +Kind number | +Value representation | +
---|---|---|
oBOOL | +3 | +integer: 0 => False; 1 => True | +
oERR | +4 | +None, or an int error code (same as XL_CELL_ERROR in the Cell class). + | +
oMSNG | +5 | +Used by Excel as a placeholder for a missing (not supplied) function +argument. Should *not* appear as a final formula result. Value is None. | +
oNUM | +2 | +A float. Note that there is no way of distinguishing dates. | +
oREF | +-1 | +The value is either None or a non-empty list of
+absolute Ref3D instances. + |
+
oREL | +-2 | +The value is None or a non-empty list of +fully or partially relative Ref3D instances. + | +
oSTRG | +1 | +A Unicode string. | +
oUNK | +0 | +The kind is unknown or ambiguous. The value is None | +
oUNK means that the kind of operand is not known unambiguously.
+The reconstituted text of the original formula. Function names will be +in English irrespective of the original language, which doesn't seem +to be recorded anywhere. The separator is ",", not ";" or whatever else +might be more appropriate for the end-user's locale; patches welcome.
+None means that the actual value of the operand is a variable +(depends on cell data), not a constant.
+Represents an absolute or relative 3-dimensional reference to a box
+of one or more cells.
+-- New in version 0.6.0
+
The coords attribute is a tuple of the form:
+(shtxlo, shtxhi, rowxlo, rowxhi, colxlo, colxhi)
+where 0 <= thingxlo <= thingx < thingxhi.
+Note that it is quite possible to have thingx > nthings; for example
+Print_Titles could have colxhi == 256 and/or rowxhi == 65536
+irrespective of how many columns/rows are actually used in the worksheet.
+The caller will need to decide how to handle this situation.
+Keyword: IndexError :-)
+
The components of the coords attribute are also available as individual +attributes: shtxlo, shtxhi, rowxlo, rowxhi, colxlo, and colxhi.
+ +The relflags attribute is a 6-tuple of flags which indicate whether
+the corresponding (sheet|row|col)(lo|hi) is relative (1) or absolute (0).
+Note that there is necessarily no information available as to what cell(s)
+the reference could possibly be relative to. The caller must decide what if
+any use to make of oREL operands. Note also that a partially relative
+reference may well be a typo.
+For example, define name A1Z10 as $a$1:$z10 (missing $ after z)
+while the cursor is on cell Sheet3!A27.
+The resulting Ref3D instance will have coords = (2, 3, 0, -16, 0, 26)
+and relflags = (0, 0, 0, 1, 0, 0).
+So far, only one possibility of a sheet-relative component in
+a reference has been noticed: a 2D reference located in the "current sheet".
+
This will appear as coords = (0, 1, ...) and relflags = (1, 1, ...).
+
Height and default formatting information that applies to a row in a sheet.
+Derived from ROW records.
+
-- New in version 0.6.1
height: Height of the row, in twips. One twip == 1/20 of a point.
+ +has_default_height: 0 = Row has custom height; 1 = Row has default height.
+ +outline_level: Outline level of the row (0 to 7)
+ +outline_group_starts_ends: 1 = Outline group starts or ends here (depending on where the +outline buttons are located, see WSBOOL record [TODO ??]), +and is collapsed
+ +hidden: 1 = Row is hidden (manually, or by a filter or outline group)
+ +height_mismatch: 1 = Row height and default font height do not match
+ +has_default_xf_index: 1 = the xf_index attribute is usable; 0 = ignore it
+ +xf_index: Index to default XF record for empty cells in this row. +Don't use this if has_default_xf_index == 0.
+ +additional_space_above: This flag is set, if the upper border of at least one cell in this row +or if the lower border of at least one cell in the row above is +formatted with a thick line style. Thin and medium line styles are not +taken into account.
+ +additional_space_below: This flag is set, if the lower border of at least one cell in this row +or if the upper border of at least one cell in the row below is +formatted with a medium or thick line style. Thin line styles are not +taken into account.
+Contains the data for one worksheet.
+ +In the cell access functions, "rowx" is a row index, counting from zero, and "colx" is a +column index, counting from zero. +Negative values for row/column indexes and slice positions are supported in the expected fashion.
+ +For information about cell types and cell values, refer to the documentation of the Cell class.
+ +WARNING: You don't call this class yourself. You access Sheet objects via the Book object that +was returned when you called xlrd.open_workbook("myfile.xls").
+A reference to the Book object to which this sheet belongs. +Example usage: some_sheet.book.datemode
+Cell object in the given row and column. +
A sparse mapping from (rowx, colx) to a Note object.
+Cells not containing a note ("comment") are not mapped.
+
-- New in version 0.7.2
Type of the cell in the given row and column. +Refer to the documentation of the Cell class. +
Value of the cell in the given row and column.
+XF index of the cell in the given row and column.
+This is an index into Book.xf_list.
+
-- New in version 0.6.1
+
Returns a sequence of the Cell objects in the given column. +
List of address ranges of cells containing column labels.
+These are set up in Excel by Insert > Name > Labels > Columns.
+
-- New in version 0.6.0
+
How to deconstruct the list:
+
+for crange in thesheet.col_label_ranges: + rlo, rhi, clo, chi = crange + for rx in xrange(rlo, rhi): + for cx in xrange(clo, chi): + print "Column label at (rowx=%d, colx=%d) is %r" \ + (rx, cx, thesheet.cell_value(rx, cx)) ++
Returns a slice of the Cell objects in the given column. +
Returns a slice of the types of the cells in the given column.
+Returns a slice of the values of the cells in the given column.
+The map from a column index to a Colinfo object. Often there is an entry
+in COLINFO records for all column indexes in range(257).
+Note that xlrd ignores the entry for the non-existent
+257th column. On the other hand, there may be no entry for unused columns.
+
-- New in version 0.6.1. Populated only if open_workbook(formatting_info=True).
+
Determine column display width.
+
-- New in version 0.6.1
+
+
Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +
Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +
Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +
Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +
Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +
Default column width from DEFCOLWIDTH record, else None.
+From the OOo docs:
+"""Column width in characters, using the width of the zero character
+from default font (first FONT record in the file). Excel adds some
+extra space to the default width, depending on the default font and
+default font size. The algorithm how to exactly calculate the resulting
+column width is not known.
+Example: The default width of 8 set in this record results in a column
+width of 8.43 using Arial font with a size of 10 points."""
+For the default hierarchy, refer to the Colinfo class.
+
-- New in version 0.6.1
+
A 256-element tuple corresponding to the contents of the GCW record for this sheet. +If no such record, treat as all bits zero. +Applies to BIFF4-7 only. See docs of the Colinfo class for discussion. +
Boolean specifying if a PANE record was present, ignore unless you're xlutils.copy
+A list of the horizontal page breaks in this sheet.
+Breaks are tuples in the form (index of row after break, start col index, end col index).
+Populated only if open_workbook(formatting_info=True).
+
-- New in version 0.7.2
+
Index of first visible row in bottom frozen/split pane
+Number of rows in top pane (frozen panes; for split panes, see comments below in code)
+A list of Hyperlink objects corresponding to HLINK records found
+in the worksheet.
-- New in version 0.7.2
A sparse mapping from (rowx, colx) to an item in hyperlink_list.
+Cells not covered by a hyperlink are not mapped.
+It is possible using the Excel UI to set up a hyperlink that
+covers a larger-than-1x1 rectangle of cells.
+Hyperlink rectangles may overlap (Excel doesn't check).
+When a multiply-covered cell is clicked on, the hyperlink that is activated
+(and the one that is mapped here) is the last in hyperlink_list.
+
-- New in version 0.7.2
List of address ranges of cells which have been merged.
+These are set up in Excel by Format > Cells > Alignment, then ticking
+the "Merge cells" box.
+
-- New in version 0.6.1. Extracted only if open_workbook(formatting_info=True).
+
How to deconstruct the list:
+
+for crange in thesheet.merged_cells: + rlo, rhi, clo, chi = crange + for rowx in xrange(rlo, rhi): + for colx in xrange(clo, chi): + # cell (rlo, clo) (the top left one) will carry the data + # and formatting info; the remainder will be recorded as + # blank cells, but a renderer will apply the formatting info + # for the top left cell (e.g. border, pattern) to all cells in + # the range. ++
Name of sheet.
+Nominal number of columns in sheet. It is 1 + the maximum column index +found, ignoring trailing empty cells. See also open_workbook(ragged_rows=?) +and Sheet.row_len(row_index). +
Number of rows in sheet. A row index is in range(thesheet.nrows).
+Mapping of (rowx, colx) to list of (offset, font_index) tuples. The offset
+defines where in the string the font begins to be used.
+Offsets are expected to be in ascending order.
+If the first offset is not zero, the meaning is that the cell's XF's font should
+be used from offset 0.
+
This is a sparse mapping. There is no entry for cells that are not formatted with
+rich text.
+
How to use:
+
+runlist = thesheet.rich_text_runlist_map.get((rowx, colx)) +if runlist: + for offset, font_index in runlist: + # do work here. + pass ++Populated only if open_workbook(formatting_info=True). +
Returns a sequence of the Cell objects in the given row. +
List of address ranges of cells containing row labels.
+For more details, see col_label_ranges above.
+
-- New in version 0.6.0
+
Returns the effective number of cells in the given row. For use with
+open_workbook(ragged_rows=True) which is likely to produce rows
+with fewer than ncols cells.
+
-- New in version 0.7.2
+
Returns a slice of the Cell objects in the given row. +
Returns a slice of the types +of the cells in the given row.
+Returns a slice of the values +of the cells in the given row.
+The map from a row index to a Rowinfo object. Note that it is possible
+to have missing entries -- at least one source of XLS files doesn't
+bother writing ROW records.
+
-- New in version 0.6.1. Populated only if open_workbook(formatting_info=True).
+
Frozen panes: ignore it. Split panes: explanation and diagrams in OOo docs.
+Default column width from STANDARDWIDTH record, else None.
+From the OOo docs:
+"""Default width of the columns in 1/256 of the width of the zero
+character, using default font (first FONT record in the file)."""
+For the default hierarchy, refer to the Colinfo class.
+
-- New in version 0.6.1
+
Index of first visible column in right frozen/split pane
+Number of columns in left pane (frozen panes; for split panes, see comments below in code)
+A list of the vertical page breaks in this sheet.
+Breaks are tuples in the form (index of col after break, start row index, end row index).
+Populated only if open_workbook(formatting_info=True).
+
-- New in version 0.7.2
+
Visibility of the sheet. 0 = visible, 1 = hidden (can be unhidden +by user -- Format/Sheet/Unhide), 2 = "very hidden" (can be unhidden +only by VBA macro).
+eXtended Formatting information for cells, rows, columns and styles.
+
-- New in version 0.6.1
+
+
Each of the 6 flags below describes the validity of
+a specific group of attributes.
+
+In cell XFs, flag==0 means the attributes of the parent style XF are used,
+(but only if the attributes are valid there); flag==1 means the attributes
+of this XF are used.
+In style XFs, flag==0 means the attribute setting is valid; flag==1 means
+the attribute should be ignored.
+Note that the API
+provides both "raw" XFs and "computed" XFs -- in the latter case, cell XFs
+have had the above inheritance mechanism applied.
+
+
An instance of an XFAlignment object.
+An instance of an XFBackground object.
+An instance of an XFBorder object.
+Index into Book.font_list
+Key into Book.format_map +
+Warning: OOo docs on the XF record call this "Index to FORMAT record". +It is not an index in the Python sense. It is a key to a map. +It is true only for Excel 4.0 and earlier files +that the key into format_map from an XF instance +is the same as the index into format_list, and only +if the index is less than 164. +
+0 = cell XF, 1 = style XF
+cell XF: Index into Book.xf_list
+of this XF's style XF
+style XF: 0xFFF
+
An instance of an XFProtection object.
+Index into Book.xf_list
+A collection of the alignment and similar attributes of an XF record.
+Items correspond to those in the Excel UI's Format/Cells/Alignment tab.
+
-- New in version 0.6.1
+
Values: section 6.115 (p 214) of OOo docs
+A number in range(15).
+Values: section 6.115 (p 215) of OOo docs.
+Note: file versions BIFF7 and earlier use the documented
+"orientation" attribute; this will be mapped (without loss)
+into "rotation".
+
1 = shrink font size to fit text into cell.
+0 = according to context; 1 = left-to-right; 2 = right-to-left
+1 = text is wrapped at right margin
+Values: section 6.115 (p 215) of OOo docs
+A collection of the background-related attributes of an XF record.
+Items correspond to those in the Excel UI's Format/Cells/Patterns tab.
+An explanation of "colour index" is given in the Formatting
+section at the start of this document.
+
-- New in version 0.6.1
+
See section 3.11 of the OOo docs.
+See section 3.11 of the OOo docs.
+See section 3.11 of the OOo docs.
+A collection of the border-related attributes of an XF record. +Items correspond to those in the Excel UI's Format/Cells/Border tab.
+An explanations of "colour index" is given in the Formatting +section at the start of this document. +There are five line style attributes; possible values and the +associated meanings are: +0 = No line, +1 = Thin, +2 = Medium, +3 = Dashed, +4 = Dotted, +5 = Thick, +6 = Double, +7 = Hair, +8 = Medium dashed, +9 = Thin dash-dotted, +10 = Medium dash-dotted, +11 = Thin dash-dot-dotted, +12 = Medium dash-dot-dotted, +13 = Slanted medium dash-dotted. +The line styles 8 to 13 appear in BIFF8 files (Excel 97 and later) only. +For pictures of the line styles, refer to OOo docs s3.10 (p22) +"Line Styles for Cell Borders (BIFF3-BIFF8)".
+The colour index for the cell's bottom line
+The line style for the cell's bottom line
+The colour index for the cell's diagonal lines, if any
+1 = draw a diagonal from top left to bottom right
+The line style for the cell's diagonal lines, if any
+1 = draw a diagonal from bottom left to top right
+The colour index for the cell's left line
+The line style for the cell's left line
+The colour index for the cell's right line
+The line style for the cell's right line
+The colour index for the cell's top line
+The line style for the cell's top line
+A collection of the protection-related attributes of an XF record.
+Items correspond to those in the Excel UI's Format/Cells/Protection tab.
+Note the OOo docs include the "cell or style" bit
+in this bundle of attributes.
+This is incorrect; the bit is used in determining which bundles to use.
+
-- New in version 0.6.1
+
1 = Cell is prevented from being changed, moved, resized, or deleted +(only if the sheet is protected).
+1 = Hide formula so that it doesn't appear in the formula bar when +the cell is selected (only if the sheet is protected).
+Copyright © 2006 Stephen John Machin, Lingfo Pty Ltd
+#This module is part of the xlrd package, which is released under a BSD-style licence.
+## +from __future__ import print_function + +import xlrd +from xlrd.timemachine import REPR +import sys +import glob + +def scope_as_string(book, scope): + if 0 <= scope < book.nsheets: + return "sheet #%d (%r)" % (scope, REPR(book.sheet_names()[scope])) + if scope == -1: + return "Global" + if scope == -2: + return "Macro/VBA" + return "Unknown scope value (%r)" % REPR(scope) + +def do_scope_query(book, scope_strg, show_contents=0, f=sys.stdout): + try: + qscope = int(scope_strg) + except ValueError: + if scope_strg == "*": + qscope = None # means "all' + else: + # so assume it's a sheet name ... + qscope = book.sheet_names().index(scope_strg) + print("%r => %d" % (scope_strg, qscope), file=f) + for nobj in book.name_obj_list: + if qscope is None or nobj.scope == qscope: + show_name_object(book, nobj, show_contents, f) + +def show_name_details(book, name, show_contents=0, f=sys.stdout): + """ + book -- Book object obtained from xlrd.open_workbook(). + name -- The name that's being investigated. + show_contents -- 0: Don't; 1: Non-empty cells only; 2: All cells + f -- Open output file handle. + """ + name_lcase = name.lower() # Excel names are case-insensitive. + nobj_list = book.name_map.get(name_lcase) + if not nobj_list: + print("%r: unknown name" % name, file=f) + return + for nobj in nobj_list: + show_name_object(book, nobj, show_contents, f) + +def show_name_details_in_scope( + book, name, scope_strg, show_contents=0, f=sys.stdout, + ): + try: + scope = int(scope_strg) + except ValueError: + # so assume it's a sheet name ... + scope = book.sheet_names().index(scope_strg) + print("%r => %d" % (scope_strg, scope), file=f) + name_lcase = name.lower() # Excel names are case-insensitive. + while 1: + nobj = book.name_and_scope_map.get((name_lcase, scope)) + if nobj: + break + print("Name %s not found in scope %d" % (REPR(name), scope), file=f) + if scope == -1: + return + scope = -1 # Try again with global scope + print("Name %s found in scope %d" % (REPR(name), scope), file=f) + show_name_object(book, nobj, show_contents, f) + +def showable_cell_value(celltype, cellvalue, datemode): + if celltype == xlrd.XL_CELL_DATE: + try: + showval = xlrd.xldate_as_tuple(cellvalue, datemode) + except xlrd.XLDateError as e: + showval = "%s:%s" % (type(e).__name__, e) + elif celltype == xlrd.XL_CELL_ERROR: + showval = xlrd.error_text_from_code.get( + cellvalue, 'Copyright © 2005-2012 Stephen John Machin, Lingfo Pty Ltd
+#This module is part of the xlrd package, which is released under +# a BSD-style licence.
+## + +# No part of the content of this file was derived from the works of David Giffin. + +from __future__ import print_function + +DEBUG = 0 +import re +from struct import unpack +from .timemachine import * +from .biffh import BaseObject, unpack_unicode, unpack_string, \ + upkbits, upkbitsL, fprintf, \ + FUN, FDT, FNU, FGE, FTX, XL_CELL_NUMBER, XL_CELL_DATE, \ + XL_FORMAT, XL_FORMAT2, \ + XLRDError + +_cellty_from_fmtty = { + FNU: XL_CELL_NUMBER, + FUN: XL_CELL_NUMBER, + FGE: XL_CELL_NUMBER, + FDT: XL_CELL_DATE, + FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text. + } + +excel_default_palette_b5 = ( + ( 0, 0, 0), (255, 255, 255), (255, 0, 0), ( 0, 255, 0), + ( 0, 0, 255), (255, 255, 0), (255, 0, 255), ( 0, 255, 255), + (128, 0, 0), ( 0, 128, 0), ( 0, 0, 128), (128, 128, 0), + (128, 0, 128), ( 0, 128, 128), (192, 192, 192), (128, 128, 128), + (153, 153, 255), (153, 51, 102), (255, 255, 204), (204, 255, 255), + (102, 0, 102), (255, 128, 128), ( 0, 102, 204), (204, 204, 255), + ( 0, 0, 128), (255, 0, 255), (255, 255, 0), ( 0, 255, 255), + (128, 0, 128), (128, 0, 0), ( 0, 128, 128), ( 0, 0, 255), + ( 0, 204, 255), (204, 255, 255), (204, 255, 204), (255, 255, 153), + (153, 204, 255), (255, 153, 204), (204, 153, 255), (227, 227, 227), + ( 51, 102, 255), ( 51, 204, 204), (153, 204, 0), (255, 204, 0), + (255, 153, 0), (255, 102, 0), (102, 102, 153), (150, 150, 150), + ( 0, 51, 102), ( 51, 153, 102), ( 0, 51, 0), ( 51, 51, 0), + (153, 51, 0), (153, 51, 102), ( 51, 51, 153), ( 51, 51, 51), + ) + +excel_default_palette_b2 = excel_default_palette_b5[:16] + +# Following table borrowed from Gnumeric 1.4 source. +# Checked against OOo docs and MS docs. +excel_default_palette_b8 = ( # (red, green, blue) + ( 0, 0, 0), (255,255,255), (255, 0, 0), ( 0,255, 0), # 0 + ( 0, 0,255), (255,255, 0), (255, 0,255), ( 0,255,255), # 4 + (128, 0, 0), ( 0,128, 0), ( 0, 0,128), (128,128, 0), # 8 + (128, 0,128), ( 0,128,128), (192,192,192), (128,128,128), # 12 + (153,153,255), (153, 51,102), (255,255,204), (204,255,255), # 16 + (102, 0,102), (255,128,128), ( 0,102,204), (204,204,255), # 20 + ( 0, 0,128), (255, 0,255), (255,255, 0), ( 0,255,255), # 24 + (128, 0,128), (128, 0, 0), ( 0,128,128), ( 0, 0,255), # 28 + ( 0,204,255), (204,255,255), (204,255,204), (255,255,153), # 32 + (153,204,255), (255,153,204), (204,153,255), (255,204,153), # 36 + ( 51,102,255), ( 51,204,204), (153,204, 0), (255,204, 0), # 40 + (255,153, 0), (255,102, 0), (102,102,153), (150,150,150), # 44 + ( 0, 51,102), ( 51,153,102), ( 0, 51, 0), ( 51, 51, 0), # 48 + (153, 51, 0), (153, 51,102), ( 51, 51,153), ( 51, 51, 51), # 52 + ) + +default_palette = { + 80: excel_default_palette_b8, + 70: excel_default_palette_b5, + 50: excel_default_palette_b5, + 45: excel_default_palette_b2, + 40: excel_default_palette_b2, + 30: excel_default_palette_b2, + 21: excel_default_palette_b2, + 20: excel_default_palette_b2, + } + +""" +00H = Normal +01H = RowLevel_lv (see next field) +02H = ColLevel_lv (see next field) +03H = Comma +04H = Currency +05H = Percent +06H = Comma [0] (BIFF4-BIFF8) +07H = Currency [0] (BIFF4-BIFF8) +08H = Hyperlink (BIFF8) +09H = Followed Hyperlink (BIFF8) +""" +built_in_style_names = [ + "Normal", + "RowLevel_", + "ColLevel_", + "Comma", + "Currency", + "Percent", + "Comma [0]", + "Currency [0]", + "Hyperlink", + "Followed Hyperlink", + ] + +def initialise_colour_map(book): + book.colour_map = {} + book.colour_indexes_used = {} + if not book.formatting_info: + return + # Add the 8 invariant colours + for i in xrange(8): + book.colour_map[i] = excel_default_palette_b8[i] + # Add the default palette depending on the version + dpal = default_palette[book.biff_version] + ndpal = len(dpal) + for i in xrange(ndpal): + book.colour_map[i+8] = dpal[i] + # Add the specials -- None means the RGB value is not known + # System window text colour for border lines + book.colour_map[ndpal+8] = None + # System window background colour for pattern background + book.colour_map[ndpal+8+1] = None # + for ci in ( + 0x51, # System ToolTip text colour (used in note objects) + 0x7FFF, # 32767, system window text colour for fonts + ): + book.colour_map[ci] = None + +def nearest_colour_index(colour_map, rgb, debug=0): + # General purpose function. Uses Euclidean distance. + # So far used only for pre-BIFF8 WINDOW2 record. + # Doesn't have to be fast. + # Doesn't have to be fancy. + best_metric = 3 * 256 * 256 + best_colourx = 0 + for colourx, cand_rgb in colour_map.items(): + if cand_rgb is None: + continue + metric = 0 + for v1, v2 in zip(rgb, cand_rgb): + metric += (v1 - v2) * (v1 - v2) + if metric < best_metric: + best_metric = metric + best_colourx = colourx + if metric == 0: + break + if 0 and debug: + print("nearest_colour_index for %r is %r -> %r; best_metric is %d" \ + % (rgb, best_colourx, colour_map[best_colourx], best_metric)) + return best_colourx + +## +# This mixin class exists solely so that Format, Font, and XF.... objects +# can be compared by value of their attributes. +class EqNeAttrs(object): + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + return self.__dict__ != other.__dict__ + +## +# An Excel "font" contains the details of not only what is normally +# considered a font, but also several other display attributes. +# Items correspond to those in the Excel UI's Format/Cells/Font tab. +#A collection of the border-related attributes of an XF record. +# Items correspond to those in the Excel UI's Format/Cells/Border tab.
+#An explanations of "colour index" is given in the Formatting +# section at the start of this document. +# There are five line style attributes; possible values and the +# associated meanings are: +# 0 = No line, +# 1 = Thin, +# 2 = Medium, +# 3 = Dashed, +# 4 = Dotted, +# 5 = Thick, +# 6 = Double, +# 7 = Hair, +# 8 = Medium dashed, +# 9 = Thin dash-dotted, +# 10 = Medium dash-dotted, +# 11 = Thin dash-dot-dotted, +# 12 = Medium dash-dot-dotted, +# 13 = Slanted medium dash-dotted. +# The line styles 8 to 13 appear in BIFF8 files (Excel 97 and later) only. +# For pictures of the line styles, refer to OOo docs s3.10 (p22) +# "Line Styles for Cell Borders (BIFF3-BIFF8)".
+#Each of the 6 flags below describes the validity of
+# a specific group of attributes.
+#
+# In cell XFs, flag==0 means the attributes of the parent style XF are used,
+# (but only if the attributes are valid there); flag==1 means the attributes
+# of this XF are used.
+# In style XFs, flag==0 means the attribute setting is valid; flag==1 means
+# the attribute should be ignored.
+# Note that the API
+# provides both "raw" XFs and "computed" XFs -- in the latter case, cell XFs
+# have had the above inheritance mechanism applied.
+#
+ # Warning: OOo docs on the XF record call this "Index to FORMAT record". + # It is not an index in the Python sense. It is a key to a map. + # It is true only for Excel 4.0 and earlier files + # that the key into format_map from an XF instance + # is the same as the index into format_list, and only + # if the index is less than 164. + #
+ format_key = 0 + ## + # An instance of an XFProtection object. + protection = None + ## + # An instance of an XFBackground object. + background = None + ## + # An instance of an XFAlignment object. + alignment = None + ## + # An instance of an XFBorder object. + border = None diff --git a/Assignment 3/packages/xlrd/formula.py b/Assignment 3/packages/xlrd/formula.py new file mode 100644 index 0000000..7c56aa4 --- /dev/null +++ b/Assignment 3/packages/xlrd/formula.py @@ -0,0 +1,2179 @@ +# -*- coding: cp1252 -*- + +## +# Module for parsing/evaluating Microsoft Excel formulas. +# +#Copyright © 2005-2012 Stephen John Machin, Lingfo Pty Ltd
+#This module is part of the xlrd package, which is released under +# a BSD-style licence.
+## + +# No part of the content of this file was derived from the works of David Giffin. + +from __future__ import print_function +import copy +from struct import unpack +from .timemachine import * +from .biffh import unpack_unicode_update_pos, unpack_string_update_pos, \ + XLRDError, hex_char_dump, error_text_from_code, BaseObject + +__all__ = [ + 'oBOOL', 'oERR', 'oNUM', 'oREF', 'oREL', 'oSTRG', 'oUNK', + 'decompile_formula', + 'dump_formula', + 'evaluate_name_formula', + 'okind_dict', + 'rangename3d', 'rangename3drel', 'cellname', 'cellnameabs', 'colname', + 'FMLA_TYPE_CELL', + 'FMLA_TYPE_SHARED', + 'FMLA_TYPE_ARRAY', + 'FMLA_TYPE_COND_FMT', + 'FMLA_TYPE_DATA_VAL', + 'FMLA_TYPE_NAME', + ] + +FMLA_TYPE_CELL = 1 +FMLA_TYPE_SHARED = 2 +FMLA_TYPE_ARRAY = 4 +FMLA_TYPE_COND_FMT = 8 +FMLA_TYPE_DATA_VAL = 16 +FMLA_TYPE_NAME = 32 +ALL_FMLA_TYPES = 63 + + +FMLA_TYPEDESCR_MAP = { + 1 : 'CELL', + 2 : 'SHARED', + 4 : 'ARRAY', + 8 : 'COND-FMT', + 16: 'DATA-VAL', + 32: 'NAME', + } + +_TOKEN_NOT_ALLOWED = { + 0x01: ALL_FMLA_TYPES - FMLA_TYPE_CELL, # tExp + 0x02: ALL_FMLA_TYPES - FMLA_TYPE_CELL, # tTbl + 0x0F: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tIsect + 0x10: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tUnion/List + 0x11: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tRange + 0x20: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tArray + 0x23: FMLA_TYPE_SHARED, # tName + 0x39: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tNameX + 0x3A: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tRef3d + 0x3B: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tArea3d + 0x2C: FMLA_TYPE_CELL + FMLA_TYPE_ARRAY, # tRefN + 0x2D: FMLA_TYPE_CELL + FMLA_TYPE_ARRAY, # tAreaN + # plus weird stuff like tMem* + }.get + +oBOOL = 3 +oERR = 4 +oMSNG = 5 # tMissArg +oNUM = 2 +oREF = -1 +oREL = -2 +oSTRG = 1 +oUNK = 0 + +okind_dict = { + -2: "oREL", + -1: "oREF", + 0 : "oUNK", + 1 : "oSTRG", + 2 : "oNUM", + 3 : "oBOOL", + 4 : "oERR", + 5 : "oMSNG", + } + +listsep = ',' #### probably should depend on locale + + +# sztabN[opcode] -> the number of bytes to consume. +# -1 means variable +# -2 means this opcode not implemented in this version. +# Which N to use? Depends on biff_version; see szdict. +sztab0 = [-2, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 8, 4, 2, 2, 3, 9, 8, 2, 3, 8, 4, 7, 5, 5, 5, 2, 4, 7, 4, 7, 2, 2, -2, -2, -2, -2, -2, -2, -2, -2, 3, -2, -2, -2, -2, -2, -2, -2] +sztab1 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 11, 5, 2, 2, 3, 9, 9, 2, 3, 11, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, 3, -2, -2, -2, -2, -2, -2, -2] +sztab2 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 11, 5, 2, 2, 3, 9, 9, 3, 4, 11, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2] +sztab3 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, -2, -2, 2, 2, 3, 9, 9, 3, 4, 15, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, 25, 18, 21, 18, 21, -2, -2] +sztab4 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -2, -2, 2, 2, 3, 9, 9, 3, 4, 5, 5, 9, 7, 7, 7, 3, 5, 9, 5, 9, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, 7, 7, 11, 7, 11, -2, -2] + +szdict = { + 20 : sztab0, + 21 : sztab0, + 30 : sztab1, + 40 : sztab2, + 45 : sztab2, + 50 : sztab3, + 70 : sztab3, + 80 : sztab4, + } + +# For debugging purposes ... the name for each opcode +# (without the prefix "t" used on OOo docs) +onames = ['Unk00', 'Exp', 'Tbl', 'Add', 'Sub', 'Mul', 'Div', 'Power', 'Concat', 'LT', 'LE', 'EQ', 'GE', 'GT', 'NE', 'Isect', 'List', 'Range', 'Uplus', 'Uminus', 'Percent', 'Paren', 'MissArg', 'Str', 'Extended', 'Attr', 'Sheet', 'EndSheet', 'Err', 'Bool', 'Int', 'Num', 'Array', 'Func', 'FuncVar', 'Name', 'Ref', 'Area', 'MemArea', 'MemErr', 'MemNoMem', 'MemFunc', 'RefErr', 'AreaErr', 'RefN', 'AreaN', 'MemAreaN', 'MemNoMemN', '', '', '', '', '', '', '', '', 'FuncCE', 'NameX', 'Ref3d', 'Area3d', 'RefErr3d', 'AreaErr3d', '', ''] + +func_defs = { + # index: (name, min#args, max#args, flags, #known_args, return_type, kargs) + 0 : ('COUNT', 0, 30, 0x04, 1, 'V', 'R'), + 1 : ('IF', 2, 3, 0x04, 3, 'V', 'VRR'), + 2 : ('ISNA', 1, 1, 0x02, 1, 'V', 'V'), + 3 : ('ISERROR', 1, 1, 0x02, 1, 'V', 'V'), + 4 : ('SUM', 0, 30, 0x04, 1, 'V', 'R'), + 5 : ('AVERAGE', 1, 30, 0x04, 1, 'V', 'R'), + 6 : ('MIN', 1, 30, 0x04, 1, 'V', 'R'), + 7 : ('MAX', 1, 30, 0x04, 1, 'V', 'R'), + 8 : ('ROW', 0, 1, 0x04, 1, 'V', 'R'), + 9 : ('COLUMN', 0, 1, 0x04, 1, 'V', 'R'), + 10 : ('NA', 0, 0, 0x02, 0, 'V', ''), + 11 : ('NPV', 2, 30, 0x04, 2, 'V', 'VR'), + 12 : ('STDEV', 1, 30, 0x04, 1, 'V', 'R'), + 13 : ('DOLLAR', 1, 2, 0x04, 1, 'V', 'V'), + 14 : ('FIXED', 2, 3, 0x04, 3, 'V', 'VVV'), + 15 : ('SIN', 1, 1, 0x02, 1, 'V', 'V'), + 16 : ('COS', 1, 1, 0x02, 1, 'V', 'V'), + 17 : ('TAN', 1, 1, 0x02, 1, 'V', 'V'), + 18 : ('ATAN', 1, 1, 0x02, 1, 'V', 'V'), + 19 : ('PI', 0, 0, 0x02, 0, 'V', ''), + 20 : ('SQRT', 1, 1, 0x02, 1, 'V', 'V'), + 21 : ('EXP', 1, 1, 0x02, 1, 'V', 'V'), + 22 : ('LN', 1, 1, 0x02, 1, 'V', 'V'), + 23 : ('LOG10', 1, 1, 0x02, 1, 'V', 'V'), + 24 : ('ABS', 1, 1, 0x02, 1, 'V', 'V'), + 25 : ('INT', 1, 1, 0x02, 1, 'V', 'V'), + 26 : ('SIGN', 1, 1, 0x02, 1, 'V', 'V'), + 27 : ('ROUND', 2, 2, 0x02, 2, 'V', 'VV'), + 28 : ('LOOKUP', 2, 3, 0x04, 2, 'V', 'VR'), + 29 : ('INDEX', 2, 4, 0x0c, 4, 'R', 'RVVV'), + 30 : ('REPT', 2, 2, 0x02, 2, 'V', 'VV'), + 31 : ('MID', 3, 3, 0x02, 3, 'V', 'VVV'), + 32 : ('LEN', 1, 1, 0x02, 1, 'V', 'V'), + 33 : ('VALUE', 1, 1, 0x02, 1, 'V', 'V'), + 34 : ('TRUE', 0, 0, 0x02, 0, 'V', ''), + 35 : ('FALSE', 0, 0, 0x02, 0, 'V', ''), + 36 : ('AND', 1, 30, 0x04, 1, 'V', 'R'), + 37 : ('OR', 1, 30, 0x04, 1, 'V', 'R'), + 38 : ('NOT', 1, 1, 0x02, 1, 'V', 'V'), + 39 : ('MOD', 2, 2, 0x02, 2, 'V', 'VV'), + 40 : ('DCOUNT', 3, 3, 0x02, 3, 'V', 'RRR'), + 41 : ('DSUM', 3, 3, 0x02, 3, 'V', 'RRR'), + 42 : ('DAVERAGE', 3, 3, 0x02, 3, 'V', 'RRR'), + 43 : ('DMIN', 3, 3, 0x02, 3, 'V', 'RRR'), + 44 : ('DMAX', 3, 3, 0x02, 3, 'V', 'RRR'), + 45 : ('DSTDEV', 3, 3, 0x02, 3, 'V', 'RRR'), + 46 : ('VAR', 1, 30, 0x04, 1, 'V', 'R'), + 47 : ('DVAR', 3, 3, 0x02, 3, 'V', 'RRR'), + 48 : ('TEXT', 2, 2, 0x02, 2, 'V', 'VV'), + 49 : ('LINEST', 1, 4, 0x04, 4, 'A', 'RRVV'), + 50 : ('TREND', 1, 4, 0x04, 4, 'A', 'RRRV'), + 51 : ('LOGEST', 1, 4, 0x04, 4, 'A', 'RRVV'), + 52 : ('GROWTH', 1, 4, 0x04, 4, 'A', 'RRRV'), + 56 : ('PV', 3, 5, 0x04, 5, 'V', 'VVVVV'), + 57 : ('FV', 3, 5, 0x04, 5, 'V', 'VVVVV'), + 58 : ('NPER', 3, 5, 0x04, 5, 'V', 'VVVVV'), + 59 : ('PMT', 3, 5, 0x04, 5, 'V', 'VVVVV'), + 60 : ('RATE', 3, 6, 0x04, 6, 'V', 'VVVVVV'), + 61 : ('MIRR', 3, 3, 0x02, 3, 'V', 'RVV'), + 62 : ('IRR', 1, 2, 0x04, 2, 'V', 'RV'), + 63 : ('RAND', 0, 0, 0x0a, 0, 'V', ''), + 64 : ('MATCH', 2, 3, 0x04, 3, 'V', 'VRR'), + 65 : ('DATE', 3, 3, 0x02, 3, 'V', 'VVV'), + 66 : ('TIME', 3, 3, 0x02, 3, 'V', 'VVV'), + 67 : ('DAY', 1, 1, 0x02, 1, 'V', 'V'), + 68 : ('MONTH', 1, 1, 0x02, 1, 'V', 'V'), + 69 : ('YEAR', 1, 1, 0x02, 1, 'V', 'V'), + 70 : ('WEEKDAY', 1, 2, 0x04, 2, 'V', 'VV'), + 71 : ('HOUR', 1, 1, 0x02, 1, 'V', 'V'), + 72 : ('MINUTE', 1, 1, 0x02, 1, 'V', 'V'), + 73 : ('SECOND', 1, 1, 0x02, 1, 'V', 'V'), + 74 : ('NOW', 0, 0, 0x0a, 0, 'V', ''), + 75 : ('AREAS', 1, 1, 0x02, 1, 'V', 'R'), + 76 : ('ROWS', 1, 1, 0x02, 1, 'V', 'R'), + 77 : ('COLUMNS', 1, 1, 0x02, 1, 'V', 'R'), + 78 : ('OFFSET', 3, 5, 0x04, 5, 'R', 'RVVVV'), + 82 : ('SEARCH', 2, 3, 0x04, 3, 'V', 'VVV'), + 83 : ('TRANSPOSE', 1, 1, 0x02, 1, 'A', 'A'), + 86 : ('TYPE', 1, 1, 0x02, 1, 'V', 'V'), + 92 : ('SERIESSUM', 4, 4, 0x02, 4, 'V', 'VVVA'), + 97 : ('ATAN2', 2, 2, 0x02, 2, 'V', 'VV'), + 98 : ('ASIN', 1, 1, 0x02, 1, 'V', 'V'), + 99 : ('ACOS', 1, 1, 0x02, 1, 'V', 'V'), + 100: ('CHOOSE', 2, 30, 0x04, 2, 'V', 'VR'), + 101: ('HLOOKUP', 3, 4, 0x04, 4, 'V', 'VRRV'), + 102: ('VLOOKUP', 3, 4, 0x04, 4, 'V', 'VRRV'), + 105: ('ISREF', 1, 1, 0x02, 1, 'V', 'R'), + 109: ('LOG', 1, 2, 0x04, 2, 'V', 'VV'), + 111: ('CHAR', 1, 1, 0x02, 1, 'V', 'V'), + 112: ('LOWER', 1, 1, 0x02, 1, 'V', 'V'), + 113: ('UPPER', 1, 1, 0x02, 1, 'V', 'V'), + 114: ('PROPER', 1, 1, 0x02, 1, 'V', 'V'), + 115: ('LEFT', 1, 2, 0x04, 2, 'V', 'VV'), + 116: ('RIGHT', 1, 2, 0x04, 2, 'V', 'VV'), + 117: ('EXACT', 2, 2, 0x02, 2, 'V', 'VV'), + 118: ('TRIM', 1, 1, 0x02, 1, 'V', 'V'), + 119: ('REPLACE', 4, 4, 0x02, 4, 'V', 'VVVV'), + 120: ('SUBSTITUTE', 3, 4, 0x04, 4, 'V', 'VVVV'), + 121: ('CODE', 1, 1, 0x02, 1, 'V', 'V'), + 124: ('FIND', 2, 3, 0x04, 3, 'V', 'VVV'), + 125: ('CELL', 1, 2, 0x0c, 2, 'V', 'VR'), + 126: ('ISERR', 1, 1, 0x02, 1, 'V', 'V'), + 127: ('ISTEXT', 1, 1, 0x02, 1, 'V', 'V'), + 128: ('ISNUMBER', 1, 1, 0x02, 1, 'V', 'V'), + 129: ('ISBLANK', 1, 1, 0x02, 1, 'V', 'V'), + 130: ('T', 1, 1, 0x02, 1, 'V', 'R'), + 131: ('N', 1, 1, 0x02, 1, 'V', 'R'), + 140: ('DATEVALUE', 1, 1, 0x02, 1, 'V', 'V'), + 141: ('TIMEVALUE', 1, 1, 0x02, 1, 'V', 'V'), + 142: ('SLN', 3, 3, 0x02, 3, 'V', 'VVV'), + 143: ('SYD', 4, 4, 0x02, 4, 'V', 'VVVV'), + 144: ('DDB', 4, 5, 0x04, 5, 'V', 'VVVVV'), + 148: ('INDIRECT', 1, 2, 0x0c, 2, 'R', 'VV'), + 162: ('CLEAN', 1, 1, 0x02, 1, 'V', 'V'), + 163: ('MDETERM', 1, 1, 0x02, 1, 'V', 'A'), + 164: ('MINVERSE', 1, 1, 0x02, 1, 'A', 'A'), + 165: ('MMULT', 2, 2, 0x02, 2, 'A', 'AA'), + 167: ('IPMT', 4, 6, 0x04, 6, 'V', 'VVVVVV'), + 168: ('PPMT', 4, 6, 0x04, 6, 'V', 'VVVVVV'), + 169: ('COUNTA', 0, 30, 0x04, 1, 'V', 'R'), + 183: ('PRODUCT', 0, 30, 0x04, 1, 'V', 'R'), + 184: ('FACT', 1, 1, 0x02, 1, 'V', 'V'), + 189: ('DPRODUCT', 3, 3, 0x02, 3, 'V', 'RRR'), + 190: ('ISNONTEXT', 1, 1, 0x02, 1, 'V', 'V'), + 193: ('STDEVP', 1, 30, 0x04, 1, 'V', 'R'), + 194: ('VARP', 1, 30, 0x04, 1, 'V', 'R'), + 195: ('DSTDEVP', 3, 3, 0x02, 3, 'V', 'RRR'), + 196: ('DVARP', 3, 3, 0x02, 3, 'V', 'RRR'), + 197: ('TRUNC', 1, 2, 0x04, 2, 'V', 'VV'), + 198: ('ISLOGICAL', 1, 1, 0x02, 1, 'V', 'V'), + 199: ('DCOUNTA', 3, 3, 0x02, 3, 'V', 'RRR'), + 204: ('USDOLLAR', 1, 2, 0x04, 2, 'V', 'VV'), + 205: ('FINDB', 2, 3, 0x04, 3, 'V', 'VVV'), + 206: ('SEARCHB', 2, 3, 0x04, 3, 'V', 'VVV'), + 207: ('REPLACEB', 4, 4, 0x02, 4, 'V', 'VVVV'), + 208: ('LEFTB', 1, 2, 0x04, 2, 'V', 'VV'), + 209: ('RIGHTB', 1, 2, 0x04, 2, 'V', 'VV'), + 210: ('MIDB', 3, 3, 0x02, 3, 'V', 'VVV'), + 211: ('LENB', 1, 1, 0x02, 1, 'V', 'V'), + 212: ('ROUNDUP', 2, 2, 0x02, 2, 'V', 'VV'), + 213: ('ROUNDDOWN', 2, 2, 0x02, 2, 'V', 'VV'), + 214: ('ASC', 1, 1, 0x02, 1, 'V', 'V'), + 215: ('DBCS', 1, 1, 0x02, 1, 'V', 'V'), + 216: ('RANK', 2, 3, 0x04, 3, 'V', 'VRV'), + 219: ('ADDRESS', 2, 5, 0x04, 5, 'V', 'VVVVV'), + 220: ('DAYS360', 2, 3, 0x04, 3, 'V', 'VVV'), + 221: ('TODAY', 0, 0, 0x0a, 0, 'V', ''), + 222: ('VDB', 5, 7, 0x04, 7, 'V', 'VVVVVVV'), + 227: ('MEDIAN', 1, 30, 0x04, 1, 'V', 'R'), + 228: ('SUMPRODUCT', 1, 30, 0x04, 1, 'V', 'A'), + 229: ('SINH', 1, 1, 0x02, 1, 'V', 'V'), + 230: ('COSH', 1, 1, 0x02, 1, 'V', 'V'), + 231: ('TANH', 1, 1, 0x02, 1, 'V', 'V'), + 232: ('ASINH', 1, 1, 0x02, 1, 'V', 'V'), + 233: ('ACOSH', 1, 1, 0x02, 1, 'V', 'V'), + 234: ('ATANH', 1, 1, 0x02, 1, 'V', 'V'), + 235: ('DGET', 3, 3, 0x02, 3, 'V', 'RRR'), + 244: ('INFO', 1, 1, 0x02, 1, 'V', 'V'), + 247: ('DB', 4, 5, 0x04, 5, 'V', 'VVVVV'), + 252: ('FREQUENCY', 2, 2, 0x02, 2, 'A', 'RR'), + 261: ('ERROR.TYPE', 1, 1, 0x02, 1, 'V', 'V'), + 269: ('AVEDEV', 1, 30, 0x04, 1, 'V', 'R'), + 270: ('BETADIST', 3, 5, 0x04, 1, 'V', 'V'), + 271: ('GAMMALN', 1, 1, 0x02, 1, 'V', 'V'), + 272: ('BETAINV', 3, 5, 0x04, 1, 'V', 'V'), + 273: ('BINOMDIST', 4, 4, 0x02, 4, 'V', 'VVVV'), + 274: ('CHIDIST', 2, 2, 0x02, 2, 'V', 'VV'), + 275: ('CHIINV', 2, 2, 0x02, 2, 'V', 'VV'), + 276: ('COMBIN', 2, 2, 0x02, 2, 'V', 'VV'), + 277: ('CONFIDENCE', 3, 3, 0x02, 3, 'V', 'VVV'), + 278: ('CRITBINOM', 3, 3, 0x02, 3, 'V', 'VVV'), + 279: ('EVEN', 1, 1, 0x02, 1, 'V', 'V'), + 280: ('EXPONDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 281: ('FDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 282: ('FINV', 3, 3, 0x02, 3, 'V', 'VVV'), + 283: ('FISHER', 1, 1, 0x02, 1, 'V', 'V'), + 284: ('FISHERINV', 1, 1, 0x02, 1, 'V', 'V'), + 285: ('FLOOR', 2, 2, 0x02, 2, 'V', 'VV'), + 286: ('GAMMADIST', 4, 4, 0x02, 4, 'V', 'VVVV'), + 287: ('GAMMAINV', 3, 3, 0x02, 3, 'V', 'VVV'), + 288: ('CEILING', 2, 2, 0x02, 2, 'V', 'VV'), + 289: ('HYPGEOMDIST', 4, 4, 0x02, 4, 'V', 'VVVV'), + 290: ('LOGNORMDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 291: ('LOGINV', 3, 3, 0x02, 3, 'V', 'VVV'), + 292: ('NEGBINOMDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 293: ('NORMDIST', 4, 4, 0x02, 4, 'V', 'VVVV'), + 294: ('NORMSDIST', 1, 1, 0x02, 1, 'V', 'V'), + 295: ('NORMINV', 3, 3, 0x02, 3, 'V', 'VVV'), + 296: ('NORMSINV', 1, 1, 0x02, 1, 'V', 'V'), + 297: ('STANDARDIZE', 3, 3, 0x02, 3, 'V', 'VVV'), + 298: ('ODD', 1, 1, 0x02, 1, 'V', 'V'), + 299: ('PERMUT', 2, 2, 0x02, 2, 'V', 'VV'), + 300: ('POISSON', 3, 3, 0x02, 3, 'V', 'VVV'), + 301: ('TDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 302: ('WEIBULL', 4, 4, 0x02, 4, 'V', 'VVVV'), + 303: ('SUMXMY2', 2, 2, 0x02, 2, 'V', 'AA'), + 304: ('SUMX2MY2', 2, 2, 0x02, 2, 'V', 'AA'), + 305: ('SUMX2PY2', 2, 2, 0x02, 2, 'V', 'AA'), + 306: ('CHITEST', 2, 2, 0x02, 2, 'V', 'AA'), + 307: ('CORREL', 2, 2, 0x02, 2, 'V', 'AA'), + 308: ('COVAR', 2, 2, 0x02, 2, 'V', 'AA'), + 309: ('FORECAST', 3, 3, 0x02, 3, 'V', 'VAA'), + 310: ('FTEST', 2, 2, 0x02, 2, 'V', 'AA'), + 311: ('INTERCEPT', 2, 2, 0x02, 2, 'V', 'AA'), + 312: ('PEARSON', 2, 2, 0x02, 2, 'V', 'AA'), + 313: ('RSQ', 2, 2, 0x02, 2, 'V', 'AA'), + 314: ('STEYX', 2, 2, 0x02, 2, 'V', 'AA'), + 315: ('SLOPE', 2, 2, 0x02, 2, 'V', 'AA'), + 316: ('TTEST', 4, 4, 0x02, 4, 'V', 'AAVV'), + 317: ('PROB', 3, 4, 0x04, 3, 'V', 'AAV'), + 318: ('DEVSQ', 1, 30, 0x04, 1, 'V', 'R'), + 319: ('GEOMEAN', 1, 30, 0x04, 1, 'V', 'R'), + 320: ('HARMEAN', 1, 30, 0x04, 1, 'V', 'R'), + 321: ('SUMSQ', 0, 30, 0x04, 1, 'V', 'R'), + 322: ('KURT', 1, 30, 0x04, 1, 'V', 'R'), + 323: ('SKEW', 1, 30, 0x04, 1, 'V', 'R'), + 324: ('ZTEST', 2, 3, 0x04, 2, 'V', 'RV'), + 325: ('LARGE', 2, 2, 0x02, 2, 'V', 'RV'), + 326: ('SMALL', 2, 2, 0x02, 2, 'V', 'RV'), + 327: ('QUARTILE', 2, 2, 0x02, 2, 'V', 'RV'), + 328: ('PERCENTILE', 2, 2, 0x02, 2, 'V', 'RV'), + 329: ('PERCENTRANK', 2, 3, 0x04, 2, 'V', 'RV'), + 330: ('MODE', 1, 30, 0x04, 1, 'V', 'A'), + 331: ('TRIMMEAN', 2, 2, 0x02, 2, 'V', 'RV'), + 332: ('TINV', 2, 2, 0x02, 2, 'V', 'VV'), + 336: ('CONCATENATE', 0, 30, 0x04, 1, 'V', 'V'), + 337: ('POWER', 2, 2, 0x02, 2, 'V', 'VV'), + 342: ('RADIANS', 1, 1, 0x02, 1, 'V', 'V'), + 343: ('DEGREES', 1, 1, 0x02, 1, 'V', 'V'), + 344: ('SUBTOTAL', 2, 30, 0x04, 2, 'V', 'VR'), + 345: ('SUMIF', 2, 3, 0x04, 3, 'V', 'RVR'), + 346: ('COUNTIF', 2, 2, 0x02, 2, 'V', 'RV'), + 347: ('COUNTBLANK', 1, 1, 0x02, 1, 'V', 'R'), + 350: ('ISPMT', 4, 4, 0x02, 4, 'V', 'VVVV'), + 351: ('DATEDIF', 3, 3, 0x02, 3, 'V', 'VVV'), + 352: ('DATESTRING', 1, 1, 0x02, 1, 'V', 'V'), + 353: ('NUMBERSTRING', 2, 2, 0x02, 2, 'V', 'VV'), + 354: ('ROMAN', 1, 2, 0x04, 2, 'V', 'VV'), + 358: ('GETPIVOTDATA', 2, 2, 0x02, 2, 'V', 'RV'), + 359: ('HYPERLINK', 1, 2, 0x04, 2, 'V', 'VV'), + 360: ('PHONETIC', 1, 1, 0x02, 1, 'V', 'V'), + 361: ('AVERAGEA', 1, 30, 0x04, 1, 'V', 'R'), + 362: ('MAXA', 1, 30, 0x04, 1, 'V', 'R'), + 363: ('MINA', 1, 30, 0x04, 1, 'V', 'R'), + 364: ('STDEVPA', 1, 30, 0x04, 1, 'V', 'R'), + 365: ('VARPA', 1, 30, 0x04, 1, 'V', 'R'), + 366: ('STDEVA', 1, 30, 0x04, 1, 'V', 'R'), + 367: ('VARA', 1, 30, 0x04, 1, 'V', 'R'), + 368: ('BAHTTEXT', 1, 1, 0x02, 1, 'V', 'V'), + 369: ('THAIDAYOFWEEK', 1, 1, 0x02, 1, 'V', 'V'), + 370: ('THAIDIGIT', 1, 1, 0x02, 1, 'V', 'V'), + 371: ('THAIMONTHOFYEAR', 1, 1, 0x02, 1, 'V', 'V'), + 372: ('THAINUMSOUND', 1, 1, 0x02, 1, 'V', 'V'), + 373: ('THAINUMSTRING', 1, 1, 0x02, 1, 'V', 'V'), + 374: ('THAISTRINGLENGTH', 1, 1, 0x02, 1, 'V', 'V'), + 375: ('ISTHAIDIGIT', 1, 1, 0x02, 1, 'V', 'V'), + 376: ('ROUNDBAHTDOWN', 1, 1, 0x02, 1, 'V', 'V'), + 377: ('ROUNDBAHTUP', 1, 1, 0x02, 1, 'V', 'V'), + 378: ('THAIYEAR', 1, 1, 0x02, 1, 'V', 'V'), + 379: ('RTD', 2, 5, 0x04, 1, 'V', 'V'), + } + +tAttrNames = { + 0x00: "Skip??", # seen in SAMPLES.XLS which shipped with Excel 5.0 + 0x01: "Volatile", + 0x02: "If", + 0x04: "Choose", + 0x08: "Skip", + 0x10: "Sum", + 0x20: "Assign", + 0x40: "Space", + 0x41: "SpaceVolatile", + } + +error_opcodes = set([0x07, 0x08, 0x0A, 0x0B, 0x1C, 0x1D, 0x2F]) + +tRangeFuncs = (min, max, min, max, min, max) +tIsectFuncs = (max, min, max, min, max, min) + +def do_box_funcs(box_funcs, boxa, boxb): + return tuple([ + func(numa, numb) + for func, numa, numb in zip(box_funcs, boxa.coords, boxb.coords) + ]) + +def adjust_cell_addr_biff8(rowval, colval, reldelta, browx=None, bcolx=None): + row_rel = (colval >> 15) & 1 + col_rel = (colval >> 14) & 1 + rowx = rowval + colx = colval & 0xff + if reldelta: + if row_rel and rowx >= 32768: + rowx -= 65536 + if col_rel and colx >= 128: + colx -= 256 + else: + if row_rel: + rowx -= browx + if col_rel: + colx -= bcolx + return rowx, colx, row_rel, col_rel + +def adjust_cell_addr_biff_le7( + rowval, colval, reldelta, browx=None, bcolx=None): + row_rel = (rowval >> 15) & 1 + col_rel = (rowval >> 14) & 1 + rowx = rowval & 0x3fff + colx = colval + if reldelta: + if row_rel and rowx >= 8192: + rowx -= 16384 + if col_rel and colx >= 128: + colx -= 256 + else: + if row_rel: + rowx -= browx + if col_rel: + colx -= bcolx + return rowx, colx, row_rel, col_rel + +def get_cell_addr(data, pos, bv, reldelta, browx=None, bcolx=None): + if bv >= 80: + rowval, colval = unpack("Kind symbol | +#Kind number | +#Value representation | +#
---|---|---|
oBOOL | +#3 | +#integer: 0 => False; 1 => True | +#
oERR | +#4 | +#None, or an int error code (same as XL_CELL_ERROR in the Cell class). +# | +#
oMSNG | +#5 | +#Used by Excel as a placeholder for a missing (not supplied) function +# argument. Should *not* appear as a final formula result. Value is None. | +#
oNUM | +#2 | +#A float. Note that there is no way of distinguishing dates. | +#
oREF | +#-1 | +#The value is either None or a non-empty list of
+# absolute Ref3D instances. +# |
+#
oREL | +#-2 | +#The value is None or a non-empty list of +# fully or partially relative Ref3D instances. +# | +#
oSTRG | +#1 | +#A Unicode string. | +#
oUNK | +#0 | +#The kind is unknown or ambiguous. The value is None | +#
Represents an absolute or relative 3-dimensional reference to a box
+# of one or more cells.
+# -- New in version 0.6.0
+#
The coords attribute is a tuple of the form:
+# (shtxlo, shtxhi, rowxlo, rowxhi, colxlo, colxhi)
+# where 0 <= thingxlo <= thingx < thingxhi.
+# Note that it is quite possible to have thingx > nthings; for example
+# Print_Titles could have colxhi == 256 and/or rowxhi == 65536
+# irrespective of how many columns/rows are actually used in the worksheet.
+# The caller will need to decide how to handle this situation.
+# Keyword: IndexError :-)
+#
The components of the coords attribute are also available as individual +# attributes: shtxlo, shtxhi, rowxlo, rowxhi, colxlo, and colxhi.
+# +#The relflags attribute is a 6-tuple of flags which indicate whether
+# the corresponding (sheet|row|col)(lo|hi) is relative (1) or absolute (0). Portions copyright © 2005-2013 Stephen John Machin, Lingfo Pty Ltd This module is part of the xlrd package, which is released under a BSD-style licence. Contains the data for one worksheet. In the cell access functions, "rowx" is a row index, counting from zero, and "colx" is a
+# column index, counting from zero.
+# Negative values for row/column indexes and slice positions are supported in the expected fashion. For information about cell types and cell values, refer to the documentation of the {@link #Cell} class. WARNING: You don't call this class yourself. You access Sheet objects via the Book object that
+# was returned when you called xlrd.open_workbook("myfile.xls"). A list of {@link #Hyperlink} objects corresponding to HLINK records found
+ # in the worksheet. A sparse mapping from (rowx, colx) to an item in {@link #Sheet.hyperlink_list}.
+ # Cells not covered by a hyperlink are not mapped.
+ # It is possible using the Excel UI to set up a hyperlink that
+ # covers a larger-than-1x1 rectangle of cells.
+ # Hyperlink rectangles may overlap (Excel doesn't check).
+ # When a multiply-covered cell is clicked on, the hyperlink that is activated
+ # (and the one that is mapped here) is the last in hyperlink_list.
+ # A sparse mapping from (rowx, colx) to a {@link #Note} object.
+ # Cells not containing a note ("comment") are not mapped.
+ #
+# Note that there is necessarily no information available as to what cell(s)
+# the reference could possibly be relative to. The caller must decide what if
+# any use to make of oREL operands. Note also that a partially relative
+# reference may well be a typo.
+# For example, define name A1Z10 as $a$1:$z10 (missing $ after z)
+# while the cursor is on cell Sheet3!A27.
+# The resulting Ref3D instance will have coords = (2, 3, 0, -16, 0, 26)
+# and relflags = (0, 0, 0, 1, 0, 0).
+# So far, only one possibility of a sheet-relative component in
+# a reference has been noticed: a 2D reference located in the "current sheet".
+#
This will appear as coords = (0, 1, ...) and relflags = (1, 1, ...).
+
+class Ref3D(tuple):
+
+ def __init__(self, atuple):
+ self.coords = atuple[0:6]
+ self.relflags = atuple[6:12]
+ if not self.relflags:
+ self.relflags = (0, 0, 0, 0, 0, 0)
+ (self.shtxlo, self.shtxhi,
+ self.rowxlo, self.rowxhi,
+ self.colxlo, self.colxhi) = self.coords
+
+ def __repr__(self):
+ if not self.relflags or self.relflags == (0, 0, 0, 0, 0, 0):
+ return "Ref3D(coords=%r)" % (self.coords, )
+ else:
+ return "Ref3D(coords=%r, relflags=%r)" \
+ % (self.coords, self.relflags)
+
+tAdd = 0x03
+tSub = 0x04
+tMul = 0x05
+tDiv = 0x06
+tPower = 0x07
+tConcat = 0x08
+tLT, tLE, tEQ, tGE, tGT, tNE = range(0x09, 0x0F)
+
+import operator as opr
+
+def nop(x):
+ return x
+
+def _opr_pow(x, y): return x ** y
+
+def _opr_lt(x, y): return x < y
+def _opr_le(x, y): return x <= y
+def _opr_eq(x, y): return x == y
+def _opr_ge(x, y): return x >= y
+def _opr_gt(x, y): return x > y
+def _opr_ne(x, y): return x != y
+
+def num2strg(num):
+ """Attempt to emulate Excel's default conversion
+ from number to string.
+ """
+ s = str(num)
+ if s.endswith(".0"):
+ s = s[:-2]
+ return s
+
+_arith_argdict = {oNUM: nop, oSTRG: float}
+_cmp_argdict = {oNUM: nop, oSTRG: nop}
+# Seems no conversions done on relops; in Excel, "1" > 9 produces TRUE.
+_strg_argdict = {oNUM:num2strg, oSTRG:nop}
+binop_rules = {
+ tAdd: (_arith_argdict, oNUM, opr.add, 30, '+'),
+ tSub: (_arith_argdict, oNUM, opr.sub, 30, '-'),
+ tMul: (_arith_argdict, oNUM, opr.mul, 40, '*'),
+ tDiv: (_arith_argdict, oNUM, opr.truediv, 40, '/'),
+ tPower: (_arith_argdict, oNUM, _opr_pow, 50, '^',),
+ tConcat:(_strg_argdict, oSTRG, opr.add, 20, '&'),
+ tLT: (_cmp_argdict, oBOOL, _opr_lt, 10, '<'),
+ tLE: (_cmp_argdict, oBOOL, _opr_le, 10, '<='),
+ tEQ: (_cmp_argdict, oBOOL, _opr_eq, 10, '='),
+ tGE: (_cmp_argdict, oBOOL, _opr_ge, 10, '>='),
+ tGT: (_cmp_argdict, oBOOL, _opr_gt, 10, '>'),
+ tNE: (_cmp_argdict, oBOOL, _opr_ne, 10, '<>'),
+ }
+
+unop_rules = {
+ 0x13: (lambda x: -x, 70, '-', ''), # unary minus
+ 0x12: (lambda x: x, 70, '+', ''), # unary plus
+ 0x14: (lambda x: x / 100.0, 60, '', '%'),# percent
+ }
+
+LEAF_RANK = 90
+FUNC_RANK = 90
+
+STACK_ALARM_LEVEL = 5
+STACK_PANIC_LEVEL = 10
+
+def evaluate_name_formula(bk, nobj, namex, blah=0, level=0):
+ if level > STACK_ALARM_LEVEL:
+ blah = 1
+ data = nobj.raw_formula
+ fmlalen = nobj.basic_formula_len
+ bv = bk.biff_version
+ reldelta = 1 # All defined name formulas use "Method B" [OOo docs]
+ if blah:
+ print("::: evaluate_name_formula %r %r %d %d %r level=%d" \
+ % (namex, nobj.name, fmlalen, bv, data, level), file=bk.logfile)
+ hex_char_dump(data, 0, fmlalen, fout=bk.logfile)
+ if level > STACK_PANIC_LEVEL:
+ raise XLRDError("Excessive indirect references in NAME formula")
+ sztab = szdict[bv]
+ pos = 0
+ stack = []
+ any_rel = 0
+ any_err = 0
+ any_external = 0
+ unk_opnd = Operand(oUNK, None)
+ error_opnd = Operand(oERR, None)
+ spush = stack.append
+
+ def do_binop(opcd, stk):
+ assert len(stk) >= 2
+ bop = stk.pop()
+ aop = stk.pop()
+ argdict, result_kind, func, rank, sym = binop_rules[opcd]
+ otext = ''.join([
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym,
+ '('[:bop.rank < rank],
+ bop.text,
+ ')'[:bop.rank < rank],
+ ])
+ resop = Operand(result_kind, None, rank, otext)
+ try:
+ bconv = argdict[bop.kind]
+ aconv = argdict[aop.kind]
+ except KeyError:
+ stk.append(resop)
+ return
+ if bop.value is None or aop.value is None:
+ stk.append(resop)
+ return
+ bval = bconv(bop.value)
+ aval = aconv(aop.value)
+ result = func(aval, bval)
+ if result_kind == oBOOL:
+ result = 1 if result else 0
+ resop.value = result
+ stk.append(resop)
+
+ def do_unaryop(opcode, result_kind, stk):
+ assert len(stk) >= 1
+ aop = stk.pop()
+ val = aop.value
+ func, rank, sym1, sym2 = unop_rules[opcode]
+ otext = ''.join([
+ sym1,
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym2,
+ ])
+ if val is not None:
+ val = func(val)
+ stk.append(Operand(result_kind, val, rank, otext))
+
+ def not_in_name_formula(op_arg, oname_arg):
+ msg = "ERROR *** Token 0x%02x (%s) found in NAME formula" \
+ % (op_arg, oname_arg)
+ raise FormulaError(msg)
+
+ if fmlalen == 0:
+ stack = [unk_opnd]
+
+ while 0 <= pos < fmlalen:
+ op = BYTES_ORD(data[pos])
+ opcode = op & 0x1f
+ optype = (op & 0x60) >> 5
+ if optype:
+ opx = opcode + 32
+ else:
+ opx = opcode
+ oname = onames[opx] # + [" RVA"][optype]
+ sz = sztab[opx]
+ if blah:
+ print("Pos:%d Op:0x%02x Name:t%s Sz:%d opcode:%02xh optype:%02xh" \
+ % (pos, op, oname, sz, opcode, optype), file=bk.logfile)
+ print("Stack =", stack, file=bk.logfile)
+ if sz == -2:
+ msg = 'ERROR *** Unexpected token 0x%02x ("%s"); biff_version=%d' \
+ % (op, oname, bv)
+ raise FormulaError(msg)
+ if not optype:
+ if 0x00 <= opcode <= 0x02: # unk_opnd, tExp, tTbl
+ not_in_name_formula(op, oname)
+ elif 0x03 <= opcode <= 0x0E:
+ # Add, Sub, Mul, Div, Power
+ # tConcat
+ # tLT, ..., tNE
+ do_binop(opcode, stack)
+ elif opcode == 0x0F: # tIsect
+ if blah: print("tIsect pre", stack, file=bk.logfile)
+ assert len(stack) >= 2
+ bop = stack.pop()
+ aop = stack.pop()
+ sym = ' '
+ rank = 80 ########## check #######
+ otext = ''.join([
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym,
+ '('[:bop.rank < rank],
+ bop.text,
+ ')'[:bop.rank < rank],
+ ])
+ res = Operand(oREF)
+ res.text = otext
+ if bop.kind == oERR or aop.kind == oERR:
+ res.kind = oERR
+ elif bop.kind == oUNK or aop.kind == oUNK:
+ # This can happen with undefined
+ # (go search in the current sheet) labels.
+ # For example =Bob Sales
+ # Each label gets a NAME record with an empty formula (!)
+ # Evaluation of the tName token classifies it as oUNK
+ # res.kind = oREF
+ pass
+ elif bop.kind == oREF == aop.kind:
+ if aop.value is not None and bop.value is not None:
+ assert len(aop.value) == 1
+ assert len(bop.value) == 1
+ coords = do_box_funcs(
+ tIsectFuncs, aop.value[0], bop.value[0])
+ res.value = [Ref3D(coords)]
+ elif bop.kind == oREL == aop.kind:
+ res.kind = oREL
+ if aop.value is not None and bop.value is not None:
+ assert len(aop.value) == 1
+ assert len(bop.value) == 1
+ coords = do_box_funcs(
+ tIsectFuncs, aop.value[0], bop.value[0])
+ relfa = aop.value[0].relflags
+ relfb = bop.value[0].relflags
+ if relfa == relfb:
+ res.value = [Ref3D(coords + relfa)]
+ else:
+ pass
+ spush(res)
+ if blah: print("tIsect post", stack, file=bk.logfile)
+ elif opcode == 0x10: # tList
+ if blah: print("tList pre", stack, file=bk.logfile)
+ assert len(stack) >= 2
+ bop = stack.pop()
+ aop = stack.pop()
+ sym = ','
+ rank = 80 ########## check #######
+ otext = ''.join([
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym,
+ '('[:bop.rank < rank],
+ bop.text,
+ ')'[:bop.rank < rank],
+ ])
+ res = Operand(oREF, None, rank, otext)
+ if bop.kind == oERR or aop.kind == oERR:
+ res.kind = oERR
+ elif bop.kind in (oREF, oREL) and aop.kind in (oREF, oREL):
+ res.kind = oREF
+ if aop.kind == oREL or bop.kind == oREL:
+ res.kind = oREL
+ if aop.value is not None and bop.value is not None:
+ assert len(aop.value) >= 1
+ assert len(bop.value) == 1
+ res.value = aop.value + bop.value
+ else:
+ pass
+ spush(res)
+ if blah: print("tList post", stack, file=bk.logfile)
+ elif opcode == 0x11: # tRange
+ if blah: print("tRange pre", stack, file=bk.logfile)
+ assert len(stack) >= 2
+ bop = stack.pop()
+ aop = stack.pop()
+ sym = ':'
+ rank = 80 ########## check #######
+ otext = ''.join([
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym,
+ '('[:bop.rank < rank],
+ bop.text,
+ ')'[:bop.rank < rank],
+ ])
+ res = Operand(oREF, None, rank, otext)
+ if bop.kind == oERR or aop.kind == oERR:
+ res = oERR
+ elif bop.kind == oREF == aop.kind:
+ if aop.value is not None and bop.value is not None:
+ assert len(aop.value) == 1
+ assert len(bop.value) == 1
+ coords = do_box_funcs(
+ tRangeFuncs, aop.value[0], bop.value[0])
+ res.value = [Ref3D(coords)]
+ elif bop.kind == oREL == aop.kind:
+ res.kind = oREL
+ if aop.value is not None and bop.value is not None:
+ assert len(aop.value) == 1
+ assert len(bop.value) == 1
+ coords = do_box_funcs(
+ tRangeFuncs, aop.value[0], bop.value[0])
+ relfa = aop.value[0].relflags
+ relfb = bop.value[0].relflags
+ if relfa == relfb:
+ res.value = [Ref3D(coords + relfa)]
+ else:
+ pass
+ spush(res)
+ if blah: print("tRange post", stack, file=bk.logfile)
+ elif 0x12 <= opcode <= 0x14: # tUplus, tUminus, tPercent
+ do_unaryop(opcode, oNUM, stack)
+ elif opcode == 0x15: # tParen
+ # source cosmetics
+ pass
+ elif opcode == 0x16: # tMissArg
+ spush(Operand(oMSNG, None, LEAF_RANK, ''))
+ elif opcode == 0x17: # tStr
+ if bv <= 70:
+ strg, newpos = unpack_string_update_pos(
+ data, pos+1, bk.encoding, lenlen=1)
+ else:
+ strg, newpos = unpack_unicode_update_pos(
+ data, pos+1, lenlen=1)
+ sz = newpos - pos
+ if blah: print(" sz=%d strg=%r" % (sz, strg), file=bk.logfile)
+ text = '"' + strg.replace('"', '""') + '"'
+ spush(Operand(oSTRG, strg, LEAF_RANK, text))
+ elif opcode == 0x18: # tExtended
+ # new with BIFF 8
+ assert bv >= 80
+ # not in OOo docs
+ raise FormulaError("tExtended token not implemented")
+ elif opcode == 0x19: # tAttr
+ subop, nc = unpack("
Ref3D((1, 4, 5, 20, 7, 10)) => 'Sheet2:Sheet3!$H$6:$J$20'
+def rangename3d(book, ref3d):
+ """ Ref3D(1, 4, 5, 20, 7, 10) => 'Sheet2:Sheet3!$H$6:$J$20'
+ (assuming Excel's default sheetnames) """
+ coords = ref3d.coords
+ return "%s!%s" % (
+ sheetrange(book, *coords[:2]),
+ rangename2d(*coords[2:6]))
+
+##
+# Utility function:
+#
Ref3D(coords=(0, 1, -32, -22, -13, 13), relflags=(0, 0, 1, 1, 1, 1))
+# R1C1 mode => 'Sheet1!R[-32]C[-13]:R[-23]C[12]'
+# A1 mode => depends on base cell (browx, bcolx)
+def rangename3drel(book, ref3d, browx=None, bcolx=None, r1c1=0):
+ coords = ref3d.coords
+ relflags = ref3d.relflags
+ shdesc = sheetrangerel(book, coords[:2], relflags[:2])
+ rngdesc = rangename2drel(coords[2:6], relflags[2:6], browx, bcolx, r1c1)
+ if not shdesc:
+ return rngdesc
+ return "%s!%s" % (shdesc, rngdesc)
+
+def quotedsheetname(shnames, shx):
+ if shx >= 0:
+ shname = shnames[shx]
+ else:
+ shname = {
+ -1: "?internal; any sheet?",
+ -2: "internal; deleted sheet",
+ -3: "internal; macro sheet",
+ -4: "<
-- New in version 0.6.1. Populated only if open_workbook(formatting_info=True).
+ colinfo_map = {}
+
+ ##
+ # The map from a row index to a {@link #Rowinfo} object. Note that it is possible
+ # to have missing entries -- at least one source of XLS files doesn't
+ # bother writing ROW records.
+ #
-- New in version 0.6.1. Populated only if open_workbook(formatting_info=True).
+ rowinfo_map = {}
+
+ ##
+ # List of address ranges of cells containing column labels.
+ # These are set up in Excel by Insert > Name > Labels > Columns.
+ #
-- New in version 0.6.0
+ #
How to deconstruct the list:
+ #
+ # for crange in thesheet.col_label_ranges:
+ # rlo, rhi, clo, chi = crange
+ # for rx in xrange(rlo, rhi):
+ # for cx in xrange(clo, chi):
+ # print "Column label at (rowx=%d, colx=%d) is %r" \
+ # (rx, cx, thesheet.cell_value(rx, cx))
+ #
+ col_label_ranges = []
+
+ ##
+ # List of address ranges of cells containing row labels.
+ # For more details, see col_label_ranges above.
+ #
-- New in version 0.6.0
+ row_label_ranges = []
+
+ ##
+ # List of address ranges of cells which have been merged.
+ # These are set up in Excel by Format > Cells > Alignment, then ticking
+ # the "Merge cells" box.
+ #
-- New in version 0.6.1. Extracted only if open_workbook(formatting_info=True).
+ #
How to deconstruct the list:
+ #
+ # for crange in thesheet.merged_cells:
+ # rlo, rhi, clo, chi = crange
+ # for rowx in xrange(rlo, rhi):
+ # for colx in xrange(clo, chi):
+ # # cell (rlo, clo) (the top left one) will carry the data
+ # # and formatting info; the remainder will be recorded as
+ # # blank cells, but a renderer will apply the formatting info
+ # # for the top left cell (e.g. border, pattern) to all cells in
+ # # the range.
+ #
+ merged_cells = []
+
+ ##
+ # Mapping of (rowx, colx) to list of (offset, font_index) tuples. The offset
+ # defines where in the string the font begins to be used.
+ # Offsets are expected to be in ascending order.
+ # If the first offset is not zero, the meaning is that the cell's XF's font should
+ # be used from offset 0.
+ #
This is a sparse mapping. There is no entry for cells that are not formatted with
+ # rich text.
+ #
How to use:
+ #
+ # runlist = thesheet.rich_text_runlist_map.get((rowx, colx))
+ # if runlist:
+ # for offset, font_index in runlist:
+ # # do work here.
+ # pass
+ #
+ # Populated only if open_workbook(formatting_info=True).
+ #
-- New in version 0.7.2.
+ #
+ rich_text_runlist_map = {}
+
+ ##
+ # Default column width from DEFCOLWIDTH record, else None.
+ # From the OOo docs:
+ # """Column width in characters, using the width of the zero character
+ # from default font (first FONT record in the file). Excel adds some
+ # extra space to the default width, depending on the default font and
+ # default font size. The algorithm how to exactly calculate the resulting
+ # column width is not known.
+ # Example: The default width of 8 set in this record results in a column
+ # width of 8.43 using Arial font with a size of 10 points."""
+ # For the default hierarchy, refer to the {@link #Colinfo} class.
+ #
-- New in version 0.6.1
+ defcolwidth = None
+
+ ##
+ # Default column width from STANDARDWIDTH record, else None.
+ # From the OOo docs:
+ # """Default width of the columns in 1/256 of the width of the zero
+ # character, using default font (first FONT record in the file)."""
+ # For the default hierarchy, refer to the {@link #Colinfo} class.
+ #
-- New in version 0.6.1
+ standardwidth = None
+
+ ##
+ # Default value to be used for a row if there is
+ # no ROW record for that row.
+ # From the optional DEFAULTROWHEIGHT record.
+ default_row_height = None
+
+ ##
+ # Default value to be used for a row if there is
+ # no ROW record for that row.
+ # From the optional DEFAULTROWHEIGHT record.
+ default_row_height_mismatch = None
+
+ ##
+ # Default value to be used for a row if there is
+ # no ROW record for that row.
+ # From the optional DEFAULTROWHEIGHT record.
+ default_row_hidden = None
+
+ ##
+ # Default value to be used for a row if there is
+ # no ROW record for that row.
+ # From the optional DEFAULTROWHEIGHT record.
+ default_additional_space_above = None
+
+ ##
+ # Default value to be used for a row if there is
+ # no ROW record for that row.
+ # From the optional DEFAULTROWHEIGHT record.
+ default_additional_space_below = None
+
+ ##
+ # Visibility of the sheet. 0 = visible, 1 = hidden (can be unhidden
+ # by user -- Format/Sheet/Unhide), 2 = "very hidden" (can be unhidden
+ # only by VBA macro).
+ visibility = 0
+
+ ##
+ # A 256-element tuple corresponding to the contents of the GCW record for this sheet.
+ # If no such record, treat as all bits zero.
+ # Applies to BIFF4-7 only. See docs of the {@link #Colinfo} class for discussion.
+ gcw = (0, ) * 256
+
+ ##
+ #
-- New in version 0.7.2
-- New in version 0.7.2
-- New in version 0.7.2
-- New in version 0.7.2
+ horizontal_page_breaks = []
+
+ ##
+ # A list of the vertical page breaks in this sheet.
+ # Breaks are tuples in the form (index of col after break, start row index, end row index).
+ # Populated only if open_workbook(formatting_info=True).
+ #
-- New in version 0.7.2
+ vertical_page_breaks = []
+
+
+ def __init__(self, book, position, name, number):
+ self.book = book
+ self.biff_version = book.biff_version
+ self._position = position
+ self.logfile = book.logfile
+ self.bt = array('B', [XL_CELL_EMPTY])
+ self.bf = array('h', [-1])
+ self.name = name
+ self.number = number
+ self.verbosity = book.verbosity
+ self.formatting_info = book.formatting_info
+ self.ragged_rows = book.ragged_rows
+ if self.ragged_rows:
+ self.put_cell = self.put_cell_ragged
+ else:
+ self.put_cell = self.put_cell_unragged
+ self._xf_index_to_xl_type_map = book._xf_index_to_xl_type_map
+ self.nrows = 0 # actual, including possibly empty cells
+ self.ncols = 0
+ self._maxdatarowx = -1 # highest rowx containing a non-empty cell
+ self._maxdatacolx = -1 # highest colx containing a non-empty cell
+ self._dimnrows = 0 # as per DIMENSIONS record
+ self._dimncols = 0
+ self._cell_values = []
+ self._cell_types = []
+ self._cell_xf_indexes = []
+ self.defcolwidth = None
+ self.standardwidth = None
+ self.default_row_height = None
+ self.default_row_height_mismatch = 0
+ self.default_row_hidden = 0
+ self.default_additional_space_above = 0
+ self.default_additional_space_below = 0
+ self.colinfo_map = {}
+ self.rowinfo_map = {}
+ self.col_label_ranges = []
+ self.row_label_ranges = []
+ self.merged_cells = []
+ self.rich_text_runlist_map = {}
+ self.horizontal_page_breaks = []
+ self.vertical_page_breaks = []
+ self._xf_index_stats = [0, 0, 0, 0]
+ self.visibility = book._sheet_visibility[number] # from BOUNDSHEET record
+ for attr, defval in _WINDOW2_options:
+ setattr(self, attr, defval)
+ self.first_visible_rowx = 0
+ self.first_visible_colx = 0
+ self.gridline_colour_index = 0x40
+ self.gridline_colour_rgb = None # pre-BIFF8
+ self.hyperlink_list = []
+ self.hyperlink_map = {}
+ self.cell_note_map = {}
+
+ # Values calculated by xlrd to predict the mag factors that
+ # will actually be used by Excel to display your worksheet.
+ # Pass these values to xlwt when writing XLS files.
+ # Warning 1: Behaviour of OOo Calc and Gnumeric has been observed to differ from Excel's.
+ # Warning 2: A value of zero means almost exactly what it says. Your sheet will be
+ # displayed as a very tiny speck on the screen. xlwt will reject attempts to set
+ # a mag_factor that is not (10 <= mag_factor <= 400).
+ self.cooked_page_break_preview_mag_factor = 60
+ self.cooked_normal_view_mag_factor = 100
+
+ # Values (if any) actually stored on the XLS file
+ self.cached_page_break_preview_mag_factor = None # from WINDOW2 record
+ self.cached_normal_view_mag_factor = None # from WINDOW2 record
+ self.scl_mag_factor = None # from SCL record
+
+ self._ixfe = None # BIFF2 only
+ self._cell_attr_to_xfx = {} # BIFF2.0 only
+
+ #### Don't initialise this here, use class attribute initialisation.
+ #### self.gcw = (0, ) * 256 ####
+
+ if self.biff_version >= 80:
+ self.utter_max_rows = 65536
+ else:
+ self.utter_max_rows = 16384
+ self.utter_max_cols = 256
+
+ self._first_full_rowx = -1
+
+ # self._put_cell_exceptions = 0
+ # self._put_cell_row_widenings = 0
+ # self._put_cell_rows_appended = 0
+ # self._put_cell_cells_appended = 0
+
+
+ ##
+ # {@link #Cell} object in the given row and column.
+ def cell(self, rowx, colx):
+ if self.formatting_info:
+ xfx = self.cell_xf_index(rowx, colx)
+ else:
+ xfx = None
+ return Cell(
+ self._cell_types[rowx][colx],
+ self._cell_values[rowx][colx],
+ xfx,
+ )
+
+ ##
+ # Value of the cell in the given row and column.
+ def cell_value(self, rowx, colx):
+ return self._cell_values[rowx][colx]
+
+ ##
+ # Type of the cell in the given row and column.
+ # Refer to the documentation of the {@link #Cell} class.
+ def cell_type(self, rowx, colx):
+ return self._cell_types[rowx][colx]
+
+ ##
+ # XF index of the cell in the given row and column.
+ # This is an index into Book.{@link #Book.xf_list}.
+ #
-- New in version 0.6.1
+ def cell_xf_index(self, rowx, colx):
+ self.req_fmt_info()
+ xfx = self._cell_xf_indexes[rowx][colx]
+ if xfx > -1:
+ self._xf_index_stats[0] += 1
+ return xfx
+ # Check for a row xf_index
+ try:
+ xfx = self.rowinfo_map[rowx].xf_index
+ if xfx > -1:
+ self._xf_index_stats[1] += 1
+ return xfx
+ except KeyError:
+ pass
+ # Check for a column xf_index
+ try:
+ xfx = self.colinfo_map[colx].xf_index
+ if xfx == -1: xfx = 15
+ self._xf_index_stats[2] += 1
+ return xfx
+ except KeyError:
+ # If all else fails, 15 is used as hardwired global default xf_index.
+ self._xf_index_stats[3] += 1
+ return 15
+
+ ##
+ # Returns the effective number of cells in the given row. For use with
+ # open_workbook(ragged_rows=True) which is likely to produce rows
+ # with fewer than {@link #Sheet.ncols} cells.
+ #
-- New in version 0.7.2
+ def row_len(self, rowx):
+ return len(self._cell_values[rowx])
+
+ ##
+ # Returns a sequence of the {@link #Cell} objects in the given row.
+ def row(self, rowx):
+ return [
+ self.cell(rowx, colx)
+ for colx in xrange(len(self._cell_values[rowx]))
+ ]
+
+ ##
+ # Returns a generator for iterating through each row.
+ def get_rows(self):
+ return (self.row(index) for index in range(self.nrows))
+
+ ##
+ # Returns a slice of the types
+ # of the cells in the given row.
+ def row_types(self, rowx, start_colx=0, end_colx=None):
+ if end_colx is None:
+ return self._cell_types[rowx][start_colx:]
+ return self._cell_types[rowx][start_colx:end_colx]
+
+ ##
+ # Returns a slice of the values
+ # of the cells in the given row.
+ def row_values(self, rowx, start_colx=0, end_colx=None):
+ if end_colx is None:
+ return self._cell_values[rowx][start_colx:]
+ return self._cell_values[rowx][start_colx:end_colx]
+
+ ##
+ # Returns a slice of the {@link #Cell} objects in the given row.
+ def row_slice(self, rowx, start_colx=0, end_colx=None):
+ nc = len(self._cell_values[rowx])
+ if start_colx < 0:
+ start_colx += nc
+ if start_colx < 0:
+ start_colx = 0
+ if end_colx is None or end_colx > nc:
+ end_colx = nc
+ elif end_colx < 0:
+ end_colx += nc
+ return [
+ self.cell(rowx, colx)
+ for colx in xrange(start_colx, end_colx)
+ ]
+
+ ##
+ # Returns a slice of the {@link #Cell} objects in the given column.
+ def col_slice(self, colx, start_rowx=0, end_rowx=None):
+ nr = self.nrows
+ if start_rowx < 0:
+ start_rowx += nr
+ if start_rowx < 0:
+ start_rowx = 0
+ if end_rowx is None or end_rowx > nr:
+ end_rowx = nr
+ elif end_rowx < 0:
+ end_rowx += nr
+ return [
+ self.cell(rowx, colx)
+ for rowx in xrange(start_rowx, end_rowx)
+ ]
+
+ ##
+ # Returns a slice of the values of the cells in the given column.
+ def col_values(self, colx, start_rowx=0, end_rowx=None):
+ nr = self.nrows
+ if start_rowx < 0:
+ start_rowx += nr
+ if start_rowx < 0:
+ start_rowx = 0
+ if end_rowx is None or end_rowx > nr:
+ end_rowx = nr
+ elif end_rowx < 0:
+ end_rowx += nr
+ return [
+ self._cell_values[rowx][colx]
+ for rowx in xrange(start_rowx, end_rowx)
+ ]
+
+ ##
+ # Returns a slice of the types of the cells in the given column.
+ def col_types(self, colx, start_rowx=0, end_rowx=None):
+ nr = self.nrows
+ if start_rowx < 0:
+ start_rowx += nr
+ if start_rowx < 0:
+ start_rowx = 0
+ if end_rowx is None or end_rowx > nr:
+ end_rowx = nr
+ elif end_rowx < 0:
+ end_rowx += nr
+ return [
+ self._cell_types[rowx][colx]
+ for rowx in xrange(start_rowx, end_rowx)
+ ]
+
+ ##
+ # Returns a sequence of the {@link #Cell} objects in the given column.
+ def col(self, colx):
+ return self.col_slice(colx)
+ # Above two lines just for the docs. Here's the real McCoy:
+ col = col_slice
+
+ # === Following methods are used in building the worksheet.
+ # === They are not part of the API.
+
+ def tidy_dimensions(self):
+ if self.verbosity >= 3:
+ fprintf(self.logfile,
+ "tidy_dimensions: nrows=%d ncols=%d \n",
+ self.nrows, self.ncols,
+ )
+ if 1 and self.merged_cells:
+ nr = nc = 0
+ umaxrows = self.utter_max_rows
+ umaxcols = self.utter_max_cols
+ for crange in self.merged_cells:
+ rlo, rhi, clo, chi = crange
+ if not (0 <= rlo < rhi <= umaxrows) \
+ or not (0 <= clo < chi <= umaxcols):
+ fprintf(self.logfile,
+ "*** WARNING: sheet #%d (%r), MERGEDCELLS bad range %r\n",
+ self.number, self.name, crange)
+ if rhi > nr: nr = rhi
+ if chi > nc: nc = chi
+ if nc > self.ncols:
+ self.ncols = nc
+ if nr > self.nrows:
+ # we put one empty cell at (nr-1,0) to make sure
+ # we have the right number of rows. The ragged rows
+ # will sort out the rest if needed.
+ self.put_cell(nr-1, 0, XL_CELL_EMPTY, '', -1)
+ if self.verbosity >= 1 \
+ and (self.nrows != self._dimnrows or self.ncols != self._dimncols):
+ fprintf(self.logfile,
+ "NOTE *** sheet %d (%r): DIMENSIONS R,C = %d,%d should be %d,%d\n",
+ self.number,
+ self.name,
+ self._dimnrows,
+ self._dimncols,
+ self.nrows,
+ self.ncols,
+ )
+ if not self.ragged_rows:
+ # fix ragged rows
+ ncols = self.ncols
+ s_cell_types = self._cell_types
+ s_cell_values = self._cell_values
+ s_cell_xf_indexes = self._cell_xf_indexes
+ s_fmt_info = self.formatting_info
+ # for rowx in xrange(self.nrows):
+ if self._first_full_rowx == -2:
+ ubound = self.nrows
+ else:
+ ubound = self._first_full_rowx
+ for rowx in xrange(ubound):
+ trow = s_cell_types[rowx]
+ rlen = len(trow)
+ nextra = ncols - rlen
+ if nextra > 0:
+ s_cell_values[rowx][rlen:] = [''] * nextra
+ trow[rlen:] = self.bt * nextra
+ if s_fmt_info:
+ s_cell_xf_indexes[rowx][rlen:] = self.bf * nextra
+
+ def put_cell_ragged(self, rowx, colx, ctype, value, xf_index):
+ if ctype is None:
+ # we have a number, so look up the cell type
+ ctype = self._xf_index_to_xl_type_map[xf_index]
+ assert 0 <= colx < self.utter_max_cols
+ assert 0 <= rowx < self.utter_max_rows
+ fmt_info = self.formatting_info
+
+ try:
+ nr = rowx + 1
+ if self.nrows < nr:
+
+ scta = self._cell_types.append
+ scva = self._cell_values.append
+ scxa = self._cell_xf_indexes.append
+ bt = self.bt
+ bf = self.bf
+ for _unused in xrange(self.nrows, nr):
+ scta(bt * 0)
+ scva([])
+ if fmt_info:
+ scxa(bf * 0)
+ self.nrows = nr
+
+ types_row = self._cell_types[rowx]
+ values_row = self._cell_values[rowx]
+ if fmt_info:
+ fmt_row = self._cell_xf_indexes[rowx]
+ ltr = len(types_row)
+ if colx >= self.ncols:
+ self.ncols = colx + 1
+ num_empty = colx - ltr
+ if not num_empty:
+ # most common case: colx == previous colx + 1
+ # self._put_cell_cells_appended += 1
+ types_row.append(ctype)
+ values_row.append(value)
+ if fmt_info:
+ fmt_row.append(xf_index)
+ return
+ if num_empty > 0:
+ num_empty += 1
+ # self._put_cell_row_widenings += 1
+ # types_row.extend(self.bt * num_empty)
+ # values_row.extend([''] * num_empty)
+ # if fmt_info:
+ # fmt_row.extend(self.bf * num_empty)
+ types_row[ltr:] = self.bt * num_empty
+ values_row[ltr:] = [''] * num_empty
+ if fmt_info:
+ fmt_row[ltr:] = self.bf * num_empty
+ types_row[colx] = ctype
+ values_row[colx] = value
+ if fmt_info:
+ fmt_row[colx] = xf_index
+ except:
+ print("put_cell", rowx, colx, file=self.logfile)
+ raise
+
+ def put_cell_unragged(self, rowx, colx, ctype, value, xf_index):
+ if ctype is None:
+ # we have a number, so look up the cell type
+ ctype = self._xf_index_to_xl_type_map[xf_index]
+ # assert 0 <= colx < self.utter_max_cols
+ # assert 0 <= rowx < self.utter_max_rows
+ try:
+ self._cell_types[rowx][colx] = ctype
+ self._cell_values[rowx][colx] = value
+ if self.formatting_info:
+ self._cell_xf_indexes[rowx][colx] = xf_index
+ except IndexError:
+ # print >> self.logfile, "put_cell extending", rowx, colx
+ # self.extend_cells(rowx+1, colx+1)
+ # self._put_cell_exceptions += 1
+ nr = rowx + 1
+ nc = colx + 1
+ assert 1 <= nc <= self.utter_max_cols
+ assert 1 <= nr <= self.utter_max_rows
+ if nc > self.ncols:
+ self.ncols = nc
+ # The row self._first_full_rowx and all subsequent rows
+ # are guaranteed to have length == self.ncols. Thus the
+ # "fix ragged rows" section of the tidy_dimensions method
+ # doesn't need to examine them.
+ if nr < self.nrows:
+ # cell data is not in non-descending row order *AND*
+ # self.ncols has been bumped up.
+ # This very rare case ruins this optmisation.
+ self._first_full_rowx = -2
+ elif rowx > self._first_full_rowx > -2:
+ self._first_full_rowx = rowx
+ if nr <= self.nrows:
+ # New cell is in an existing row, so extend that row (if necessary).
+ # Note that nr < self.nrows means that the cell data
+ # is not in ascending row order!!
+ trow = self._cell_types[rowx]
+ nextra = self.ncols - len(trow)
+ if nextra > 0:
+ # self._put_cell_row_widenings += 1
+ trow.extend(self.bt * nextra)
+ if self.formatting_info:
+ self._cell_xf_indexes[rowx].extend(self.bf * nextra)
+ self._cell_values[rowx].extend([''] * nextra)
+ else:
+ scta = self._cell_types.append
+ scva = self._cell_values.append
+ scxa = self._cell_xf_indexes.append
+ fmt_info = self.formatting_info
+ nc = self.ncols
+ bt = self.bt
+ bf = self.bf
+ for _unused in xrange(self.nrows, nr):
+ # self._put_cell_rows_appended += 1
+ scta(bt * nc)
+ scva([''] * nc)
+ if fmt_info:
+ scxa(bf * nc)
+ self.nrows = nr
+ # === end of code from extend_cells()
+ try:
+ self._cell_types[rowx][colx] = ctype
+ self._cell_values[rowx][colx] = value
+ if self.formatting_info:
+ self._cell_xf_indexes[rowx][colx] = xf_index
+ except:
+ print("put_cell", rowx, colx, file=self.logfile)
+ raise
+ except:
+ print("put_cell", rowx, colx, file=self.logfile)
+ raise
+
+
+ # === Methods after this line neither know nor care about how cells are stored.
+
+ def read(self, bk):
+ global rc_stats
+ DEBUG = 0
+ blah = DEBUG or self.verbosity >= 2
+ blah_rows = DEBUG or self.verbosity >= 4
+ blah_formulas = 0 and blah
+ r1c1 = 0
+ oldpos = bk._position
+ bk._position = self._position
+ XL_SHRFMLA_ETC_ETC = (
+ XL_SHRFMLA, XL_ARRAY, XL_TABLEOP, XL_TABLEOP2,
+ XL_ARRAY2, XL_TABLEOP_B2,
+ )
+ self_put_cell = self.put_cell
+ local_unpack = unpack
+ bk_get_record_parts = bk.get_record_parts
+ bv = self.biff_version
+ fmt_info = self.formatting_info
+ do_sst_rich_text = fmt_info and bk._rich_text_runlist_map
+ rowinfo_sharing_dict = {}
+ txos = {}
+ eof_found = 0
+ while 1:
+ # if DEBUG: print "SHEET.READ: about to read from position %d" % bk._position
+ rc, data_len, data = bk_get_record_parts()
+ # if rc in rc_stats:
+ # rc_stats[rc] += 1
+ # else:
+ # rc_stats[rc] = 1
+ # if DEBUG: print "SHEET.READ: op 0x%04x, %d bytes %r" % (rc, data_len, data)
+ if rc == XL_NUMBER:
+ # [:14] in following stmt ignores extraneous rubbish at end of record.
+ # Sample file testEON-8.xls supplied by Jan Kraus.
+ rowx, colx, xf_index, d = local_unpack('
-- New in version 0.6.1
+ #
+ # @param colx Index of the queried column, range 0 to 255.
+ # Note that it is possible to find out the width that will be used to display
+ # columns with no cell information e.g. column IV (colx=255).
+ # @return The column width that will be used for displaying
+ # the given column by Excel, in units of 1/256th of the width of a
+ # standard character (the digit zero in the first font).
+
+ def computed_column_width(self, colx):
+ self.req_fmt_info()
+ if self.biff_version >= 80:
+ colinfo = self.colinfo_map.get(colx, None)
+ if colinfo is not None:
+ return colinfo.width
+ if self.standardwidth is not None:
+ return self.standardwidth
+ elif self.biff_version >= 40:
+ if self.gcw[colx]:
+ if self.standardwidth is not None:
+ return self.standardwidth
+ else:
+ colinfo = self.colinfo_map.get(colx, None)
+ if colinfo is not None:
+ return colinfo.width
+ elif self.biff_version == 30:
+ colinfo = self.colinfo_map.get(colx, None)
+ if colinfo is not None:
+ return colinfo.width
+ # All roads lead to Rome and the DEFCOLWIDTH ...
+ if self.defcolwidth is not None:
+ return self.defcolwidth * 256
+ return 8 * 256 # 8 is what Excel puts in a DEFCOLWIDTH record
+
+ def handle_hlink(self, data):
+ # DEBUG = 1
+ if DEBUG: print("\n=== hyperlink ===", file=self.logfile)
+ record_size = len(data)
+ h = Hyperlink()
+ h.frowx, h.lrowx, h.fcolx, h.lcolx, guid0, dummy, options = unpack('
-- New in version 0.7.2
+#
Contains the attributes of a hyperlink.
+# Hyperlink objects are accessible through Sheet.{@link #Sheet.hyperlink_list}
+# and Sheet.{@link #Sheet.hyperlink_map}.
+#
-- New in version 0.7.2
+#
WARNING: You don't call this class yourself. You access Cell objects +# via methods of the {@link #Sheet} object(s) that you found in the {@link #Book} object that +# was returned when you called xlrd.open_workbook("myfile.xls").
+#Cell objects have three attributes: ctype is an int, value +# (which depends on ctype) and xf_index. +# If "formatting_info" is not enabled when the workbook is opened, xf_index will be None. +# The following table describes the types of cells and how their values +# are represented in Python.
+# +#Type symbol | +#Type number | +#Python value | +#
---|---|---|
XL_CELL_EMPTY | +#0 | +#empty string u'' | +#
XL_CELL_TEXT | +#1 | +#a Unicode string | +#
XL_CELL_NUMBER | +#2 | +#float | +#
XL_CELL_DATE | +#3 | +#float | +#
XL_CELL_BOOLEAN | +#4 | +#int; 1 means TRUE, 0 means FALSE | +#
XL_CELL_ERROR | +#5 | +#int representing internal Excel codes; for a text representation, +# refer to the supplied dictionary error_text_from_code | +#
XL_CELL_BLANK | +#6 | +#empty string u''. Note: this type will appear only when +# open_workbook(..., formatting_info=True) is used. | +#
Here is the default hierarchy for width, according to the OOo docs:
+#
+#
"""In BIFF3, if a COLINFO record is missing for a column,
+# the width specified in the record DEFCOLWIDTH is used instead.
+#
+#
In BIFF4-BIFF7, the width set in this [COLINFO] record is only used,
+# if the corresponding bit for this column is cleared in the GCW
+# record, otherwise the column width set in the DEFCOLWIDTH record
+# is used (the STANDARDWIDTH record is always ignored in this case [see footnote!]).
+#
+#
In BIFF8, if a COLINFO record is missing for a column,
+# the width specified in the record STANDARDWIDTH is used.
+# If this [STANDARDWIDTH] record is also missing,
+# the column width of the record DEFCOLWIDTH is used instead."""
+#
+#
+# Footnote: The docs on the GCW record say this:
+# """
+# If a bit is set, the corresponding column uses the width set in the STANDARDWIDTH
+# record. If a bit is cleared, the corresponding column uses the width set in the
+# COLINFO record for this column.
+#
If a bit is set, and the worksheet does not contain the STANDARDWIDTH record, or if
+# the bit is cleared, and the worksheet does not contain the COLINFO record, the DEFCOLWIDTH
+# record of the worksheet will be used instead.
+#
"""
+# At the moment (2007-01-17) xlrd is going with the GCW version of the story.
+# Reference to the source may be useful: see the computed_column_width(colx) method
+# of the Sheet class.
+#
-- New in version 0.6.1
+#
Height and default formatting information that applies to a row in a sheet.
+# Derived from ROW records.
+#
-- New in version 0.6.1
height: Height of the row, in twips. One twip == 1/20 of a point.
+# +#has_default_height: 0 = Row has custom height; 1 = Row has default height.
+# +#outline_level: Outline level of the row (0 to 7)
+# +#outline_group_starts_ends: 1 = Outline group starts or ends here (depending on where the +# outline buttons are located, see WSBOOL record [TODO ??]), +# and is collapsed
+# +#hidden: 1 = Row is hidden (manually, or by a filter or outline group)
+# +#height_mismatch: 1 = Row height and default font height do not match
+# +#has_default_xf_index: 1 = the xf_index attribute is usable; 0 = ignore it
+# +#xf_index: Index to default XF record for empty cells in this row. +# Don't use this if has_default_xf_index == 0.
+# +#additional_space_above: This flag is set, if the upper border of at least one cell in this row +# or if the lower border of at least one cell in the row above is +# formatted with a thick line style. Thin and medium line styles are not +# taken into account.
+# +#additional_space_below: This flag is set, if the lower border of at least one cell in this row +# or if the upper border of at least one cell in the row below is +# formatted with a medium or thick line style. Thin line styles are not +# taken into account.
+ +class Rowinfo(BaseObject): + + if _USE_SLOTS: + __slots__ = ( + "height", + "has_default_height", + "outline_level", + "outline_group_starts_ends", + "hidden", + "height_mismatch", + "has_default_xf_index", + "xf_index", + "additional_space_above", + "additional_space_below", + ) + + def __init__(self): + self.height = None + self.has_default_height = None + self.outline_level = None + self.outline_group_starts_ends = None + self.hidden = None + self.height_mismatch = None + self.has_default_xf_index = None + self.xf_index = None + self.additional_space_above = None + self.additional_space_below = None + + def __getstate__(self): + return ( + self.height, + self.has_default_height, + self.outline_level, + self.outline_group_starts_ends, + self.hidden, + self.height_mismatch, + self.has_default_xf_index, + self.xf_index, + self.additional_space_above, + self.additional_space_below, + ) + + def __setstate__(self, state): + ( + self.height, + self.has_default_height, + self.outline_level, + self.outline_group_starts_ends, + self.hidden, + self.height_mismatch, + self.has_default_xf_index, + self.xf_index, + self.additional_space_above, + self.additional_space_below, + ) = state diff --git a/Assignment 3/packages/xlrd/timemachine.py b/Assignment 3/packages/xlrd/timemachine.py new file mode 100644 index 0000000..a068db3 --- /dev/null +++ b/Assignment 3/packages/xlrd/timemachine.py @@ -0,0 +1,52 @@ +## +#Copyright (c) 2006-2012 Stephen John Machin, Lingfo Pty Ltd
+#This module is part of the xlrd package, which is released under a BSD-style licence.
+## + +# timemachine.py -- adaptation for single codebase. +# Currently supported: 2.6 to 2.7, 3.2+ +# usage: from timemachine import * + +from __future__ import print_function +import sys + +python_version = sys.version_info[:2] # e.g. version 2.6 -> (2, 6) + +if python_version >= (3, 0): + # Python 3 + BYTES_LITERAL = lambda x: x.encode('latin1') + UNICODE_LITERAL = lambda x: x + BYTES_ORD = lambda byte: byte + from io import BytesIO as BYTES_IO + def fprintf(f, fmt, *vargs): + fmt = fmt.replace("%r", "%a") + if fmt.endswith('\n'): + print(fmt[:-1] % vargs, file=f) + else: + print(fmt % vargs, end=' ', file=f) + EXCEL_TEXT_TYPES = (str, bytes, bytearray) # xlwt: isinstance(obj, EXCEL_TEXT_TYPES) + REPR = ascii + xrange = range + unicode = lambda b, enc: b.decode(enc) + ensure_unicode = lambda s: s + unichr = chr +else: + # Python 2 + BYTES_LITERAL = lambda x: x + UNICODE_LITERAL = lambda x: x.decode('latin1') + BYTES_ORD = ord + from cStringIO import StringIO as BYTES_IO + def fprintf(f, fmt, *vargs): + if fmt.endswith('\n'): + print(fmt[:-1] % vargs, file=f) + else: + print(fmt % vargs, end=' ', file=f) + try: + EXCEL_TEXT_TYPES = basestring # xlwt: isinstance(obj, EXCEL_TEXT_TYPES) + except NameError: + EXCEL_TEXT_TYPES = (str, unicode) + REPR = repr + xrange = xrange + # following used only to overcome 2.x ElementTree gimmick which + # returns text as `str` if it's ascii, otherwise `unicode` + ensure_unicode = unicode # used only in xlsx.py diff --git a/Assignment 3/packages/xlrd/xldate.py b/Assignment 3/packages/xlrd/xldate.py new file mode 100644 index 0000000..dc7b9c8 --- /dev/null +++ b/Assignment 3/packages/xlrd/xldate.py @@ -0,0 +1,213 @@ +# -*- coding: cp1252 -*- + +# No part of the content of this file was derived from the works of David Giffin. + +## +#Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd
+#This module is part of the xlrd package, which is released under a BSD-style licence.
+# +#Provides function(s) for dealing with Microsoft Excel ™ dates.
+## + +# 2008-10-18 SJM Fix bug in xldate_from_date_tuple (affected some years after 2099) + +# The conversion from days to (year, month, day) starts with +# an integral "julian day number" aka JDN. +# FWIW, JDN 0 corresponds to noon on Monday November 24 in Gregorian year -4713. +# More importantly: +# Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0 +# Noon on Gregorian 1904-01-02 (day 1 in the 1904-based system) is JDN 2416482.0 +import datetime + +_JDN_delta = (2415080 - 61, 2416482 - 1) +assert _JDN_delta[1] - _JDN_delta[0] == 1462 + +# Pre-calculate the datetime epochs for efficiency. +epoch_1904 = datetime.datetime(1904, 1, 1) +epoch_1900 = datetime.datetime(1899, 12, 31) +epoch_1900_minus_1 = datetime.datetime(1899, 12, 30) + +class XLDateError(ValueError): pass + +class XLDateNegative(XLDateError): pass +class XLDateAmbiguous(XLDateError): pass +class XLDateTooLarge(XLDateError): pass +class XLDateBadDatemode(XLDateError): pass +class XLDateBadTuple(XLDateError): pass + +_XLDAYS_TOO_LARGE = (2958466, 2958466 - 1462) # This is equivalent to 10000-01-01 + +## +# Convert an Excel number (presumed to represent a date, a datetime or a time) into +# a tuple suitable for feeding to datetime or mx.DateTime constructors. +# @param xldate The Excel number +# @param datemode 0: 1900-based, 1: 1904-based. +#