From 1f252b96a1dfc1b5307c97f989731eb5c4c6fcb7 Mon Sep 17 00:00:00 2001 From: = Date: Fri, 1 Mar 2019 16:14:04 -0800 Subject: [PATCH 1/3] Adding support for afChunks and form fields --- docx/__init__.py | 3 + docx/oxml/__init__.py | 38 ++++++- docx/oxml/document.py | 9 +- docx/oxml/text/form.py | 249 +++++++++++++++++++++++++++++++++++++++++ docx/parts/altchunk.py | 56 +++++++++ 5 files changed, 353 insertions(+), 2 deletions(-) create mode 100644 docx/oxml/text/form.py create mode 100644 docx/parts/altchunk.py diff --git a/docx/__init__.py b/docx/__init__.py index 33e15e31a..56da241e5 100644 --- a/docx/__init__.py +++ b/docx/__init__.py @@ -14,12 +14,15 @@ from docx.parts.document import DocumentPart from docx.parts.hdrftr import FooterPart, HeaderPart from docx.parts.image import ImagePart +from docx.parts.altchunk import AltchunkPart from docx.parts.numbering import NumberingPart from docx.parts.settings import SettingsPart from docx.parts.styles import StylesPart def part_class_selector(content_type, reltype): + if reltype == RT.A_F_CHUNK: + return AltchunkPart if reltype == RT.IMAGE: return ImagePart return None diff --git a/docx/oxml/__init__.py b/docx/oxml/__init__.py index 093c1b45b..8600f613a 100644 --- a/docx/oxml/__init__.py +++ b/docx/oxml/__init__.py @@ -72,9 +72,10 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None): from .coreprops import CT_CoreProperties # noqa register_element_cls('cp:coreProperties', CT_CoreProperties) -from .document import CT_Body, CT_Document # noqa +from .document import CT_Body, CT_Document, CT_altChunk # noqa register_element_cls('w:body', CT_Body) register_element_cls('w:document', CT_Document) +register_element_cls("w:altChunk", CT_altChunk) from .numbering import CT_Num, CT_Numbering, CT_NumLvl, CT_NumPr # noqa register_element_cls('w:abstractNumId', CT_DecimalNumber) @@ -246,3 +247,38 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None): register_element_cls('w:br', CT_Br) register_element_cls('w:r', CT_R) register_element_cls('w:t', CT_Text) + +from .text.form import ( + CT_SimpleField, + CT_FldChar, + ST_FldCharType, + CT_FFData, + CT_FFDDList, + CT_FFCheckBox, + CT_FFTextInput, + CT_FFTextType, +) +register_element_cls('w:name', CT_String) +register_element_cls('w:label', CT_DecimalNumber) +register_element_cls('w:entryMacro', CT_String) +register_element_cls('w:exitMacro', CT_String) +register_element_cls('w:helpText', CT_String) +register_element_cls('w:StatusText', CT_String) # This is not technically correct. See 17.16.31 +register_element_cls("w:checkBox", CT_FFCheckBox) +register_element_cls("w:ddList", CT_FFDDList) +register_element_cls("w:textInput", CT_FFTextInput) +register_element_cls('w:label', CT_DecimalNumber) +register_element_cls('w:fldChar', CT_FldChar) +register_element_cls('w:ffData', CT_FFData) +register_element_cls('w:instrText', CT_Text) +register_element_cls('w:default', CT_String) +register_element_cls('w:result', CT_String) +register_element_cls('w:listEntry', CT_String) +register_element_cls('w:result', CT_DecimalNumber) +register_element_cls('w:label', CT_DecimalNumber) +register_element_cls('w:tabIndex', CT_DecimalNumber) +register_element_cls('w:checked', CT_OnOff) +register_element_cls('w:maxLength', CT_DecimalNumber) +register_element_cls('w:tabIndex', CT_DecimalNumber) +register_element_cls('w:format', CT_String) +register_element_cls('w:type', CT_FFTextType) diff --git a/docx/oxml/document.py b/docx/oxml/document.py index 4211b8ed1..f025e8ca1 100644 --- a/docx/oxml/document.py +++ b/docx/oxml/document.py @@ -5,7 +5,8 @@ . """ -from .xmlchemy import BaseOxmlElement, ZeroOrOne, ZeroOrMore +from .simpletypes import XsdString +from .xmlchemy import BaseOxmlElement, ZeroOrOne, ZeroOrMore, RequiredAttribute class CT_Document(BaseOxmlElement): @@ -31,6 +32,7 @@ class CT_Body(BaseOxmlElement): p = ZeroOrMore('w:p', successors=('w:sectPr',)) tbl = ZeroOrMore('w:tbl', successors=('w:sectPr',)) sectPr = ZeroOrOne('w:sectPr', successors=()) + altchunk = ZeroOrMore('w:altChunk', successors=()) def add_section_break(self): """Return `w:sectPr` element for new section added at end of document. @@ -65,3 +67,8 @@ def clear_content(self): content_elms = self[:] for content_elm in content_elms: self.remove(content_elm) + + +class CT_altChunk(BaseOxmlElement): + """`w:altChunk` element""" + rId = RequiredAttribute('r:id', XsdString) diff --git a/docx/oxml/text/form.py b/docx/oxml/text/form.py new file mode 100644 index 000000000..bc05f1fa8 --- /dev/null +++ b/docx/oxml/text/form.py @@ -0,0 +1,249 @@ +""" +Implementation of the following forms elements + +FORMCHECKBOX (§17.16.5.20), +FORMDROPDOWN (§17.16.5.21), +FORMTEXT (§17.16.5.22) + +""" + +from ..simpletypes import XsdUnsignedInt, XsdString, ST_OnOff, XsdStringEnumeration + +from ..xmlchemy import ( + BaseOxmlElement, + OptionalAttribute, RequiredAttribute, + OneAndOnlyOne, OneOrMore, ZeroOrMore, ZeroOrOne, ZeroOrOneChoice +) + + +# ------------------------------ +# Simple Field +# ------------------------------ + +class CT_SimpleField(BaseOxmlElement): + """ + 17.16.19 fldSimple (Simple Field) + + w_CT_SimpleField = + attribute w:instr { s_ST_String }, + attribute w:fldLock { s_ST_OnOff }?, + attribute w:dirty { s_ST_OnOff }?, + w_EG_PContent* + + + + + + + + + + """ + fldLock = OptionalAttribute('w:fldLock',ST_OnOff) + dirty = OptionalAttribute('w:dirty',ST_OnOff) + instr = OptionalAttribute('w:instr',XsdString) + + +# ------------------------------ +# Complex Field +# ------------------------------ + + +class ST_FldCharType(XsdStringEnumeration): + """ + Valid values for attribute + + + + + + + + + + """ + BEGIN = 'begin' + SEPARATE = 'separate' + END = 'end' + + _members = (BEGIN, SEPARATE, END) + + +class CT_FldChar(BaseOxmlElement): + """ + 17.16.18 fldChar (Complex Field) + + As well, because a complex field can specify both its field codes and its + current result within the document, these two items are separated by the + optional separator character, which defines the end of the field codes and + the beginning of the field contents. The omission of this character shall + be used to specify that the contents of the field are entirely field codes + (i.e. the field has no result). + + + + + + + + + + """ + ffData = ZeroOrOne("w:ffData") + + fldCharType = RequiredAttribute('w:val', ST_FldCharType) + fldLock = OptionalAttribute('w:fldLock', ST_OnOff) + dirty = OptionalAttribute('w:dirty', ST_OnOff) + + +class CT_FFData(BaseOxmlElement): + """ + 17.16.17 ffData (Form Field Properties) + + + + + + + + + + + + + + + + + + + + """ + + name = ZeroOrOne("w:name") + label = ZeroOrOne("w:label") + tabIndex = ZeroOrOne("w:tabIndex") + enabled = ZeroOrOne("w:enabled") + calcOnExit = ZeroOrOne("w:calcOnExit") + entryMacro = ZeroOrOne("w:entryMacro") + exitMacro = ZeroOrOne("w:exitMacro") + helpText = ZeroOrOne("w:helpText") + statusText = ZeroOrOne("w:statusText") + checkBox = ZeroOrOne("w:checkBox") + ddList = ZeroOrOne("w:ddList") + textInput = ZeroOrOne("w:textInput") + +# ------------------------------ +# Drop Down List +# ------------------------------ + +class CT_FFDDList(BaseOxmlElement): + """ + 17.16.9 ddList (Drop-Down List Form Field Properties) + + + + + + + + + + """ + default = ZeroOrOne("w:default") + result = ZeroOrOne("w:result") + listEntry = OneOrMore("w:listEntry") + +# ------------------------------ +# Check Box +# ------------------------------ + +class CT_FFCheckBox(BaseOxmlElement): # noqa + """ + 17.16.7 checkBox (Checkbox Form Field Properties) + + + + + + + + + + + + """ + + checked = OneAndOnlyOne('w:checked') + default = ZeroOrOne('w:default') + # NOTE: ignoring size and sizeAuto for now + + +# ------------------------------ +# Text Input +# ------------------------------ + +class CT_FFTextInput(BaseOxmlElement): # noqa + """ + 17.16.33 textInput (Text Box Form Field Properties) + + + + + + + + + + """ + + type_ = ZeroOrOne('w:type') + default = ZeroOrOne('w:default') + format_ = ZeroOrOne('w:format') + + + +class ST_FFTextType(XsdStringEnumeration): + """ + Valid values for attribute + + + + + + + + + + + + """ + + REGULAR = "regular" + NUMBER = "number" + DATE = "date" + CURRENTTIME = "currentTime" + CURRENTDATE = "currentDate" + CALCULATED = "calculated" + + _members = (REGULAR, NUMBER, DATE, CURRENTTIME, CURRENTDATE, CALCULATED) + + +class CT_FFTextType(BaseOxmlElement): # noqa + """ + Used for ```` and ```` elements and others, + containing a style name in its ``val`` attribute. + """ + val = RequiredAttribute('w:val', ST_FFTextType) + +#-- @classmethod +#-- def new(cls, nsptagname, val): +#-- """ +#-- Return a new ``CT_String`` element with tagname *nsptagname* and +#-- ``val`` attribute set to *val*. +#-- """ +#-- elm = OxmlElement(nsptagname) +#-- elm.val = val +#-- return elm + + diff --git a/docx/parts/altchunk.py b/docx/parts/altchunk.py new file mode 100644 index 000000000..8241cae2a --- /dev/null +++ b/docx/parts/altchunk.py @@ -0,0 +1,56 @@ +# encoding: utf-8 + +"""The |Altchunk| and closely objects""" + +from __future__ import absolute_import, division, print_function, unicode_literals + +from docx import Document +from ..opc.part import Part +from io import BytesIO + +class AltchunkPart(Part): + """AltChunkPart for word document + + An AltChunk is a nested word document + """ + def __init__(self, partname, content_type, element, package): + super(AltchunkPart, self).__init__( + partname, content_type, package=package + ) + self._element = element + + @property + def blob(self): + stream = BytesIO() + self._element.save(stream) + return stream.getvalue() + + @property + def element(self): + """ + The root XML element of this XML part. + """ + return self._element + + @classmethod + def load(cls, partname, content_type, blob, package): + # TODO: is this a good place to catch the XMLSyntaxError and try + # parsing as a ZipFile? Perhaps this should be overridden int he + # document_part class definition since is seems to be specific to + # document chunks??? + + element = Document(BytesIO(blob)) + # alternatively docx.api.Package.open(BytesIO(blob)) + # Aside: element.part.package.main_document_part is element.part -> True + return cls(partname, content_type, element, package) + + @property + def part(self): + """ + Part of the parent protocol, "children" of the document will not know + the part that contains them so must ask their parent object. That + chain of delegation ends here for child objects. + """ + return self + + From e020b59ff6a066e1ff2c74d770bd7eade854aa11 Mon Sep 17 00:00:00 2001 From: Jason Thorpe Date: Tue, 26 Mar 2019 14:42:27 -0700 Subject: [PATCH 2/3] cleaning up comments --- docx/oxml/__init__.py | 15 +++- docx/oxml/numbering.py | 24 +++++ docx/oxml/text/form.py | 198 +++++++---------------------------------- docx/parts/altchunk.py | 15 +--- 4 files changed, 74 insertions(+), 178 deletions(-) diff --git a/docx/oxml/__init__.py b/docx/oxml/__init__.py index 8600f613a..f6e33a14d 100644 --- a/docx/oxml/__init__.py +++ b/docx/oxml/__init__.py @@ -77,7 +77,9 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None): register_element_cls('w:document', CT_Document) register_element_cls("w:altChunk", CT_altChunk) -from .numbering import CT_Num, CT_Numbering, CT_NumLvl, CT_NumPr # noqa +from .numbering import CT_Num, CT_Numbering, CT_NumLvl, CT_NumPr, CT_Lvl, CT_AbstractNum # noqa +register_element_cls('w:abstractNum', CT_AbstractNum) +register_element_cls('w:lvl', CT_Lvl) register_element_cls('w:abstractNumId', CT_DecimalNumber) register_element_cls('w:ilvl', CT_DecimalNumber) register_element_cls('w:lvlOverride', CT_NumLvl) @@ -86,6 +88,12 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None): register_element_cls('w:numPr', CT_NumPr) register_element_cls('w:numbering', CT_Numbering) register_element_cls('w:startOverride', CT_DecimalNumber) +register_element_cls("w:start", CT_DecimalNumber) +register_element_cls("w:numFmt", CT_String) +register_element_cls("w:lvlRestart", CT_DecimalNumber) +register_element_cls("w:suff", CT_String) +register_element_cls("w:lvlText", CT_String) +register_element_cls("w:lvlJc", CT_String) from .section import ( # noqa CT_HdrFtr, @@ -173,6 +181,8 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None): register_element_cls('w:tblLayout', CT_TblLayoutType) register_element_cls('w:tblPr', CT_TblPr) register_element_cls('w:tblStyle', CT_String) +register_element_cls('w:tblCaption', CT_String) +register_element_cls('w:tblDescription', CT_String) register_element_cls('w:tc', CT_Tc) register_element_cls('w:tcPr', CT_TcPr) register_element_cls('w:tcW', CT_TblWidth) @@ -276,8 +286,9 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None): register_element_cls('w:listEntry', CT_String) register_element_cls('w:result', CT_DecimalNumber) register_element_cls('w:label', CT_DecimalNumber) -register_element_cls('w:tabIndex', CT_DecimalNumber) register_element_cls('w:checked', CT_OnOff) +register_element_cls('w:enabled', CT_OnOff) +register_element_cls('w:calcOnExit', CT_OnOff) register_element_cls('w:maxLength', CT_DecimalNumber) register_element_cls('w:tabIndex', CT_DecimalNumber) register_element_cls('w:format', CT_String) diff --git a/docx/oxml/numbering.py b/docx/oxml/numbering.py index aeedfa9a0..43c7c07f9 100644 --- a/docx/oxml/numbering.py +++ b/docx/oxml/numbering.py @@ -129,3 +129,27 @@ def _next_numId(self): if num not in num_ids: break return num + + +class CT_AbstractNum(BaseOxmlElement): + """ + ```` element, which represents a concrete list definition + """ + abstractNumId = OneAndOnlyOne('w:abstractNumId') + lvl = ZeroOrOne('w:lvl') + + +class CT_Lvl(BaseOxmlElement): + """ + ```` element, which represents a numbering level definition + """ + ilvl = RequiredAttribute("w:ilvl", ST_DecimalNumber) + start = ZeroOrOne("w:start") # register as ST_DecimalNumber + numFmt = ZeroOrOne("w:numFmt") # register as ST_String + pStyle = ZeroOrOne("w:pStyle") + lvlRestart = ZeroOrOne("w:lvlRestart") # register as ST_DecimalNumber + suff = ZeroOrOne("w:suff") # register as ST_String + lvlText = ZeroOrOne("w:lvlText") # register as ST_String + lvlJc = ZeroOrOne("w:lvlJc") # register as ST_String + pPr = ZeroOrOne("w:pPr") + rPr = ZeroOrOne("w:rPr") diff --git a/docx/oxml/text/form.py b/docx/oxml/text/form.py index bc05f1fa8..978014dbc 100644 --- a/docx/oxml/text/form.py +++ b/docx/oxml/text/form.py @@ -4,120 +4,53 @@ FORMCHECKBOX (§17.16.5.20), FORMDROPDOWN (§17.16.5.21), FORMTEXT (§17.16.5.22) - """ -from ..simpletypes import XsdUnsignedInt, XsdString, ST_OnOff, XsdStringEnumeration - +from ..simpletypes import XsdString, ST_OnOff, XsdStringEnumeration from ..xmlchemy import ( - BaseOxmlElement, - OptionalAttribute, RequiredAttribute, - OneAndOnlyOne, OneOrMore, ZeroOrMore, ZeroOrOne, ZeroOrOneChoice + BaseOxmlElement, + OptionalAttribute, + RequiredAttribute, + OneOrMore, + ZeroOrOne, ) -# ------------------------------ -# Simple Field -# ------------------------------ - class CT_SimpleField(BaseOxmlElement): """ - 17.16.19 fldSimple (Simple Field) - - w_CT_SimpleField = - attribute w:instr { s_ST_String }, - attribute w:fldLock { s_ST_OnOff }?, - attribute w:dirty { s_ST_OnOff }?, - w_EG_PContent* - - - - - - - - - + ```` element """ - fldLock = OptionalAttribute('w:fldLock',ST_OnOff) - dirty = OptionalAttribute('w:dirty',ST_OnOff) - instr = OptionalAttribute('w:instr',XsdString) + fldLock = OptionalAttribute("w:fldLock", ST_OnOff) + dirty = OptionalAttribute("w:dirty", ST_OnOff) + instr = OptionalAttribute("w:instr", XsdString) -# ------------------------------ -# Complex Field -# ------------------------------ - class ST_FldCharType(XsdStringEnumeration): """ Valid values for attribute - - - - - - - - - """ - BEGIN = 'begin' - SEPARATE = 'separate' - END = 'end' + BEGIN = "begin" + SEPARATE = "separate" + END = "end" _members = (BEGIN, SEPARATE, END) - + class CT_FldChar(BaseOxmlElement): """ - 17.16.18 fldChar (Complex Field) - - As well, because a complex field can specify both its field codes and its - current result within the document, these two items are separated by the - optional separator character, which defines the end of the field codes and - the beginning of the field contents. The omission of this character shall - be used to specify that the contents of the field are entirely field codes - (i.e. the field has no result). - - - - - - - - - + The ```` element """ - ffData = ZeroOrOne("w:ffData") - fldCharType = RequiredAttribute('w:val', ST_FldCharType) - fldLock = OptionalAttribute('w:fldLock', ST_OnOff) - dirty = OptionalAttribute('w:dirty', ST_OnOff) + ffData = ZeroOrOne("w:ffData") + fldCharType = RequiredAttribute("w:fldCharType", ST_FldCharType) + fldLock = OptionalAttribute("w:fldLock", ST_OnOff) + dirty = OptionalAttribute("w:dirty", ST_OnOff) class CT_FFData(BaseOxmlElement): """ - 17.16.17 ffData (Form Field Properties) - - - - - - - - - - - - - - - - - - - + The ```` element """ name = ZeroOrOne("w:name") @@ -133,90 +66,39 @@ class CT_FFData(BaseOxmlElement): ddList = ZeroOrOne("w:ddList") textInput = ZeroOrOne("w:textInput") -# ------------------------------ -# Drop Down List -# ------------------------------ class CT_FFDDList(BaseOxmlElement): """ - 17.16.9 ddList (Drop-Down List Form Field Properties) - - - - - - - - - + The ```` element. """ + default = ZeroOrOne("w:default") result = ZeroOrOne("w:result") listEntry = OneOrMore("w:listEntry") -# ------------------------------ -# Check Box -# ------------------------------ -class CT_FFCheckBox(BaseOxmlElement): # noqa +class CT_FFCheckBox(BaseOxmlElement): # noqa """ - 17.16.7 checkBox (Checkbox Form Field Properties) - - - - - - - - - - - + The ``FFCheckBox`` element. """ - checked = OneAndOnlyOne('w:checked') - default = ZeroOrOne('w:default') - # NOTE: ignoring size and sizeAuto for now - + checked = ZeroOrOne("w:checked") + default = ZeroOrOne("w:default") -# ------------------------------ -# Text Input -# ------------------------------ -class CT_FFTextInput(BaseOxmlElement): # noqa +class CT_FFTextInput(BaseOxmlElement): # noqa """ - 17.16.33 textInput (Text Box Form Field Properties) - - - - - - - - - + The '''' element """ - type_ = ZeroOrOne('w:type') - default = ZeroOrOne('w:default') - format_ = ZeroOrOne('w:format') - + type_ = ZeroOrOne("w:type") + default = ZeroOrOne("w:default") + format_ = ZeroOrOne("w:format") class ST_FFTextType(XsdStringEnumeration): """ - Valid values for attribute - - - - - - - - - - - + The ``fldCharType`` attribute of elements """ REGULAR = "regular" @@ -225,25 +107,13 @@ class ST_FFTextType(XsdStringEnumeration): CURRENTTIME = "currentTime" CURRENTDATE = "currentDate" CALCULATED = "calculated" - _members = (REGULAR, NUMBER, DATE, CURRENTTIME, CURRENTDATE, CALCULATED) -class CT_FFTextType(BaseOxmlElement): # noqa +class CT_FFTextType(BaseOxmlElement): # noqa """ Used for ```` and ```` elements and others, containing a style name in its ``val`` attribute. """ - val = RequiredAttribute('w:val', ST_FFTextType) - -#-- @classmethod -#-- def new(cls, nsptagname, val): -#-- """ -#-- Return a new ``CT_String`` element with tagname *nsptagname* and -#-- ``val`` attribute set to *val*. -#-- """ -#-- elm = OxmlElement(nsptagname) -#-- elm.val = val -#-- return elm - + val = RequiredAttribute("w:val", ST_FFTextType) diff --git a/docx/parts/altchunk.py b/docx/parts/altchunk.py index 8241cae2a..6e3fdef4f 100644 --- a/docx/parts/altchunk.py +++ b/docx/parts/altchunk.py @@ -1,12 +1,12 @@ # encoding: utf-8 -"""The |Altchunk| and closely objects""" +"""The |Altchunk| and closely related objects""" from __future__ import absolute_import, division, print_function, unicode_literals +from io import BytesIO from docx import Document from ..opc.part import Part -from io import BytesIO class AltchunkPart(Part): """AltChunkPart for word document @@ -34,14 +34,7 @@ def element(self): @classmethod def load(cls, partname, content_type, blob, package): - # TODO: is this a good place to catch the XMLSyntaxError and try - # parsing as a ZipFile? Perhaps this should be overridden int he - # document_part class definition since is seems to be specific to - # document chunks??? - - element = Document(BytesIO(blob)) - # alternatively docx.api.Package.open(BytesIO(blob)) - # Aside: element.part.package.main_document_part is element.part -> True + element = Document(BytesIO(blob)) return cls(partname, content_type, element, package) @property @@ -52,5 +45,3 @@ def part(self): chain of delegation ends here for child objects. """ return self - - From 48eb2612db00f4ead03d4aee0bcbc58ff5eef015 Mon Sep 17 00:00:00 2001 From: Jason Thorpe Date: Tue, 26 Mar 2019 15:08:42 -0700 Subject: [PATCH 3/3] addign encoding declaration --- docx/oxml/text/form.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docx/oxml/text/form.py b/docx/oxml/text/form.py index 978014dbc..ef643e3cd 100644 --- a/docx/oxml/text/form.py +++ b/docx/oxml/text/form.py @@ -1,3 +1,4 @@ +# encoding: utf-8 """ Implementation of the following forms elements