CenterForOpenScience · jlward · May 16, 2016 · May 11, 2016 · May 11, 2016 · May 11, 2016
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,3 +1,8 @@
+**dev**
+
+- Added support for textboxes. Content inside a textbox is now exported
+  instead of being dropped.
+
 **0.9.6**
 
 - Fixed issue in PyDocX CLI tool and added new test cases for the same

diff --git a/pydocx/export/base.py b/pydocx/export/base.py
@@ -15,7 +15,7 @@
     NumberingSpan,
     NumberingSpanBuilder,
 )
-from pydocx.openxml import wordprocessing, vml
+from pydocx.openxml import wordprocessing, vml, markup_compatibility
 from pydocx.openxml.packaging import WordprocessingDocument
 
 
@@ -67,6 +67,10 @@ def __init__(self, path):
             wordprocessing.EmbeddedObject: self.export_embedded_object,
             NumberingSpan: self.export_numbering_span,
             NumberingItem: self.export_numbering_item,
+            markup_compatibility.AlternateContent: self.export_alternate_content,
+            markup_compatibility.Fallback: self.export_fallback,
+            wordprocessing.Textbox: self.export_textbox,
+            wordprocessing.TxBxContent: self.export_textbox_content,
         }
         self.field_type_to_export_func_map = {
             'HYPERLINK': getattr(self, 'export_field_hyperlink', None),
@@ -531,3 +535,21 @@ def export_field_char(self, field_char):
 
     def export_field_code(self, field_code):
         pass
+
+    # Textbox exporters
+    def export_textbox(self, textbox):
+        """Export every child of ``textbox`` through ``export_node``."""
+        return self.yield_nested(textbox.children, self.export_node)
+
+    def export_textbox_content(self, textbox_content):
+        """Export every child of ``textbox_content`` through ``export_node``."""
+        return self.yield_nested(textbox_content.children, self.export_node)
+
+    # Markup Compatibility exporters
+    def export_alternate_content(self, alternate_content):
+        """Export every child of ``alternate_content`` through ``export_node``."""
+        return self.yield_nested(alternate_content.children, self.export_node)
+
+    def export_fallback(self, fallback):
+        """Export every child of ``fallback`` through ``export_node``."""
+        return self.yield_nested(fallback.children, self.export_node)
diff --git a/pydocx/models.py b/pydocx/models.py
@@ -5,6 +5,7 @@
     unicode_literals,
 )
 
+import importlib
 import inspect
 from collections import defaultdict
 
@@ -120,9 +121,45 @@ class ParkingLot(XmlModel):
     def __init__(self, *types, **kwargs):
         default = kwargs.pop('default', [])
         super(XmlCollection, self).__init__(self, default=default)
-        self.types = set(types)
+        # Entries may be classes or dotted 'module.ClassName' strings; strings
+        # are resolved lazily by the ``types`` property.
+        self._types = types
+        self._resolved_types = None
         self._name_to_type_map = None
 
+    @property
+    def types(self):
+        """
+        Return the set of child types accepted by this collection.
+
+        String references are resolved on first access and the resulting set
+        is cached, so repeated lookups do not re-import modules and
+        ``collection.types.add(SomeType)`` keeps working as it did when
+        ``types`` was a plain set attribute.
+        """
+        if self._resolved_types is None:
+            self._resolved_types = set(self._set_types(*self._types))
+        return self._resolved_types
+
+    def _set_types(self, *types):
+        """
+        Yield a class for each entry in ``types``.
+
+        Entries that have an ``rsplit`` method (strings) must look like
+        ``'<module>.<ClassName>'`` and are looked up relative to
+        ``pydocx.openxml``; any other entry is yielded unchanged.
+        """
+        base_path = 'pydocx.openxml.{0}'
+        for _type in types:
+            try:
+                path, klass = _type.rsplit('.', 1)
+            except AttributeError:
+                # No ``rsplit``: assumed to already be a type, not a string.
+                yield _type
+            else:
+                module = importlib.import_module(base_path.format(path))
+                yield getattr(module, klass)
+
     @property
     def name_to_type_map(self):
         if self._name_to_type_map is None:

diff --git a/pydocx/openxml/markup_compatibility/__init__.py b/pydocx/openxml/markup_compatibility/__init__.py
@@ -0,0 +1,7 @@
+from pydocx.openxml.markup_compatibility.alternate_content import AlternateContent
+from pydocx.openxml.markup_compatibility.fallback import Fallback
+
+__all__ = [
+    'AlternateContent',
+    'Fallback',
+]
diff --git a/pydocx/openxml/markup_compatibility/alternate_content.py b/pydocx/openxml/markup_compatibility/alternate_content.py
@@ -0,0 +1,20 @@
+# coding: utf-8
+from __future__ import (
+    absolute_import,
+    print_function,
+    unicode_literals,
+)
+
+from pydocx.models import XmlModel, XmlCollection
+
+
+class AlternateContent(XmlModel):
+    """
+    Model for the ``AlternateContent`` element.
+
+    Only ``Fallback`` is declared as a child type. It is referenced by a
+    dotted string instead of being imported here; ``XmlCollection`` looks the
+    name up under ``pydocx.openxml`` when the types are needed.
+    """
+    XML_TAG = 'AlternateContent'
+    children = XmlCollection('markup_compatibility.Fallback')
diff --git a/pydocx/openxml/markup_compatibility/fallback.py b/pydocx/openxml/markup_compatibility/fallback.py
@@ -0,0 +1,18 @@
+# coding: utf-8
+from __future__ import (
+    absolute_import,
+    print_function,
+    unicode_literals,
+)
+
+from pydocx.models import XmlModel, XmlCollection
+
+
+class Fallback(XmlModel):
+    """
+    Model for the ``Fallback`` element.
+
+    Only ``wordprocessing.Picture`` is declared as a child type.
+    """
+    XML_TAG = 'Fallback'
+    children = XmlCollection('wordprocessing.Picture')
diff --git a/pydocx/openxml/vml/shape.py b/pydocx/openxml/vml/shape.py
@@ -6,14 +6,13 @@
 )
 
 from pydocx.models import XmlModel, XmlCollection, XmlAttribute
-from pydocx.openxml.vml.image_data import ImageData
 
 
 class Shape(XmlModel):
     XML_TAG = 'shape'
 
     style = XmlAttribute()
-    children = XmlCollection(ImageData)
+    children = XmlCollection('vml.ImageData', 'wordprocessing.Textbox')
 
     # TODO perhaps we could have a prepare_style, or clean_style convention?
 

diff --git a/pydocx/openxml/wordprocessing/__init__.py b/pydocx/openxml/wordprocessing/__init__.py
@@ -40,6 +40,7 @@
 from pydocx.openxml.wordprocessing.table_cell_properties import TableCellProperties  # noqa
 from pydocx.openxml.wordprocessing.table_row import TableRow
 from pydocx.openxml.wordprocessing.text import Text
+from pydocx.openxml.wordprocessing.textbox import Textbox, TxBxContent
 
 __all__ = [
     'AbstractNum',
@@ -82,4 +83,6 @@
     'TableCell',
     'TableRow',
     'Text',
+    'Textbox',
+    'TxBxContent',
 ]
diff --git a/pydocx/openxml/wordprocessing/deleted_run.py b/pydocx/openxml/wordprocessing/deleted_run.py
@@ -6,17 +6,14 @@
 )
 
 from pydocx.models import XmlModel, XmlCollection
-from pydocx.openxml.wordprocessing.run import Run
-from pydocx.openxml.wordprocessing.smart_tag_run import SmartTagRun
 
 
 class DeletedRun(XmlModel):
     XML_TAG = 'del'
 
     children = XmlCollection(
-        Run,
-        SmartTagRun,
+        'wordprocessing.Run',
+        'wordprocessing.SmartTagRun',
+        'wordprocessing.DeletedRun',
         # TODO Needs InsertedRun
     )
-
-DeletedRun.children.types.add(DeletedRun)
diff --git a/pydocx/openxml/wordprocessing/inserted_run.py b/pydocx/openxml/wordprocessing/inserted_run.py
@@ -6,17 +6,14 @@
 )
 
 from pydocx.models import XmlModel, XmlCollection
-from pydocx.openxml.wordprocessing.run import Run
-from pydocx.openxml.wordprocessing.smart_tag_run import SmartTagRun
 
 
 class InsertedRun(XmlModel):
     XML_TAG = 'ins'
 
     children = XmlCollection(
-        Run,
-        SmartTagRun,
+        'wordprocessing.Run',
+        'wordprocessing.SmartTagRun',
+        'wordprocessing.InsertedRun',
         # TODO Needs DeletedRun
     )
-
-InsertedRun.children.types.add(InsertedRun)
diff --git a/pydocx/openxml/wordprocessing/run.py b/pydocx/openxml/wordprocessing/run.py
@@ -8,18 +8,6 @@
 
 from pydocx.models import XmlModel, XmlCollection, XmlChild
 from pydocx.openxml.wordprocessing.run_properties import RunProperties
-from pydocx.openxml.wordprocessing.br import Break
-from pydocx.openxml.wordprocessing.drawing import Drawing
-from pydocx.openxml.wordprocessing.field_char import FieldChar
-from pydocx.openxml.wordprocessing.field_code import FieldCode
-from pydocx.openxml.wordprocessing.picture import Picture
-from pydocx.openxml.wordprocessing.no_break_hyphen import NoBreakHyphen
-from pydocx.openxml.wordprocessing.text import Text
-from pydocx.openxml.wordprocessing.tab_char import TabChar
-from pydocx.openxml.wordprocessing.deleted_text import DeletedText
-from pydocx.openxml.wordprocessing.footnote_reference import FootnoteReference
-from pydocx.openxml.wordprocessing.footnote_reference_mark import FootnoteReferenceMark
-from pydocx.openxml.wordprocessing.embedded_object import EmbeddedObject
 from pydocx.util.memoize import memoized
 
 
@@ -29,18 +17,19 @@ class Run(XmlModel):
     properties = XmlChild(type=RunProperties)
 
     children = XmlCollection(
-        EmbeddedObject,
-        TabChar,
-        Break,
-        NoBreakHyphen,
-        Text,
-        Drawing,
-        Picture,
-        DeletedText,
-        FootnoteReference,
-        FootnoteReferenceMark,
-        FieldChar,
-        FieldCode,
+        'wordprocessing.EmbeddedObject',
+        'wordprocessing.TabChar',
+        'wordprocessing.Break',
+        'wordprocessing.NoBreakHyphen',
+        'wordprocessing.Text',
+        'wordprocessing.Drawing',
+        'wordprocessing.Picture',
+        'wordprocessing.DeletedText',
+        'wordprocessing.FootnoteReference',
+        'wordprocessing.FootnoteReferenceMark',
+        'wordprocessing.FieldChar',
+        'wordprocessing.FieldCode',
+        'markup_compatibility.AlternateContent',
     )
 
     def get_style_chain_stack(self):

diff --git a/pydocx/openxml/wordprocessing/smart_tag_run.py b/pydocx/openxml/wordprocessing/smart_tag_run.py
@@ -6,14 +6,12 @@
 )
 
 from pydocx.models import XmlModel, XmlCollection
-from pydocx.openxml.wordprocessing.run import Run
 
 
 class SmartTagRun(XmlModel):
     XML_TAG = 'smartTag'
 
     children = XmlCollection(
-        Run,
+        'wordprocessing.Run',
+        'wordprocessing.SmartTagRun',
     )
-
-SmartTagRun.children.types.add(SmartTagRun)
diff --git a/pydocx/openxml/wordprocessing/table.py b/pydocx/openxml/wordprocessing/table.py
@@ -8,15 +8,13 @@
 from collections import defaultdict
 
 from pydocx.models import XmlModel, XmlCollection
-from pydocx.openxml.wordprocessing.table_cell import TableCell
-from pydocx.openxml.wordprocessing.table_row import TableRow
 
 
 class Table(XmlModel):
     XML_TAG = 'tbl'
 
     rows = XmlCollection(
-        TableRow,
+        'wordprocessing.TableRow',
     )
 
     def calculate_table_cell_spans(self):
@@ -46,7 +44,3 @@ def calculate_table_cell_spans(self):
                         if active_rowspan_for_column:
                             cell_to_rowspan_count[active_rowspan_for_column] += 1  # noqa
         return dict(cell_to_rowspan_count)
-
-
-# Python makes defining nested class hierarchies at the global level difficult
-TableCell.children.types.add(Table)
diff --git a/pydocx/openxml/wordprocessing/table_cell.py b/pydocx/openxml/wordprocessing/table_cell.py
@@ -6,7 +6,6 @@
 )
 
 from pydocx.models import XmlModel, XmlCollection, XmlChild
-from pydocx.openxml.wordprocessing.paragraph import Paragraph
 from pydocx.openxml.wordprocessing.table_cell_properties import TableCellProperties  # noqa
 
 
@@ -16,6 +15,6 @@ class TableCell(XmlModel):
     properties = XmlChild(type=TableCellProperties)
 
     children = XmlCollection(
-        Paragraph,
-        # Table is added in wordprocessing.table
+        'wordprocessing.Paragraph',
+        'wordprocessing.Table',
     )
diff --git a/pydocx/openxml/wordprocessing/textbox.py b/pydocx/openxml/wordprocessing/textbox.py
@@ -0,0 +1,33 @@
+# coding: utf-8
+from __future__ import (
+    absolute_import,
+    print_function,
+    unicode_literals,
+)
+
+from pydocx.models import XmlModel, XmlCollection
+
+
+class TxBxContent(XmlModel):
+    """
+    Model for the ``txbxContent`` element.
+
+    Only ``wordprocessing.Paragraph`` is declared as a child type.
+    """
+    XML_TAG = 'txbxContent'
+    children = XmlCollection(
+        'wordprocessing.Paragraph',
+    )
+
+
+class Textbox(XmlModel):
+    """
+    Model for the ``textbox`` element.
+
+    Only ``TxBxContent`` is declared as a child type.
+    """
+    XML_TAG = 'textbox'
+
+    children = XmlCollection(
+        TxBxContent,
+    )
diff --git a/tests/export/test_docx.py b/tests/export/test_docx.py
@@ -24,34 +24,34 @@ def convert(path, *args, **kwargs):
 
 class ConvertDocxToHtmlTestCase(DocXFixtureTestCaseFactory):
     cases = (
-        'read_same_image_multiple_times',
         'all_configured_styles',
+        'export_from_googledocs',
+        'external_image',
+        'has_missing_image',
         'has_title',
         'inline_tags',
-        'has_missing_image',
         'justification',
         'list_in_table',
-        'external_image',
-        'export_from_googledocs',
-        'has_missing_image',
         'lists_with_styles',
         'missing_numbering',
         'missing_style',
         'nested_lists',
         'nested_table_rowspan',
         'nested_tables',
         'no_break_hyphen',
+        'read_same_image_multiple_times',
+        'rotate_image',
         'shift_enter',
         'simple',
         'simple_lists',
         'simple_table',
         'special_chars',
         'styled_bolding',
         'table_col_row_span',
+        'table_with_multi_rowspan',
         'tables_in_lists',
+        'textbox',
         'track_changes_on',
-        'table_with_multi_rowspan',
-        'rotate_image'
     )
 
     @raises(MalformedDocxException)

diff --git a/tests/fixtures/textbox.docx b/tests/fixtures/textbox.docx
diff --git a/tests/fixtures/textbox.html b/tests/fixtures/textbox.html
@@ -0,0 +1,5 @@
+<p>
+<p>AAA</p>
+<p>BBB</p>
+<p>CCCDDD</p>
+</p>
diff --git a/tox.ini b/tox.ini
@@ -12,6 +12,7 @@ commands =
 deps =
   -rrequirements/testing.txt
   defusedxml: defusedxml==0.4.1
+  py26: importlib
 
 [testenv:docs]
 commands =