From ed0a145a8cacef5867b784d57605ba41f40105ea Mon Sep 17 00:00:00 2001 From: John Stewart Date: Fri, 10 Jan 2020 21:43:14 -0500 Subject: [PATCH] Doctests for Dependency Tree and more properties on Doc (#33) * adapted the dependency tree class to CLTKv1 * working on not reloading the full stanford pipeline on every process call * cache stanford NLP objects in wrapper class * fixed doctests for Stanford process * automatically reformatted files * implemented the pipeline pattern, and extraction of sentential structure of input text in the Stanford process * implemented true pipelines and sentence extraction for stanfordNLP * moved code out of __init__.py * added doctests and properties of Doc * fixed trailing whitespace * repaired governor and parent references in word; created a core package for essential classes; normalized POS and morpho features attributes of words * interrupted infinite recursion in parent token --- poetry.lock | 124 +++++++++---------- src/cltkv1/core/__init__.py | 2 + src/cltkv1/{utils => core}/data_types.py | 90 +++++++++----- src/cltkv1/{utils => core}/exceptions.py | 18 +-- src/cltkv1/dependency/tree.py | 102 ++++++++------- src/cltkv1/languages/glottolog.py | 2 +- src/cltkv1/{utils => languages}/pipelines.py | 12 +- src/cltkv1/languages/utils.py | 8 +- src/cltkv1/nlp.py | 38 +++--- src/cltkv1/tokenizers/word.py | 11 +- src/cltkv1/utils/__init__.py | 1 - src/cltkv1/utils/example_texts.py | 6 +- src/cltkv1/wrappers/stanford.py | 54 ++++---- 13 files changed, 259 insertions(+), 209 deletions(-) create mode 100644 src/cltkv1/core/__init__.py rename src/cltkv1/{utils => core}/data_types.py (65%) rename src/cltkv1/{utils => core}/exceptions.py (59%) rename src/cltkv1/{utils => languages}/pipelines.py (92%) diff --git a/poetry.lock b/poetry.lock index 503c851..fb1dd74 100644 --- a/poetry.lock +++ b/poetry.lock @@ -266,7 +266,7 @@ marker = "python_version < \"3.8\"" name = "importlib-metadata" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" -version = "1.3.0" +version = "1.4.0" [package.dependencies] zipp = ">=0.5" @@ -452,7 +452,7 @@ description = "Node.js virtual environment builder" name = "nodeenv" optional = false python-versions = "*" -version = "1.3.3" +version = "1.3.4" [[package]] category = "dev" @@ -468,7 +468,7 @@ description = "NumPy is the fundamental package for array computing with Python." name = "numpy" optional = false python-versions = ">=3.5" -version = "1.18.0" +version = "1.18.1" [[package]] category = "dev" @@ -695,8 +695,8 @@ category = "main" description = "YAML parser and emitter for Python" name = "pyyaml" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "5.2" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +version = "5.3" [[package]] category = "main" description = "Alternative regular expression module, to replace re." 
name = "regex" optional = false python-versions = "*" -version = "2019.12.20" +version = "2020.1.8" [[package]] category = "main" @@ -1191,8 +1191,8 @@ imagesize = [ {file = "imagesize-1.2.0.tar.gz", hash = "sha256:b1f6b5a4eab1f73479a50fb79fcf729514a900c341d8503d62a62dbc4127a2b1"}, ] importlib-metadata = [ - {file = "importlib_metadata-1.3.0-py2.py3-none-any.whl", hash = "sha256:d95141fbfa7ef2ec65cfd945e2af7e5a6ddbd7c8d9a25e66ff3be8e3daf9f60f"}, - {file = "importlib_metadata-1.3.0.tar.gz", hash = "sha256:073a852570f92da5f744a3472af1b61e28e9f78ccf0c9117658dc32b15de7b45"}, + {file = "importlib_metadata-1.4.0-py2.py3-none-any.whl", hash = "sha256:bdd9b7c397c273bcc9a11d6629a38487cd07154fa255a467bf704cd2c258e359"}, + {file = "importlib_metadata-1.4.0.tar.gz", hash = "sha256:f17c015735e1a88296994c0697ecea7e11db24290941983b08c9feb30921e6d8"}, ] ipython = [ {file = "ipython-7.11.1-py3-none-any.whl", hash = "sha256:387686dd7fc9caf29d2fddcf3116c4b07a11d9025701d220c589a430b0171d8a"}, @@ -1325,7 +1325,7 @@ nltk = [ {file = "nltk-3.4.5.zip", hash = "sha256:bed45551259aa2101381bbdd5df37d44ca2669c5c3dad72439fa459b29137d94"}, ] nodeenv = [ - {file = "nodeenv-1.3.3.tar.gz", hash = "sha256:ad8259494cf1c9034539f6cced78a1da4840a4b157e23640bc4a0c0546b0cb7a"}, + {file = "nodeenv-1.3.4-py2.py3-none-any.whl", hash = "sha256:561057acd4ae3809e665a9aaaf214afff110bbb6a6d5c8a96121aea6878408b3"}, ] nose = [ {file = "nose-1.3.7-py2-none-any.whl", hash = "sha256:dadcddc0aefbf99eea214e0f1232b94f2fa9bd98fa8353711dacb112bfcbbb2a"}, @@ -1333,27 +1333,27 @@ nose = [ {file = "nose-1.3.7.tar.gz", hash = "sha256:f1bffef9cbc82628f6e7d7b40d7e255aefaa1adb6a1b1d26c69a8b79e6208a98"}, ] numpy = [ - {file = "numpy-1.18.0-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:b091e5d4cbbe79f0e8b6b6b522346e54a282eadb06e3fd761e9b6fafc2ca91ad"}, - {file = "numpy-1.18.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:443ab93fc35b31f01db8704681eb2fd82f3a1b2fa08eed2dd0e71f1f57423d4a"}, - {file = "numpy-1.18.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:88c5ccbc4cadf39f32193a5ef22e3f84674418a9fd877c63322917ae8f295a56"}, - {file = "numpy-1.18.0-cp35-cp35m-win32.whl", hash = "sha256:e1080e37c090534adb2dd7ae1c59ee883e5d8c3e63d2a4d43c20ee348d0459c5"}, - {file = "numpy-1.18.0-cp35-cp35m-win_amd64.whl", hash = "sha256:f084d513de729ff10cd72a1f80db468cff464fedb1ef2fea030221a0f62d7ff4"}, - {file = "numpy-1.18.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:1baefd1fb4695e7f2e305467dbd876d765e6edd30c522894df76f8301efaee36"}, - {file = "numpy-1.18.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:cc070fc43a494e42732d6ae2f6621db040611c1dde64762a40c8418023af56d7"}, - {file = "numpy-1.18.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:6f8113c8dbfc192b58996ee77333696469ea121d1c44ea429d8fd266e4c6be51"}, - {file = "numpy-1.18.0-cp36-cp36m-win32.whl", hash = "sha256:a30f5c3e1b1b5d16ec1f03f4df28e08b8a7529d8c920bbed657f4fde61f1fbcd"}, - {file = "numpy-1.18.0-cp36-cp36m-win_amd64.whl", hash = "sha256:3c68c827689ca0ca713dba598335073ce0966850ec0b30715527dce4ecd84055"}, - {file = "numpy-1.18.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:f6a7421da632fc01e8a3ecd19c3f7350258d82501a646747664bae9c6a87c731"}, - {file = "numpy-1.18.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:905cd6fa6ac14654a6a32b21fad34670e97881d832e24a3ca32e19b455edb4a8"}, - {file = "numpy-1.18.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:854f6ed4fa91fa6da5d764558804ba5b0f43a51e5fe9fc4fdc93270b052f188a"}, - {file = "numpy-1.18.0-cp37-cp37m-win32.whl", hash = 
"sha256:ac3cf835c334fcc6b74dc4e630f9b5ff7b4c43f7fb2a7813208d95d4e10b5623"}, - {file = "numpy-1.18.0-cp37-cp37m-win_amd64.whl", hash = "sha256:62506e9e4d2a39c87984f081a2651d4282a1d706b1a82fe9d50a559bb58e705a"}, - {file = "numpy-1.18.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9d6de2ad782aae68f7ed0e0e616477fbf693d6d7cc5f0f1505833ff12f84a673"}, - {file = "numpy-1.18.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1c35fb1131362e6090d30286cfda52ddd42e69d3e2bf1fea190a0fad83ea3a18"}, - {file = "numpy-1.18.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:56710a756c5009af9f35b91a22790701420406d9ac24cf6b652b0e22cfbbb7ff"}, - {file = "numpy-1.18.0-cp38-cp38-win32.whl", hash = "sha256:03bbde29ac8fba860bb2c53a1525b3604a9b60417855ac3119d89868ec6041c3"}, - {file = "numpy-1.18.0-cp38-cp38-win_amd64.whl", hash = "sha256:712f0c32555132f4b641b918bdb1fd3c692909ae916a233ce7f50eac2de87e37"}, - {file = "numpy-1.18.0.zip", hash = "sha256:a9d72d9abaf65628f0f31bbb573b7d9304e43b1e6bbae43149c17737a42764c4"}, + {file = "numpy-1.18.1-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:20b26aaa5b3da029942cdcce719b363dbe58696ad182aff0e5dcb1687ec946dc"}, + {file = "numpy-1.18.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:70a840a26f4e61defa7bdf811d7498a284ced303dfbc35acb7be12a39b2aa121"}, + {file = "numpy-1.18.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:17aa7a81fe7599a10f2b7d95856dc5cf84a4eefa45bc96123cbbc3ebc568994e"}, + {file = "numpy-1.18.1-cp35-cp35m-win32.whl", hash = "sha256:f3d0a94ad151870978fb93538e95411c83899c9dc63e6fb65542f769568ecfa5"}, + {file = "numpy-1.18.1-cp35-cp35m-win_amd64.whl", hash = "sha256:1786a08236f2c92ae0e70423c45e1e62788ed33028f94ca99c4df03f5be6b3c6"}, + {file = "numpy-1.18.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:ae0975f42ab1f28364dcda3dde3cf6c1ddab3e1d4b2909da0cb0191fa9ca0480"}, + {file = "numpy-1.18.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:cf7eb6b1025d3e169989416b1adcd676624c2dbed9e3bcb7137f51bfc8cc2572"}, + {file = "numpy-1.18.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:b765ed3930b92812aa698a455847141869ef755a87e099fddd4ccf9d81fffb57"}, + {file = "numpy-1.18.1-cp36-cp36m-win32.whl", hash = "sha256:2d75908ab3ced4223ccba595b48e538afa5ecc37405923d1fea6906d7c3a50bc"}, + {file = "numpy-1.18.1-cp36-cp36m-win_amd64.whl", hash = "sha256:9acdf933c1fd263c513a2df3dceecea6f3ff4419d80bf238510976bf9bcb26cd"}, + {file = "numpy-1.18.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:56bc8ded6fcd9adea90f65377438f9fea8c05fcf7c5ba766bef258d0da1554aa"}, + {file = "numpy-1.18.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:e422c3152921cece8b6a2fb6b0b4d73b6579bd20ae075e7d15143e711f3ca2ca"}, + {file = "numpy-1.18.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:b3af02ecc999c8003e538e60c89a2b37646b39b688d4e44d7373e11c2debabec"}, + {file = "numpy-1.18.1-cp37-cp37m-win32.whl", hash = "sha256:d92350c22b150c1cae7ebb0ee8b5670cc84848f6359cf6b5d8f86617098a9b73"}, + {file = "numpy-1.18.1-cp37-cp37m-win_amd64.whl", hash = "sha256:77c3bfe65d8560487052ad55c6998a04b654c2fbc36d546aef2b2e511e760971"}, + {file = "numpy-1.18.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c98c5ffd7d41611407a1103ae11c8b634ad6a43606eca3e2a5a269e5d6e8eb07"}, + {file = "numpy-1.18.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:9537eecf179f566fd1c160a2e912ca0b8e02d773af0a7a1120ad4f7507cd0d26"}, + {file = "numpy-1.18.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:e840f552a509e3380b0f0ec977e8124d0dc34dc0e68289ca28f4d7c1d0d79474"}, + {file = "numpy-1.18.1-cp38-cp38-win32.whl", hash = 
"sha256:590355aeade1a2eaba17617c19edccb7db8d78760175256e3cf94590a1a964f3"}, + {file = "numpy-1.18.1-cp38-cp38-win_amd64.whl", hash = "sha256:39d2c685af15d3ce682c99ce5925cc66efc824652e10990d2462dfe9b8918c6a"}, + {file = "numpy-1.18.1.zip", hash = "sha256:b6ff59cee96b454516e47e7721098e6ceebef435e3e21ac2d6c3b8b02628eb77"}, ] packaging = [ {file = "packaging-20.0-py2.py3-none-any.whl", hash = "sha256:aec3fdbb8bc9e4bb65f0634b9f551ced63983a529d6a8931817d52fdd0816ddb"}, @@ -1485,40 +1485,40 @@ pyuca = [ {file = "pyuca-1.2.tar.gz", hash = "sha256:8a382fe74627f08c0d18908c0713ca4a20aad5385f077579e56208beea2893b2"}, ] pyyaml = [ - {file = "PyYAML-5.2-cp27-cp27m-win32.whl", hash = "sha256:35ace9b4147848cafac3db142795ee42deebe9d0dad885ce643928e88daebdcc"}, - {file = "PyYAML-5.2-cp27-cp27m-win_amd64.whl", hash = "sha256:ebc4ed52dcc93eeebeae5cf5deb2ae4347b3a81c3fa12b0b8c976544829396a4"}, - {file = "PyYAML-5.2-cp35-cp35m-win32.whl", hash = "sha256:38a4f0d114101c58c0f3a88aeaa44d63efd588845c5a2df5290b73db8f246d15"}, - {file = "PyYAML-5.2-cp35-cp35m-win_amd64.whl", hash = "sha256:483eb6a33b671408c8529106df3707270bfacb2447bf8ad856a4b4f57f6e3075"}, - {file = "PyYAML-5.2-cp36-cp36m-win32.whl", hash = "sha256:7f38e35c00e160db592091751d385cd7b3046d6d51f578b29943225178257b31"}, - {file = "PyYAML-5.2-cp36-cp36m-win_amd64.whl", hash = "sha256:0e7f69397d53155e55d10ff68fdfb2cf630a35e6daf65cf0bdeaf04f127c09dc"}, - {file = "PyYAML-5.2-cp37-cp37m-win32.whl", hash = "sha256:e4c015484ff0ff197564917b4b4246ca03f411b9bd7f16e02a2f586eb48b6d04"}, - {file = "PyYAML-5.2-cp37-cp37m-win_amd64.whl", hash = "sha256:4b6be5edb9f6bb73680f5bf4ee08ff25416d1400fbd4535fe0069b2994da07cd"}, - {file = "PyYAML-5.2-cp38-cp38-win32.whl", hash = "sha256:8100c896ecb361794d8bfdb9c11fce618c7cf83d624d73d5ab38aef3bc82d43f"}, - {file = "PyYAML-5.2-cp38-cp38-win_amd64.whl", hash = "sha256:2e9f0b7c5914367b0916c3c104a024bb68f269a486b9d04a2e8ac6f6597b7803"}, - {file = "PyYAML-5.2.tar.gz", hash = "sha256:c0ee8eca2c582d29c3c2ec6e2c4f703d1b7f1fb10bc72317355a746057e7346c"}, + {file = "PyYAML-5.3-cp27-cp27m-win32.whl", hash = "sha256:940532b111b1952befd7db542c370887a8611660d2b9becff75d39355303d82d"}, + {file = "PyYAML-5.3-cp27-cp27m-win_amd64.whl", hash = "sha256:059b2ee3194d718896c0ad077dd8c043e5e909d9180f387ce42012662a4946d6"}, + {file = "PyYAML-5.3-cp35-cp35m-win32.whl", hash = "sha256:4fee71aa5bc6ed9d5f116327c04273e25ae31a3020386916905767ec4fc5317e"}, + {file = "PyYAML-5.3-cp35-cp35m-win_amd64.whl", hash = "sha256:dbbb2379c19ed6042e8f11f2a2c66d39cceb8aeace421bfc29d085d93eda3689"}, + {file = "PyYAML-5.3-cp36-cp36m-win32.whl", hash = "sha256:e3a057b7a64f1222b56e47bcff5e4b94c4f61faac04c7c4ecb1985e18caa3994"}, + {file = "PyYAML-5.3-cp36-cp36m-win_amd64.whl", hash = "sha256:74782fbd4d4f87ff04159e986886931456a1894c61229be9eaf4de6f6e44b99e"}, + {file = "PyYAML-5.3-cp37-cp37m-win32.whl", hash = "sha256:24521fa2890642614558b492b473bee0ac1f8057a7263156b02e8b14c88ce6f5"}, + {file = "PyYAML-5.3-cp37-cp37m-win_amd64.whl", hash = "sha256:1cf708e2ac57f3aabc87405f04b86354f66799c8e62c28c5fc5f88b5521b2dbf"}, + {file = "PyYAML-5.3-cp38-cp38-win32.whl", hash = "sha256:70024e02197337533eef7b85b068212420f950319cc8c580261963aefc75f811"}, + {file = "PyYAML-5.3-cp38-cp38-win_amd64.whl", hash = "sha256:cb1f2f5e426dc9f07a7681419fe39cee823bb74f723f36f70399123f439e9b20"}, + {file = "PyYAML-5.3.tar.gz", hash = "sha256:e9f45bd5b92c7974e59bcd2dcc8631a6b6cc380a904725fce7bc08872e691615"}, ] regex = [ - {file = "regex-2019.12.20-cp27-cp27m-win32.whl", hash = 
"sha256:7bbbdbada3078dc360d4692a9b28479f569db7fc7f304b668787afc9feb38ec8"}, - {file = "regex-2019.12.20-cp27-cp27m-win_amd64.whl", hash = "sha256:a83049eb717ae828ced9cf607845929efcb086a001fc8af93ff15c50012a5716"}, - {file = "regex-2019.12.20-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:27d1bd20d334f50b7ef078eba0f0756a640fd25f5f1708d3b5bed18a5d6bced9"}, - {file = "regex-2019.12.20-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1768cf42a78a11dae63152685e7a1d90af7a8d71d2d4f6d2387edea53a9e0588"}, - {file = "regex-2019.12.20-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:4850c78b53acf664a6578bba0e9ebeaf2807bb476c14ec7e0f936f2015133cae"}, - {file = "regex-2019.12.20-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:78b3712ec529b2a71731fbb10b907b54d9c53a17ca589b42a578bc1e9a2c82ea"}, - {file = "regex-2019.12.20-cp36-cp36m-win32.whl", hash = "sha256:8d9ef7f6c403e35e73b7fc3cde9f6decdc43b1cb2ff8d058c53b9084bfcb553e"}, - {file = "regex-2019.12.20-cp36-cp36m-win_amd64.whl", hash = "sha256:faad39fdbe2c2ccda9846cd21581063086330efafa47d87afea4073a08128656"}, - {file = "regex-2019.12.20-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:adc35d38952e688535980ae2109cad3a109520033642e759f987cf47fe278aa1"}, - {file = "regex-2019.12.20-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:ef0b828a7e22e58e06a1cceddba7b4665c6af8afeb22a0d8083001330572c147"}, - {file = "regex-2019.12.20-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:0e6cf1e747f383f52a0964452658c04300a9a01e8a89c55ea22813931b580aa8"}, - {file = "regex-2019.12.20-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:032fdcc03406e1a6485ec09b826eac78732943840c4b29e503b789716f051d8d"}, - {file = "regex-2019.12.20-cp37-cp37m-win32.whl", hash = "sha256:77ae8d926f38700432807ba293d768ba9e7652df0cbe76df2843b12f80f68885"}, - {file = "regex-2019.12.20-cp37-cp37m-win_amd64.whl", hash = "sha256:c29a77ad4463f71a506515d9ec3a899ed026b4b015bf43245c919ff36275444b"}, - {file = "regex-2019.12.20-cp38-cp38-manylinux1_i686.whl", hash = "sha256:57eacd38a5ec40ed7b19a968a9d01c0d977bda55664210be713e750dd7b33540"}, - {file = "regex-2019.12.20-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:724eb24b92fc5fdc1501a1b4df44a68b9c1dda171c8ef8736799e903fb100f63"}, - {file = "regex-2019.12.20-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:d508875793efdf6bab3d47850df8f40d4040ae9928d9d80864c1768d6aeaf8e3"}, - {file = "regex-2019.12.20-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:cfd31b3300fefa5eecb2fe596c6dee1b91b3a05ece9d5cfd2631afebf6c6fadd"}, - {file = "regex-2019.12.20-cp38-cp38-win32.whl", hash = "sha256:29b20f66f2e044aafba86ecf10a84e611b4667643c42baa004247f5dfef4f90b"}, - {file = "regex-2019.12.20-cp38-cp38-win_amd64.whl", hash = "sha256:d3ee0b035816e0520fac928de31b6572106f0d75597f6fa3206969a02baba06f"}, - {file = "regex-2019.12.20.tar.gz", hash = "sha256:106e25a841921d8259dcef2a42786caae35bc750fb996f830065b3dfaa67b77e"}, + {file = "regex-2020.1.8-cp27-cp27m-win32.whl", hash = "sha256:4e8f02d3d72ca94efc8396f8036c0d3bcc812aefc28ec70f35bb888c74a25161"}, + {file = "regex-2020.1.8-cp27-cp27m-win_amd64.whl", hash = "sha256:e6c02171d62ed6972ca8631f6f34fa3281d51db8b326ee397b9c83093a6b7242"}, + {file = "regex-2020.1.8-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:4eae742636aec40cf7ab98171ab9400393360b97e8f9da67b1867a9ee0889b26"}, + {file = "regex-2020.1.8-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:bd25bb7980917e4e70ccccd7e3b5740614f1c408a642c245019cff9d7d1b6149"}, + {file = "regex-2020.1.8-cp36-cp36m-manylinux2010_i686.whl", hash = 
"sha256:3e77409b678b21a056415da3a56abfd7c3ad03da71f3051bbcdb68cf44d3c34d"}, + {file = "regex-2020.1.8-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:07b39bf943d3d2fe63d46281d8504f8df0ff3fe4c57e13d1656737950e53e525"}, + {file = "regex-2020.1.8-cp36-cp36m-win32.whl", hash = "sha256:23e2c2c0ff50f44877f64780b815b8fd2e003cda9ce817a7fd00dea5600c84a0"}, + {file = "regex-2020.1.8-cp36-cp36m-win_amd64.whl", hash = "sha256:27429b8d74ba683484a06b260b7bb00f312e7c757792628ea251afdbf1434003"}, + {file = "regex-2020.1.8-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:0e182d2f097ea8549a249040922fa2b92ae28be4be4895933e369a525ba36576"}, + {file = "regex-2020.1.8-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e3cd21cc2840ca67de0bbe4071f79f031c81418deb544ceda93ad75ca1ee9f7b"}, + {file = "regex-2020.1.8-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:ecc6de77df3ef68fee966bb8cb4e067e84d4d1f397d0ef6fce46913663540d77"}, + {file = "regex-2020.1.8-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:26ff99c980f53b3191d8931b199b29d6787c059f2e029b2b0c694343b1708c35"}, + {file = "regex-2020.1.8-cp37-cp37m-win32.whl", hash = "sha256:7bcd322935377abcc79bfe5b63c44abd0b29387f267791d566bbb566edfdd146"}, + {file = "regex-2020.1.8-cp37-cp37m-win_amd64.whl", hash = "sha256:10671601ee06cf4dc1bc0b4805309040bb34c9af423c12c379c83d7895622bb5"}, + {file = "regex-2020.1.8-cp38-cp38-manylinux1_i686.whl", hash = "sha256:98b8ed7bb2155e2cbb8b76f627b2fd12cf4b22ab6e14873e8641f266e0fb6d8f"}, + {file = "regex-2020.1.8-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:6a6ba91b94427cd49cd27764679024b14a96874e0dc638ae6bdd4b1a3ce97be1"}, + {file = "regex-2020.1.8-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:6a6ae17bf8f2d82d1e8858a47757ce389b880083c4ff2498dba17c56e6c103b9"}, + {file = "regex-2020.1.8-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:0932941cdfb3afcbc26cc3bcf7c3f3d73d5a9b9c56955d432dbf8bbc147d4c5b"}, + {file = "regex-2020.1.8-cp38-cp38-win32.whl", hash = "sha256:d58e4606da2a41659c84baeb3cfa2e4c87a74cec89a1e7c56bee4b956f9d7461"}, + {file = "regex-2020.1.8-cp38-cp38-win_amd64.whl", hash = "sha256:e7c7661f7276507bce416eaae22040fd91ca471b5b33c13f8ff21137ed6f248c"}, + {file = "regex-2020.1.8.tar.gz", hash = "sha256:d0f424328f9822b0323b3b6f2e4b9c90960b24743d220763c7f07071e0778351"}, ] requests = [ {file = "requests-2.22.0-py2.py3-none-any.whl", hash = "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31"}, diff --git a/src/cltkv1/core/__init__.py b/src/cltkv1/core/__init__.py new file mode 100644 index 0000000..4f02e41 --- /dev/null +++ b/src/cltkv1/core/__init__.py @@ -0,0 +1,2 @@ +from .data_types import * +from .exceptions import * diff --git a/src/cltkv1/utils/data_types.py b/src/cltkv1/core/data_types.py similarity index 65% rename from src/cltkv1/utils/data_types.py rename to src/cltkv1/core/data_types.py index e7b4741..9821121 100644 --- a/src/cltkv1/utils/data_types.py +++ b/src/cltkv1/core/data_types.py @@ -2,15 +2,15 @@ of the NLP pipeline. 
->>> from cltkv1.utils.data_types import Language ->>> from cltkv1.utils.data_types import Word ->>> from cltkv1.utils.data_types import Process ->>> from cltkv1.utils.data_types import Doc ->>> from cltkv1.utils.data_types import Pipeline +>>> from cltkv1.core.data_types import Language +>>> from cltkv1.core.data_types import Word +>>> from cltkv1.core.data_types import Process +>>> from cltkv1.core.data_types import Doc +>>> from cltkv1.core.data_types import Pipeline """ from dataclasses import dataclass -from typing import Any, Callable, List, Type, Union +from typing import Any, Callable, Dict, List, Type, Union @dataclass @@ -20,7 +20,7 @@ class Language: ``cltkv1.lagnuages.glottolog.LANGUAGES`` May be extended by user for dialects or languages not documented by ISO 639-3. - >>> from cltkv1.utils.data_types import Language + >>> from cltkv1.core.data_types import Language >>> from cltkv1.languages.utils import get_lang >>> latin = get_lang("lat") >>> isinstance(latin, Language) @@ -46,14 +46,14 @@ class Word: """Contains attributes of each processed word in a list of words. Designed to be used in the ``Doc.words`` dataclass. - >>> from cltkv1.utils.data_types import Word + >>> from cltkv1.core.data_types import Word >>> from cltkv1.utils.example_texts import get_example_text >>> get_example_text("lat")[:25] 'Gallia est omnis divisa i' >>> from cltkv1.languages.utils import get_lang >>> latin = get_lang("lat") >>> Word(index_char_start=0, index_char_stop=6, index_token=0, string=get_example_text("lat")[0:6], pos="nom") - Word(index_char_start=0, index_char_stop=6, index_token=0, index_sentence=None, string='Gallia', pos='nom', lemma=None, scansion=None, xpos=None, upos=None, dependency_relation=None, governor=None, parent_token=None, feats=None) + Word(index_char_start=0, index_char_stop=6, index_token=0, index_sentence=None, string='Gallia', pos='nom', lemma=None, scansion=None, xpos=None, upos=None, dependency_relation=None, governor=None, parent=None, features=None) """ index_char_start: int = None @@ -67,9 +67,9 @@ class Word: xpos: str = None # treebank-specific POS tag (from stanfordnlp) upos: str = None # universal POS tag (from stanfordnlp) dependency_relation: str = None # (from stanfordnlp) - governor: str = None # (from stanfordnlp) - parent_token: str = None # (from stanfordnlp) - feats: str = None # morphological features (from stanfordnlp) + governor: "Word" = None + parent: "Word" = None + features: Dict[str, str] = None # morphological features (from stanfordnlp) @dataclass @@ -89,38 +89,66 @@ class Doc: True """ - indices_sentences: List[List[int]] = None - indices_tokens: List[List[int]] = None language: str = None words: List[Word] = None - pipeline: List["Process"] = None + pipeline: "Pipeline" = None raw: str = None @property - def sentences(self): - return [ - [self.words[token_index] for token_index in sentence] - for sentence in self.indices_tokens - ] + def sentences(self) -> List[List[Word]]: + sentences = {} + for word in self.words: + sentence = sentences.get(word.index_sentence, {}) + sentence[word.index_token] = word + sentences[word.index_sentence] = sentence - @property - def tokens_list(self) -> List[str]: - """Returns a list of string word tokens. + sorted_values = lambda dict: [x[1] for x in sorted(dict.items())] + + return [sorted_values(sentence) for sentence in sorted_values(sentences)] - TODO: Why does ``Doc.tokens`` fail? 
+ def _get_words_attribute(self, attribute): + return [getattr(word, attribute) for word in self.words] + + @property + def tokens(self) -> List[str]: + """Returns a list of string word tokens of all words in the doc. >>> from cltkv1 import NLP >>> from cltkv1.utils.example_texts import get_example_text >>> cltk_nlp = NLP(language="lat") - >>> cltk_nlp.language.name - 'Latin' - >>> isinstance(cltk_nlp.language, Language) - True >>> cltk_doc = cltk_nlp.analyze(text=get_example_text("lat")) - >>> cltk_doc.tokens_list[:10] + >>> cltk_doc.tokens[:10] ['Gallia', 'est', 'omnis', 'divisa', 'in', 'partes', 'tres', ',', 'quarum', 'unam'] """ - return [word_obj.string for word_obj in self.words] + return self._get_words_attribute("string") + + @property + def pos(self) -> List[str]: + """Returns a list of the POS tags of all words in the doc. + + >>> from cltkv1 import NLP + >>> from cltkv1.utils.example_texts import get_example_text + >>> cltk_nlp = NLP(language="lat") + >>> cltk_doc = cltk_nlp.analyze(text=get_example_text("lat")) + >>> cltk_doc.pos[:3] + ['NOUN', 'AUX', 'DET'] + """ + return self._get_words_attribute("upos") + + @property + def morphosyntactic_features(self) -> List[Dict[str, str]]: + """Returns a list of dictionaries containing the morphosyntactic features + of each word (when available). + Each dictionary specifies feature names as keys and feature values as values. + + >>> from cltkv1 import NLP + >>> from cltkv1.utils.example_texts import get_example_text + >>> cltk_nlp = NLP(language="lat") + >>> cltk_doc = cltk_nlp.analyze(text=get_example_text("lat")) + >>> cltk_doc.morphosyntactic_features[:3] + [{'Case': 'Nom', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'}, {'Mood': 'Ind', 'Number': 'Sing', 'Person': '3', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'}, {'Case': 'Nom', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing', 'PronType': 'Ind'}] + """ + return self._get_words_attribute("features") @dataclass @@ -164,7 +192,7 @@ class Pipeline: # TODO: Consider adding a Unicode normalization as a default first Process - >>> from cltkv1.utils.data_types import Process, Pipeline + >>> from cltkv1.core.data_types import Process, Pipeline >>> from cltkv1.languages.utils import get_lang >>> from cltkv1.tokenizers import LatinTokenizationProcess >>> a_pipeline = Pipeline(description="A custom Latin pipeline", processes=[LatinTokenizationProcess], language=get_lang("lat")) diff --git a/src/cltkv1/utils/exceptions.py b/src/cltkv1/core/exceptions.py similarity index 59% rename from src/cltkv1/utils/exceptions.py rename to src/cltkv1/core/exceptions.py index 28f4895..f305734 100644 --- a/src/cltkv1/utils/exceptions.py +++ b/src/cltkv1/core/exceptions.py @@ -4,13 +4,13 @@ class CLTKException(Exception): """Exception class for the ``cltkv1`` library. - >>> from cltkv1.utils.exceptions import CLTKException + >>> from cltkv1.core.exceptions import CLTKException >>> raise CLTKException Traceback (most recent call last): ... - File "", line 1, in + File "", line 1, in raise CLTKException - cltkv1.utils.exceptions.CLTKException + cltkv1.core.exceptions.CLTKException """ @@ -18,13 +18,13 @@ class UnimplementedLanguageError(CLTKException): """Exception for when a language is supported by the CLTK however a particular process is not available for that language. - >>> from cltkv1.utils.exceptions import UnimplementedLanguageError + >>> from cltkv1.core.exceptions import UnimplementedLanguageError >>> raise UnimplementedLanguageError Traceback (most recent call last): ... 
- File "", line 1, in + File "", line 1, in raise UnimplementedLanguageError - cltkv1.utils.exceptions.UnimplementedLanguageError + cltkv1.core.exceptions.UnimplementedLanguageError """ @@ -34,11 +34,11 @@ class UnknownLanguageError(CLTKException): TODO: Mk separate exceptions for unknown lang vs unimplemented process for a known lang - >>> from cltkv1.utils.exceptions import UnknownLanguageError + >>> from cltkv1.core.exceptions import UnknownLanguageError >>> raise UnknownLanguageError Traceback (most recent call last): ... - File "", line 1, in + File "", line 1, in raise UnknownLanguageError - cltkv1.utils.exceptions.UnknownLanguageError + cltkv1.core.exceptions.UnknownLanguageError """ diff --git a/src/cltkv1/dependency/tree.py b/src/cltkv1/dependency/tree.py index 019e275..0c72b68 100644 --- a/src/cltkv1/dependency/tree.py +++ b/src/cltkv1/dependency/tree.py @@ -5,7 +5,7 @@ from typing import List, Union from xml.etree.ElementTree import Element, ElementTree -from cltkv1.utils.data_types import Doc, Process, Word +from cltkv1.core.data_types import Doc, Process, Word class Form(Element): @@ -69,17 +69,17 @@ def get_dependencies(self, relation: str) -> List["Dependency"]: """Extract dependents of this form for the specified dependency relation. - TODO: Add doctest for ``Form.get_dependencies()`` - >>> john = Form('John', 1) / 'NNP' - >>> john - John_1/NNP >>> loves = Form('loves', 2) / 'VRB' - >>> loves - loves_2/VRB >>> mary = Form('Mary', 3) / 'NNP' - >>> mary - Mary_3/NNP + >>> loves >> john | 'subj' + subj(loves_2/VRB, John_1/NNP) + >>> loves >> mary | 'obj' + obj(loves_2/VRB, Mary_3/NNP) + >>> loves.get_dependencies('subj') + [subj(loves_2/VRB, John_1/NNP)] + >>> loves.get_dependencies('obj') + [obj(loves_2/VRB, Mary_3/NNP)] """ deps = self.findall('*[@relation="{}"]'.format(relation)) return [Dependency(self, dep, relation) for dep in deps] @@ -99,12 +99,15 @@ def full_str(self, include_relation=True) -> str: The ID is attached to the text, and the relation is optionally suppressed. - TODO: Make this test more meaningful. KJ couldn't get the ``desc_form.full_str()`` to equal the target. + >>> loves = Form('loves', 2) / 'VRB' + >>> loves.full_str() + 'loves_2 [pos=VRB]' + >>> john = Form('John', 1) / 'NNP' + >>> loves >> john | 'subj' + subj(loves_2/VRB, John_1/NNP) + >>> john.full_str(True) + 'John_1 [pos=NNP,relation=subj]' - >>> f = Form - >>> desc_form = f('described') - >>> type(desc_form.full_str()) - """ excluded = ["form_id", "relation"] if not include_relation else ["form_id"] return "{0}_{1} [{2}]".format( @@ -141,19 +144,24 @@ def to_form(word: Word) -> "Form": form.set("upos", word.upos) form.set("xpos", word.xpos) - if word.feats != "_": - for f in word.feats.split("|"): - feature = f.split("=") - form.set(feature[0], feature[1]) + for (feature_name, feature_value) in word.features.items(): + form.set(feature_name, feature_value) return form class Dependency: - """The relationship (or edge) between a hierarchical - and subordinate Node. + """The asymmetric binary relationship (or edge) between a governing + Form (the "head") and a subordinate Form (the "dependent"). + + In principle the relationship could capture any form-to-form relation + that the systems deems of interest, be it syntactic, semantic, or discursive. - TODO: Explain this better. + If the `relation` attribute is not speficied, then the dependency simply states + that there's some asymmetric relationship between the head and the dependenent. + This is an *untyped* dependency. 
+ + For a *typed* dependency, a string value is supplied for the `relation` attribute. """ def __init__(self, head: Form, dep: Form, relation: str = None) -> None: @@ -182,43 +190,43 @@ def __init__(self, root: Form) -> None: ElementTree.__init__(self, root) - def _get_deps(self, node: Form, deps: List[Dependency]) -> List[Dependency]: - """ - TODO: Add docstring and doctests - TODO: What is difference btw this and ``DependencyTree.get_dependencies()``? - """ - for child_node in list(node): - deps = self._get_deps(child_node, deps) - deps.extend(node.get_dependencies(child_node("relation"))) - return deps - def get_dependencies(self) -> List[Dependency]: """Returns a list of all the dependency relations in the tree, generated by depth-first search. - TODO: Add doctests - """ - deps = self._get_deps(self.getroot(), []) - deps.append(Dependency(None, self.getroot(), "root")) - return deps - - def _print_treelet(self, node: Form, indent: int, all_features: bool): + >>> from cltkv1 import NLP + >>> from cltkv1.utils.example_texts import get_example_text + >>> cltk_nlp = NLP(language="lat") + >>> doc = cltk_nlp.analyze(text=get_example_text("lat")) + >>> t = DependencyTree.to_tree(doc.sentences[0]) + >>> len(t.get_dependencies()) + 30 """ - TODO: Add docstring and doctest - """ - edge = "└─ " if indent > 0 else "" - node_str = node.full_str(False) if all_features else str(node) - print(" " * indent + edge + node("relation") + " | " + node_str) + def _get_deps(node: Form, deps: List[Dependency]) -> List[Dependency]: + for child_node in list(node): + deps = _get_deps(child_node, deps) + deps.extend(node.get_dependencies(child_node("relation"))) + return deps - for child_node in list(node): - self._print_treelet(child_node, indent + 4, all_features) + deps = _get_deps(self.getroot(), []) + deps.append(Dependency(None, self.getroot(), "root")) + return deps def print_tree(self, all_features: bool = True): """Prints a pretty-printed (indented) representation of the dependency tree. If all_features is True, then - each node is printed with its complete feature bundle. + each node is printed with its complete feature bundles. 
""" + + def _print_treelet(node: Form, indent: int, all_features: bool): + edge = "└─ " if indent > 0 else "" + node_str = node.full_str(False) if all_features else str(node) + print(" " * indent + edge + node("relation") + " | " + node_str) + + for child_node in list(node): + _print_treelet(child_node, indent + 4, all_features) + self._print_treelet(self.getroot(), indent=0, all_features=all_features) @staticmethod @@ -241,7 +249,7 @@ def to_tree(sentence: List[Word]) -> "DependencyTree": if word.dependency_relation == "root": root = forms[word.index_token] else: - gov = forms[word.governor] + gov = forms[word.governor.index_token] dep = forms[word.index_token] gov >> dep | word.dependency_relation diff --git a/src/cltkv1/languages/glottolog.py b/src/cltkv1/languages/glottolog.py index 2e98a8c..85a3ccc 100644 --- a/src/cltkv1/languages/glottolog.py +++ b/src/cltkv1/languages/glottolog.py @@ -253,7 +253,7 @@ from collections import OrderedDict from typing import List -from cltkv1.utils.data_types import Language +from cltkv1.core.data_types import Language LANGUAGES = OrderedDict( [ diff --git a/src/cltkv1/utils/pipelines.py b/src/cltkv1/languages/pipelines.py similarity index 92% rename from src/cltkv1/utils/pipelines.py rename to src/cltkv1/languages/pipelines.py index 96cc343..c2f9aa6 100644 --- a/src/cltkv1/utils/pipelines.py +++ b/src/cltkv1/languages/pipelines.py @@ -9,9 +9,9 @@ from dataclasses import dataclass, field from typing import Callable, List, Type +from cltkv1.core.data_types import Language, Pipeline, Process from cltkv1.languages.utils import get_lang from cltkv1.tokenizers import DefaultTokenizationProcess, LatinTokenizationProcess -from cltkv1.utils.data_types import Language, Pipeline, Process from cltkv1.wrappers.stanford import StanfordNLPProcess @@ -19,7 +19,7 @@ class LatinPipeline(Pipeline): """Default ``Pipeline`` for Latin. - >>> from cltkv1.utils.pipelines import LatinPipeline + >>> from cltkv1.languages.pipelines import LatinPipeline >>> a_pipeline = LatinPipeline() >>> a_pipeline.description 'Pipeline for the Latin language' @@ -40,7 +40,7 @@ class LatinPipeline(Pipeline): class GreekPipeline(Pipeline): """Default ``Pipeline`` for Ancient Greek. - >>> from cltkv1.utils.pipelines import GreekPipeline + >>> from cltkv1.languages.pipelines import GreekPipeline >>> a_pipeline = GreekPipeline() >>> a_pipeline.description 'Pipeline for the Greek language' @@ -61,7 +61,7 @@ class GreekPipeline(Pipeline): class OCSPipeline(Pipeline): """Default ``Pipeline`` for Old Church Slavonic. - >>> from cltkv1.utils.pipelines import OCSPipeline + >>> from cltkv1.languages.pipelines import OCSPipeline >>> a_pipeline = OCSPipeline() >>> a_pipeline.description 'Pipeline for the Old Church Slavonic language' @@ -82,7 +82,7 @@ class OCSPipeline(Pipeline): class OldFrenchPipeline(Pipeline): """Default ``Pipeline`` for Old French. - >>> from cltkv1.utils.pipelines import OldFrenchPipeline + >>> from cltkv1.languages.pipelines import OldFrenchPipeline >>> a_pipeline = OldFrenchPipeline() >>> a_pipeline.description 'Pipeline for the Old French language' @@ -103,7 +103,7 @@ class OldFrenchPipeline(Pipeline): class GothicPipeline(Pipeline): """Default ``Pipeline`` for Gothic. 
- >>> from cltkv1.utils.pipelines import GothicPipeline + >>> from cltkv1.languages.pipelines import GothicPipeline >>> a_pipeline = GothicPipeline() >>> a_pipeline.description 'Pipeline for the Gothic language' diff --git a/src/cltkv1/languages/utils.py b/src/cltkv1/languages/utils.py index 6c0b7fe..84f9207 100644 --- a/src/cltkv1/languages/utils.py +++ b/src/cltkv1/languages/utils.py @@ -1,8 +1,8 @@ from typing import List +from cltkv1.core.data_types import Language +from cltkv1.core.exceptions import UnknownLanguageError from cltkv1.languages.glottolog import LANGUAGES -from cltkv1.utils.data_types import Language -from cltkv1.utils.exceptions import UnknownLanguageError def get_lang(iso_code: str) -> Language: @@ -12,11 +12,11 @@ def get_lang(iso_code: str) -> Language: >>> from cltkv1.languages.utils import get_lang >>> get_lang("akk") Language(name='Akkadian', glottolog_id='akka1240', latitude=33.1, longitude=44.1, dates=[], family_id='afro1255', parent_id='east2678', level='language', iso_639_3_code='akk', type='a') - >>> from cltkv1.utils.exceptions import UnknownLanguageError + >>> from cltkv1.core.exceptions import UnknownLanguageError >>> get_lang("xxx") Traceback (most recent call last): ... - cltkv1.utils.exceptions.UnknownLanguageError + cltkv1.core.exceptions.UnknownLanguageError """ try: return LANGUAGES[iso_code] diff --git a/src/cltkv1/nlp.py b/src/cltkv1/nlp.py index fc011b7..7da9207 100644 --- a/src/cltkv1/nlp.py +++ b/src/cltkv1/nlp.py @@ -2,16 +2,16 @@ from typing import List -from cltkv1.languages.utils import get_lang -from cltkv1.utils.data_types import Doc, Language, Pipeline, Type -from cltkv1.utils.exceptions import UnimplementedLanguageError, UnknownLanguageError -from cltkv1.utils.pipelines import ( +from cltkv1.core.data_types import Doc, Language, Pipeline, Type +from cltkv1.core.exceptions import UnimplementedLanguageError, UnknownLanguageError +from cltkv1.languages.pipelines import ( GothicPipeline, GreekPipeline, LatinPipeline, OCSPipeline, OldFrenchPipeline, ) +from cltkv1.languages.utils import get_lang pipelines = { "lat": LatinPipeline, @@ -35,8 +35,8 @@ def __init__(self, language: str, custom_pipeline: Pipeline = None) -> None: >>> NLP(language="xxx") Traceback (most recent call last): ... - cltkv1.utils.exceptions.UnknownLanguageError: Unknown language 'xxx'. Use ISO 639-3 languages. - >>> from cltkv1.utils.data_types import Pipeline + cltkv1.core.exceptions.UnknownLanguageError: Unknown language 'xxx'. Use ISO 639-3 languages. + >>> from cltkv1.core.data_types import Pipeline >>> from cltkv1.tokenizers import LatinTokenizationProcess >>> from cltkv1.languages.utils import get_lang >>> a_pipeline = Pipeline(description="A custom Latin pipeline", processes=[LatinTokenizationProcess], language=get_lang("lat")) @@ -58,7 +58,7 @@ def _get_pipeline(self) -> Pipeline: are valid, both in themselves and in unison. >>> from cltkv1 import NLP - >>> from cltkv1.utils.data_types import Pipeline + >>> from cltkv1.core.data_types import Pipeline >>> cltk_nlp = NLP(language="lat") >>> lat_pipeline = cltk_nlp._get_pipeline() >>> isinstance(cltk_nlp.pipeline, Pipeline) @@ -68,7 +68,7 @@ def _get_pipeline(self) -> Pipeline: >>> cltk_nlp = NLP(language="axm") Traceback (most recent call last): ... 
- cltkv1.utils.exceptions.UnimplementedLanguageError: axm + cltkv1.core.exceptions.UnimplementedLanguageError: axm """ try: return pipelines[self.language.iso_639_3_code]() @@ -83,34 +83,34 @@ def analyze(self, text: str) -> Doc: >>> from cltkv1 import NLP >>> from cltkv1.utils.example_texts import get_example_text - >>> from cltkv1.utils.data_types import Doc + >>> from cltkv1.core.data_types import Doc >>> cltk_nlp = NLP(language="lat") >>> cltk_doc = cltk_nlp.analyze(text=get_example_text("lat")) >>> isinstance(cltk_doc, Doc) True - >>> cltk_doc.words[0] - Word(index_char_start=None, index_char_stop=None, index_token=1, index_sentence=0, string='Gallia', pos='A1|grn1|casA|gen2|stAM', lemma='aallius', scansion=None, xpos='A1|grn1|casA|gen2|stAM', upos='NOUN', dependency_relation='nsubj', governor=4, parent_token=]>, feats='Case=Nom|Degree=Pos|Gender=Fem|Number=Sing') + >>> cltk_doc.words[0] # doctest: +ELLIPSIS + Word(index_char_start=None, index_char_stop=None, index_token=1, index_sentence=0, string='Gallia', pos='A1|grn1|casA|gen2|stAM', lemma='aallius', scansion=None, xpos='A1|grn1|casA|gen2|stAM', upos='NOUN', dependency_relation='nsubj', governor=..., parent=..., features={'Case': 'Nom', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'}) >>> from cltkv1.utils.example_texts import get_example_text >>> cltk_nlp = NLP(language="grc") >>> cltk_doc = cltk_nlp.analyze(text=get_example_text("grc")) - >>> cltk_doc.words[0] - Word(index_char_start=None, index_char_stop=None, index_token=1, index_sentence=0, string='ὅτι', pos='Df', lemma='ὅτι#1', scansion=None, xpos='Df', upos='ADV', dependency_relation='advmod', governor=13, parent_token=]>, feats='_') + >>> cltk_doc.words[0] # doctest: +ELLIPSIS + Word(index_char_start=None, index_char_stop=None, index_token=1, index_sentence=0, string='ὅτι', pos='Df', lemma='ὅτι#1', scansion=None, xpos='Df', upos='ADV', dependency_relation='advmod', governor=..., parent=..., features={}) >>> cltk_nlp = NLP(language="chu") >>> cltk_doc = cltk_nlp.analyze(text=get_example_text("chu")) - >>> cltk_doc.words[0] - Word(index_char_start=None, index_char_stop=None, index_token=1, index_sentence=0, string='отьчє', pos='Nb', lemma='отьць', scansion=None, xpos='Nb', upos='NOUN', dependency_relation='nsubj', governor=6, parent_token=]>, feats='Case=Nom|Gender=Masc|Number=Sing') + >>> cltk_doc.words[0] # doctest: +ELLIPSIS + Word(index_char_start=None, index_char_stop=None, index_token=1, index_sentence=0, string='отьчє', pos='Nb', lemma='отьць', scansion=None, xpos='Nb', upos='NOUN', dependency_relation='nsubj', governor=..., parent=..., features={'Case': 'Nom', 'Gender': 'Masc', 'Number': 'Sing'}) >>> cltk_nlp = NLP(language="fro") >>> cltk_doc = cltk_nlp.analyze(text=get_example_text("fro")) - >>> cltk_doc.words[0] - Word(index_char_start=None, index_char_stop=None, index_token=1, index_sentence=0, string='Une', pos='DETndf', lemma='Une', scansion=None, xpos='DETndf', upos='DET', dependency_relation='det', governor=2, parent_token=]>, feats='Definite=Ind|PronType=Art') + >>> cltk_doc.words[0] # doctest: +ELLIPSIS + Word(index_char_start=None, index_char_stop=None, index_token=1, index_sentence=0, string='Une', pos='DETndf', lemma='Une', scansion=None, xpos='DETndf', upos='DET', dependency_relation='det', governor=..., parent=..., features={'Definite': 'Ind', 'PronType': 'Art'}) >>> cltk_nlp = NLP(language="got") >>> cltk_doc = cltk_nlp.analyze(text=get_example_text("got")) - >>> cltk_doc.words[0] - Word(index_char_start=None, index_char_stop=None, index_token=1, 
index_sentence=0, string='swa', pos='Df', lemma='swa', scansion=None, xpos='Df', upos='ADV', dependency_relation='advmod', governor=2, parent_token=]>, feats='_') + >>> cltk_doc.words[0] # doctest: +ELLIPSIS + Word(index_char_start=None, index_char_stop=None, index_token=1, index_sentence=0, string='swa', pos='Df', lemma='swa', scansion=None, xpos='Df', upos='ADV', dependency_relation='advmod', governor=..., parent=..., features={}) >>> len(cltk_doc.sentences) 4 """ diff --git a/src/cltkv1/tokenizers/word.py b/src/cltkv1/tokenizers/word.py index 64949d6..514e490 100644 --- a/src/cltkv1/tokenizers/word.py +++ b/src/cltkv1/tokenizers/word.py @@ -8,7 +8,7 @@ from cltk.tokenize.word import WordTokenizer -from cltkv1.utils.data_types import Doc, Process +from cltkv1.core.data_types import Doc, Process, Word # a closure for marshalling Docs to CLTK tokenizers @@ -16,7 +16,12 @@ def make_tokenizer_algorithm(language: str) -> Callable[[Doc], Doc]: tokenizer = WordTokenizer(language=language) def algorithm(self, doc: Doc) -> Doc: - doc.tokens = tokenizer.tokenize(doc.raw) + doc.words = [] + + for i, token in enumerate(tokenizer.tokenize(doc.raw)): + word = Word(string=token, index_token=i) + doc.words.append(word) + return doc return algorithm @@ -42,7 +47,7 @@ class TokenizationProcess(Process): Example: ``TokenizationProcess`` -> ``LatinTokenizationProcess`` >>> from cltkv1.tokenizers.word import TokenizationProcess - >>> from cltkv1.utils.data_types import Process + >>> from cltkv1.core.data_types import Process >>> issubclass(TokenizationProcess, Process) True >>> tok = TokenizationProcess(input_doc=Doc(raw="some input data")) diff --git a/src/cltkv1/utils/__init__.py b/src/cltkv1/utils/__init__.py index d953c86..85f9d33 100644 --- a/src/cltkv1/utils/__init__.py +++ b/src/cltkv1/utils/__init__.py @@ -1,4 +1,3 @@ """Init for `cltkv1.utils`.""" -from .exceptions import * from .utils import * diff --git a/src/cltkv1/utils/example_texts.py b/src/cltkv1/utils/example_texts.py index 7e1b230..7f569e7 100644 --- a/src/cltkv1/utils/example_texts.py +++ b/src/cltkv1/utils/example_texts.py @@ -13,8 +13,8 @@ # pylint: disable=line-too-long +from cltkv1.core.exceptions import UnimplementedLanguageError from cltkv1.languages.utils import get_lang -from cltkv1.utils.exceptions import UnimplementedLanguageError EXAMPLE_TEXTS = dict( # Akkadian @@ -134,11 +134,11 @@ def get_example_text(iso_code: str) -> str: >>> get_example_text("zkz") Traceback (most recent call last): ... - cltkv1.utils.exceptions.UnimplementedLanguageError: Example text unavailable for ISO 639-3 code 'zkz'. + cltkv1.core.exceptions.UnimplementedLanguageError: Example text unavailable for ISO 639-3 code 'zkz'. >>> get_example_text("xxx") Traceback (most recent call last): ... 
- cltkv1.utils.exceptions.UnknownLanguageError + cltkv1.core.exceptions.UnknownLanguageError """ get_lang(iso_code=iso_code) try: diff --git a/src/cltkv1/wrappers/stanford.py b/src/cltkv1/wrappers/stanford.py index 4e9c7f5..849b5c9 100644 --- a/src/cltkv1/wrappers/stanford.py +++ b/src/cltkv1/wrappers/stanford.py @@ -8,14 +8,9 @@ import stanfordnlp # type: ignore -from cltkv1.utils import ( - UnimplementedLanguageError, - UnknownLanguageError, - example_texts, - file_exists, - suppress_stdout, -) -from cltkv1.utils.data_types import Doc, Process, Word +from cltkv1.core.data_types import Doc, Process, Word +from cltkv1.core.exceptions import UnimplementedLanguageError, UnknownLanguageError +from cltkv1.utils import example_texts, file_exists, suppress_stdout from cltkv1.utils.example_texts import EXAMPLE_TEXTS LOG = logging.getLogger(__name__) @@ -53,7 +48,7 @@ def __init__(self, language: str, treebank: Optional[str] = None) -> None: >>> StanfordNLPWrapper(language="xxx") Traceback (most recent call last): ... - cltkv1.utils.exceptions.UnknownLanguageError: Language 'xxx' either not in scope for CLTK or not supported by StanfordNLP. + cltkv1.core.exceptions.UnknownLanguageError: Language 'xxx' either not in scope for CLTK or not supported by StanfordNLP. >>> stanford_wrapper = StanfordNLPWrapper(language="grc", treebank="grc_proiel") >>> snlp_doc = stanford_wrapper.parse(get_example_text("grc")) @@ -70,7 +65,7 @@ def __init__(self, language: str, treebank: Optional[str] = None) -> None: >>> stanford_wrapper = StanfordNLPWrapper(language="lat", treebank="xxx") Traceback (most recent call last): ... - cltkv1.utils.exceptions.UnimplementedLanguageError: Invalid treebank 'xxx' for language 'lat'. + cltkv1.core.exceptions.UnimplementedLanguageError: Invalid treebank 'xxx' for language 'lat'. """ self.language = language self.treebank = treebank @@ -346,17 +341,15 @@ def __init__(self, input_doc, language): def algorithm(self, doc): stanfordnlp_doc = self.stanfordnlp_wrapper.parse(doc.raw) - (cltk_words, indices_tokens) = StanfordNLPProcess.stanfordnlp_to_cltk_word_type( - stanfordnlp_doc - ) + cltk_words = StanfordNLPProcess.stanfordnlp_to_cltk_word_type(stanfordnlp_doc) doc.words = cltk_words - doc.indices_tokens = indices_tokens doc.stanfordnlp_doc = stanfordnlp_doc return doc @staticmethod def stanfordnlp_to_cltk_word_type(stanfordnlp_doc): + """Take an entire ``stanfordnlp`` document, extract each word, and encode it in the way expected by the CLTK's ``Word`` type. 
@@ -371,13 +364,13 @@ def stanfordnlp_to_cltk_word_type(stanfordnlp_doc): >>> isinstance(cltk_words[0], Word) True >>> cltk_words[0] - Word(index_char_start=None, index_char_stop=None, index_token=1, index_sentence=0, string='Gallia', pos='A1|grn1|casA|gen2|stAM', lemma='aallius', scansion=None, xpos='A1|grn1|casA|gen2|stAM', upos='NOUN', dependency_relation='nsubj', governor=4, parent_token=]>, feats='Case=Nom|Degree=Pos|Gender=Fem|Number=Sing') + Word(index_char_start=None, index_char_stop=None, index_token=1, index_sentence=0, string='Gallia', pos='A1|grn1|casA|gen2|stAM', lemma='aallius', scansion=None, xpos='A1|grn1|casA|gen2|stAM', upos='NOUN', dependency_relation='nsubj', governor=Word(index_char_start=None, index_char_stop=None, index_token=4, index_sentence=0, string='divisa', pos='L2', lemma='divido', scansion=None, xpos='L2', upos='VERB', dependency_relation='root', governor=None, parent=None, features={'Aspect': 'Perf', 'Case': 'Nom', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing', 'Tense': 'Past', 'VerbForm': 'Part', 'Voice': 'Pass'}), parent=None, features={'Case': 'Nom', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'}) """ words_list = list() - sentence_list = list() for sentence_index, sentence in enumerate(stanfordnlp_doc.sentences): - token_indices = list() + sent_words = dict() + indices = list() for token_index, token in enumerate(sentence.tokens): stanfordnlp_word = token.words[0] @@ -390,12 +383,27 @@ def stanfordnlp_to_cltk_word_type(stanfordnlp_doc): upos=stanfordnlp_word.upos, lemma=stanfordnlp_word.lemma, dependency_relation=stanfordnlp_word.dependency_relation, - governor=stanfordnlp_word.governor, - parent_token=stanfordnlp_word.parent_token, - feats=stanfordnlp_word.feats, + features={} + if stanfordnlp_word.feats == "_" + else dict( + [f.split("=") for f in stanfordnlp_word.feats.split("|")] + ), + ) + sent_words[cltk_word.index_token] = cltk_word + indices.append( + ( + int(stanfordnlp_word.governor), + int(stanfordnlp_word.parent_token.index), + ) ) words_list.append(cltk_word) - token_indices.append(token_index) - sentence_list.append(token_indices) - return (words_list, sentence_list) + for i, cltk_word in enumerate(sent_words.values()): + (governor_index, parent_index) = indices[i] + cltk_word.governor = ( + sent_words[governor_index] if governor_index > 0 else None + ) + if cltk_word.index_token != sent_words[parent_index].index_token: + cltk_word.parent = sent_words[parent_index] + + return words_list
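
A quick usage sketch of the new ``Doc`` properties introduced by this patch. The printed values follow the patch's own doctests; the sketch assumes the stanfordnlp Latin models have already been downloaded.

# Illustrative only: exercises Doc.tokens, Doc.pos, Doc.morphosyntactic_features,
# and Doc.sentences as added in src/cltkv1/core/data_types.py above.
from cltkv1 import NLP
from cltkv1.utils.example_texts import get_example_text

cltk_nlp = NLP(language="lat")
cltk_doc = cltk_nlp.analyze(text=get_example_text("lat"))

print(cltk_doc.tokens[:5])  # ['Gallia', 'est', 'omnis', 'divisa', 'in']
print(cltk_doc.pos[:3])  # ['NOUN', 'AUX', 'DET']
print(cltk_doc.morphosyntactic_features[0])
# {'Case': 'Nom', 'Degree': 'Pos', 'Gender': 'Fem', 'Number': 'Sing'}

# Word.governor and Word.parent are now Word references rather than bare
# stanfordnlp indices, so a token's head can be read off directly.
gallia = cltk_doc.words[0]
print(gallia.governor.string)  # 'divisa', the root of the first sentence
print(len(cltk_doc.sentences))  # sentences are regrouped via Word.index_sentence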
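
The reworked ``Form``/``Dependency`` API in ``src/cltkv1/dependency/tree.py`` can likewise be exercised by hand. This sketch restates the doctests above; the John/loves/Mary forms are purely illustrative.

# Form overloads /, >>, and | to set a POS tag, attach a dependent, and
# name (type) the resulting Dependency, respectively.
from cltkv1 import NLP
from cltkv1.dependency.tree import DependencyTree, Form
from cltkv1.utils.example_texts import get_example_text

john = Form("John", 1) / "NNP"  # Form / tag sets the POS attribute
loves = Form("loves", 2) / "VRB"
mary = Form("Mary", 3) / "NNP"
loves >> john | "subj"  # head >> dependent | relation
loves >> mary | "obj"
print(loves.get_dependencies("subj"))  # [subj(loves_2/VRB, John_1/NNP)]
print(john.full_str())  # 'John_1 [pos=NNP,relation=subj]'

# An analyzed sentence (a list of Words) converts directly into a tree.
doc = NLP(language="lat").analyze(text=get_example_text("lat"))
tree = DependencyTree.to_tree(doc.sentences[0])
print(len(tree.get_dependencies()))  # 30, counting the root pseudo-dependency
tree.print_tree()  # indented rendering of the whole sentence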