diff --git a/inspire_schemas/parsers/arxiv.py b/inspire_schemas/parsers/arxiv.py
index 191e87e5..154f1654 100644
--- a/inspire_schemas/parsers/arxiv.py
+++ b/inspire_schemas/parsers/arxiv.py
@@ -29,13 +29,6 @@
 import six
 from inspire_utils.dedupers import dedupe_list
 from inspire_utils.helpers import maybe_int
-from inspire_utils.utils import (
-    CONFERENCE_WORDS,
-    THESIS_WORDS,
-    coll_cleanforthe,
-    get_node,
-    split_fullname,
-)
 from pylatexenc.latex2text import (
     EnvironmentTextSpec,
     LatexNodes2Text,
@@ -44,6 +37,13 @@
 )
 
 from inspire_schemas.api import LiteratureBuilder
+from inspire_schemas.parsers.utils import (
+    CONFERENCE_WORDS,
+    THESIS_WORDS,
+    coll_cleanforthe,
+    get_node,
+    split_fullname,
+)
 from inspire_schemas.utils import classify_field, normalize_arxiv_category
 
 RE_CONFERENCE = re.compile(
diff --git a/inspire_schemas/parsers/elsevier.py b/inspire_schemas/parsers/elsevier.py
index 7985da21..30fcb18b 100644
--- a/inspire_schemas/parsers/elsevier.py
+++ b/inspire_schemas/parsers/elsevier.py
@@ -28,9 +28,9 @@
 import six
 from inspire_utils.date import PartialDate
 from inspire_utils.helpers import maybe_int, remove_tags
-from inspire_utils.utils import get_node
 
 from inspire_schemas.api import LiteratureBuilder, ReferenceBuilder
+from inspire_schemas.parsers.utils import get_node
 
 DOCTYPE_MAPPING = {
     "abs": "abstract",
diff --git a/inspire_schemas/parsers/jats.py b/inspire_schemas/parsers/jats.py
index ae6c9f61..d6397aa4 100644
--- a/inspire_schemas/parsers/jats.py
+++ b/inspire_schemas/parsers/jats.py
@@ -30,9 +30,9 @@
 from idutils import normalize_orcid
 from inspire_utils.date import PartialDate
 from inspire_utils.helpers import maybe_int, remove_tags
-from inspire_utils.utils import get_node
 
 from inspire_schemas.api import LiteratureBuilder, ReferenceBuilder
+from inspire_schemas.parsers.utils import get_node
 from inspire_schemas.utils import split_page_artid
 
 JOURNAL_TITLES_MAPPING = {"Physics": "APS Physics"}
diff --git a/inspire_schemas/parsers/utils.py b/inspire_schemas/parsers/utils.py
new file mode 100644
index 00000000..6c6b67d3
--- /dev/null
+++ b/inspire_schemas/parsers/utils.py
@@ -0,0 +1,140 @@
+from __future__ import (
+    absolute_import,
+    division,
+    print_function,
+)
+
+import re
+
+from scrapy.selector import Selector
+
+RE_FOR_THE = re.compile(  # connective between an author and a collaboration
+    r'\b(?:for|on behalf of|representing)\b',
+    re.IGNORECASE,
+)
+INST_PHRASES = ['for the development', ]  # marks input as not a collaboration
+
+
+def get_node(text, namespaces=None):
+    """Build an XML scrapy ``Selector`` over ``text``, binding namespaces."""
+    selector = Selector(text=text, type="xml")
+    # Each entry is indexed as (prefix, uri); None or an empty value is skipped.
+    for entry in namespaces or ():
+        selector.register_namespace(entry[0], entry[1])
+    return selector
+
+
+def coll_cleanforthe(coll):
+    """Normalize a collaboration name; return ``(collaboration, author)``."""
+    lowered = coll.lower()
+    if any(phrase in lowered for phrase in INST_PHRASES):
+        # Institutional wording, not a collaboration: hand it back untouched.
+        return coll, None
+
+    author = None
+    cleaned = coll.strip('.; ')
+    # Text before the connective is the author, text after the collaboration;
+    # either side is used only if it contains at least one word character.
+    pieces = RE_FOR_THE.split(cleaned, maxsplit=1)
+    if len(pieces) == 2:
+        before, after = pieces
+        if re.search(r'\w', before):
+            author = before.strip()
+        if re.search(r'\w', after):
+            cleaned = after
+
+    # Drop a leading "the" and every "collaboration(s)" with adjacent spaces.
+    cleaned = re.sub('(?i)^ *the ', '', cleaned)
+    cleaned = re.sub('(?i) *collaborations? *', '', cleaned)
+    return cleaned.strip(), author
+
+
+def split_fullname(author, switch_name_order=False):
+    """Split an author name into ``(surname, given_names)``.
+
+    With a comma the surname is the first comma-separated part; without one
+    it is the last whitespace-separated word, so multi-part surnames are
+    misdetected in that form. ``switch_name_order=True`` inverts the choice.
+
+    An empty, ``None`` or whitespace-only ``author`` yields ``("", "")``.
+    """
+    # Reject blank input up front: "   ".split() is an empty list, and the
+    # surname lookup below would raise IndexError on it.
+    if not author or not author.strip():
+        return "", ""
+
+    has_comma = "," in author
+    if has_comma:
+        fullname = [part.strip() for part in author.split(",")]
+    else:
+        # str.split() already discards surrounding whitespace.
+        fullname = author.split()
+
+    # Comma form defaults to surname-first; the switch flips either default.
+    surname_first = has_comma != bool(switch_name_order)
+    if surname_first:
+        surname, given = fullname[0], fullname[1:]
+    else:
+        surname, given = fullname[-1], fullname[:-1]
+
+    return surname, " ".join(given)
+
+
+CONFERENCE_WORDS = [  # lowercase conference-type keywords (see arxiv.py)
+    'colloquium',
+    'colloquiums',
+    'conf',
+    'conference',
+    'conferences',
+    'contrib',
+    'contributed',
+    'contribution',
+    'contributions',
+    'forum',
+    'lecture',
+    'lectures',
+    'meeting',
+    'meetings',
+    'pres',
+    'presented',
+    'proc',
+    'proceeding',
+    'proceedings',
+    'rencontre',
+    'rencontres',
+    'school',
+    'schools',
+    'seminar',
+    'seminars',
+    'symp',
+    'symposium',
+    'symposiums',
+    'talk',
+    'talks',
+    'workshop',
+    'workshops'
+]
+
+THESIS_WORDS = [  # lowercase, multilingual thesis keywords (see arxiv.py)
+    'diploma',
+    'diplomarbeit',
+    'diplome',
+    'dissertation',
+    'doctoraal',
+    'doctoral',
+    'doctorat',
+    'doctorate',
+    'doktorarbeit',
+    'dottorato',
+    'habilitationsschrift',
+    'hochschule',
+    'inauguraldissertation',
+    'memoire',
+    'phd',
+    'proefschrift',
+    'schlussbericht',
+    'staatsexamensarbeit',
+    'tesi',
+    'thesis',
+    'travail'
+]
diff --git a/setup.py b/setup.py
index 25df2db4..41778c82 100644
--- a/setup.py
+++ b/setup.py
@@ -223,6 +223,8 @@ def do_setup():
             # requests requires a urllib3 version <1.26 but not 1.25.0 and 1.25.1
             # we pin it down here to solve dependency problems
             'urllib3>=1.21.1,<1.26,!=1.25.0,!=1.25.1',
+            'scrapy',
+            'pylatexenc',
         ],
         tests_require=tests_require,
         extras_require=extras_require,
diff --git a/tests/unit/test_parsers_arxiv.py b/tests/unit/test_parsers_arxiv.py
index 6e1938e0..91c425d1 100644
--- a/tests/unit/test_parsers_arxiv.py
+++ b/tests/unit/test_parsers_arxiv.py
@@ -26,7 +26,7 @@
     print_function,
 )
 
-from inspire_utils.parsers.arxiv import ArxivParser
+from inspire_schemas.parsers.arxiv import ArxivParser
 
 
 def test_latex_to_unicode_handles_arxiv_escape_sequences():
diff --git a/tests/unit/test_parsers_author_xml.py b/tests/unit/test_parsers_author_xml.py
index 56236c8d..379547b6 100644
--- a/tests/unit/test_parsers_author_xml.py
+++ b/tests/unit/test_parsers_author_xml.py
@@ -26,7 +26,7 @@
     print_function,
 )
 
-from inspire_utils.parsers.author_xml import AuthorXMLParser
+from inspire_schemas.parsers.author_xml import AuthorXMLParser
 
 
 def test_parsing_author_xml():
diff --git a/tests/unit/test_parsers_crossref.py b/tests/unit/test_parsers_crossref.py
index 396cd59a..641efd3c 100644
--- a/tests/unit/test_parsers_crossref.py
+++ b/tests/unit/test_parsers_crossref.py
@@ -31,8 +31,8 @@
 import pytest
 import yaml
 from fixtures import get_test_suite_path
-from inspire_utils.parsers.crossref import CrossrefParser
 
+from inspire_schemas.parsers.crossref import CrossrefParser
 from inspire_schemas.utils import validate
 
 
diff --git a/tests/unit/test_parsers_elsevier.py b/tests/unit/test_parsers_elsevier.py
index ad6cf0ee..32e9f6b8 100644
--- a/tests/unit/test_parsers_elsevier.py
+++ b/tests/unit/test_parsers_elsevier.py
@@ -32,8 +32,8 @@
 import yaml
 from deepdiff import DeepDiff
 from fixtures import get_test_suite_path
-from inspire_utils.parsers.elsevier import ElsevierParser
 
+from inspire_schemas.parsers.elsevier import ElsevierParser
 from inspire_schemas.utils import validate
 
 
diff --git a/tests/unit/test_parsers_jats.py b/tests/unit/test_parsers_jats.py
index 8ecbddb8..3876a483 100644
--- a/tests/unit/test_parsers_jats.py
+++ b/tests/unit/test_parsers_jats.py
@@ -32,8 +32,8 @@
 import yaml
 from deepdiff import DeepDiff
 from fixtures import get_test_suite_path
-from inspire_utils.parsers.jats import JatsParser
 
+from inspire_schemas.parsers.jats import JatsParser
 from inspire_schemas.utils import validate