diff --git a/examples/linformer/README.md b/examples/linformer/README.md
index e5c11e052d..cedd667835 100644
--- a/examples/linformer/README.md
+++ b/examples/linformer/README.md
@@ -6,7 +6,7 @@ This example contains code to train Linformer models as described in our paper
 ## Training a new Linformer RoBERTa model
 
 You can mostly follow the [RoBERTa pretraining README](/examples/roberta/README.pretraining.md),
-but replace the architecture with `--arch linformer_roberta_base` in your training command.
+updating your training command with `--user-dir examples/linformer/src --arch linformer_roberta_base`.
 
 ## Citation
 
diff --git a/fairseq/utils.py b/fairseq/utils.py
index fdbf66cf3f..0044d76f3d 100644
--- a/fairseq/utils.py
+++ b/fairseq/utils.py
@@ -433,11 +433,17 @@ def import_user_module(args):
     if module_path is not None:
         module_path = os.path.abspath(args.user_dir)
         if not os.path.exists(module_path):
-            fairseq_rel_path = os.path.join(
-                os.path.dirname(__file__), "..", args.user_dir
-            )
+            fairseq_rel_path = os.path.join(os.path.dirname(__file__), args.user_dir)
             if os.path.exists(fairseq_rel_path):
                 module_path = fairseq_rel_path
+            else:
+                fairseq_rel_path = os.path.join(
+                    os.path.dirname(__file__), "..", args.user_dir
+                )
+                if os.path.exists(fairseq_rel_path):
+                    module_path = fairseq_rel_path
+                else:
+                    raise FileNotFoundError(module_path)
 
         # We want to import the module under a unique name so that it doesn't
         # collide with existing modules. At the same time we don't want to
diff --git a/setup.py b/setup.py
index ad2ea2088b..54c752d257 100644
--- a/setup.py
+++ b/setup.py
@@ -127,51 +127,86 @@ def include_dirs(self, dirs):
     )
 
 
-setup(
-    name="fairseq",
-    version="0.9.0",
-    description="Facebook AI Research Sequence-to-Sequence Toolkit",
-    url="https://github.com/pytorch/fairseq",
-    classifiers=[
-        "Intended Audience :: Science/Research",
-        "License :: OSI Approved :: MIT License",
-        "Programming Language :: Python :: 3.6",
-        "Topic :: Scientific/Engineering :: Artificial Intelligence",
-    ],
-    long_description=readme,
-    long_description_content_type="text/markdown",
-    setup_requires=[
-        "cython",
-        "numpy",
-        "setuptools>=18.0",
-    ],
-    install_requires=[
-        "cffi",
-        "cython",
-        "dataclasses",
-        "editdistance",
-        "hydra-core",
-        "numpy",
-        "regex",
-        "sacrebleu>=1.4.12",
-        "torch",
-        "tqdm",
-    ],
-    dependency_links=dependency_links,
-    packages=find_packages(exclude=["scripts", "tests"]),
-    ext_modules=extensions,
-    test_suite="tests",
-    entry_points={
-        "console_scripts": [
-            "fairseq-eval-lm = fairseq_cli.eval_lm:cli_main",
-            "fairseq-generate = fairseq_cli.generate:cli_main",
-            "fairseq-interactive = fairseq_cli.interactive:cli_main",
-            "fairseq-preprocess = fairseq_cli.preprocess:cli_main",
-            "fairseq-score = fairseq_cli.score:cli_main",
-            "fairseq-train = fairseq_cli.train:cli_main",
-            "fairseq-validate = fairseq_cli.validate:cli_main",
+def do_setup(package_data):  # calls setup() with the fairseq metadata; package_data is forwarded as-is
+    setup(
+        name="fairseq",
+        version="0.9.0",
+        description="Facebook AI Research Sequence-to-Sequence Toolkit",
+        url="https://github.com/pytorch/fairseq",
+        classifiers=[
+            "Intended Audience :: Science/Research",
+            "License :: OSI Approved :: MIT License",
+            "Programming Language :: Python :: 3.6",
+            "Topic :: Scientific/Engineering :: Artificial Intelligence",
         ],
-    },
-    cmdclass=cmdclass,
-    zip_safe=False,
-)
+        long_description=readme,
+        long_description_content_type="text/markdown",
+        setup_requires=[
+            "cython",
+            "numpy",
+            "setuptools>=18.0",
+        ],
+        install_requires=[
+            "cffi",
+            "cython",
+            "dataclasses",
+            "editdistance",
+            "hydra-core",
+            "numpy",
+            "regex",
+            "sacrebleu>=1.4.12",
+            "torch",
+            "tqdm",
+        ],
+        dependency_links=dependency_links,
+        packages=find_packages(  # leave out examples/scripts/tests and their subpackages
+            exclude=[
+                "examples",
+                "examples.*",
+                "scripts",
+                "scripts.*",
+                "tests",
+                "tests.*",
+            ]
+        ),
+        package_data=package_data,  # caller-supplied mapping, passed through unchanged
+        ext_modules=extensions,
+        test_suite="tests",
+        entry_points={
+            "console_scripts": [  # each entry maps a fairseq-* command to a fairseq_cli cli_main
+                "fairseq-eval-lm = fairseq_cli.eval_lm:cli_main",
+                "fairseq-generate = fairseq_cli.generate:cli_main",
+                "fairseq-interactive = fairseq_cli.interactive:cli_main",
+                "fairseq-preprocess = fairseq_cli.preprocess:cli_main",
+                "fairseq-score = fairseq_cli.score:cli_main",
+                "fairseq-train = fairseq_cli.train:cli_main",
+                "fairseq-validate = fairseq_cli.validate:cli_main",
+            ],
+        },
+        cmdclass=cmdclass,
+        zip_safe=False,
+    )
+
+
+def get_files(path, relative_to="fairseq"):
+    """List non-.pyc files under path (following symlinks), relative to relative_to."""
+    collected = []
+    for dirpath, _subdirs, filenames in os.walk(path, followlinks=True):
+        rel_dir = os.path.relpath(dirpath, relative_to)
+        collected.extend(
+            os.path.join(rel_dir, name) for name in filenames if not name.endswith(".pyc")
+        )
+    return collected
+
+
+links = ["fairseq/config", "fairseq/examples"]
+try:
+    # symlink config and examples into fairseq package so package_data accepts them
+    for link in links:
+        os.symlink(os.path.join("..", os.path.basename(link)), link)
+    package_data = {"fairseq": [f for link in links for f in get_files(link)]}
+    do_setup(package_data)
+finally:
+    for link in links:
+        if os.path.islink(link):  # skip paths that are not symlinks so the real error surfaces
+            os.unlink(link)
diff --git a/tests/test_binaries.py b/tests/test_binaries.py
index 4b87afea55..c6722402a1 100644
--- a/tests/test_binaries.py
+++ b/tests/test_binaries.py
@@ -7,6 +7,7 @@
 import logging
 import os
 import random
+import sys
 import tempfile
 import unittest
 from io import StringIO
@@ -294,6 +295,7 @@ def test_multilingual_transformer(self):
                             + dec_ltok_flag,
                         )
 
+    @unittest.skipIf(sys.platform.lower() == "darwin", "skip latent depth test on MacOS")
     def test_multilingual_translation_latent_depth(self):
         # test with latent depth in encoder, decoder, or both
         encoder_latent_layer = [[], ["--encoder-latent-layer"]]