diff --git a/.github/workflows/ci-test.yaml b/.github/workflows/ci-test.yaml
index 2a744ed5..190b0380 100644
--- a/.github/workflows/ci-test.yaml
+++ b/.github/workflows/ci-test.yaml
@@ -14,15 +14,23 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, ubuntu-22.04]
-        gcc-version: [11, 12]
+        os: [ubuntu-24.04, ubuntu-22.04]
+        gcc-version: [11, 12, 13, 14]
         mpi-type: [mpich, openmpi]
         exclude:
-          - os: ubuntu-latest
-            gcc-version: 8
+          - os: ubuntu-22.04
+            gcc-version: 13
+          - os: ubuntu-22.04
+            gcc-version: 14
+          - os: ubuntu-24.04
+            mpi-type: mpich
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v4
+      - name: Update apt
+        run: |
+          sudo add-apt-repository -y universe
+          sudo apt-get update
       - name: Cache boost
         uses: actions/cache@v4
         id: cache-boost
@@ -49,8 +57,8 @@ jobs:
         if: matrix.mpi-type == 'openmpi'
         run: sudo apt-get install openmpi-bin libopenmpi-dev
       - name: Install GCC-${{ matrix.gcc-version }}
-        if: matrix.gcc-version == '8'
-        run: sudo apt-get install gcc-8 g++-8
+        if: (matrix.gcc-version == '11' && matrix.os == 'ubuntu-24.04')
+        run: sudo apt-get install gcc-11 g++-11
       - name: Make
         run: |
           echo Run 'make'
diff --git a/.gitignore b/.gitignore
index 7d9106a8..cb84f20f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@
 *#*
 build*
 .vscode*
-.idea*
\ No newline at end of file
+.idea*
+.cache/
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 2ea25151..dab9ff42 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -2,7 +2,7 @@ version: 2
 
 # Set OS and Python versions
 build:
-  os: ubuntu-22.04
+  os: ubuntu-24.04
   tools:
     python: "3.12"
 
@@ -13,4 +13,4 @@ python:
 
 # Change the location of the configuration file
 sphinx:
-  configuration: docs/rtd/conf.py
\ No newline at end of file
+  configuration: docs/rtd/conf.py
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 027214a1..31a372bb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -144,7 +144,7 @@ endif ()
 #
 include(FindArrowParquet)
 option(YGM_REQUIRE_ARROW_PARQUET "YGM requires Apache Arrow Parquet." OFF)
-find_arrow_parquet()
+find_or_install_arrow_parquet()
 
 #
 # Create the YGM target library
diff --git a/Readme.md b/Readme.md
index f7c12e9f..8ac66c22 100644
--- a/Readme.md
+++ b/Readme.md
@@ -1,33 +1,70 @@
-# What is YGM?
-
-YGM is an asynchronous communication library designed for irregular communication patterns. It is built on a
-communicator abstraction, much like MPI, but communication is handled asynchronously and is initiated by senders without
-any interaction with receivers. YGM features
-* **Message buffering** - Increases application throughput.
-* **Fire-and-Forget RPC Semantics** - A sender provides the function and function arguments for execution on a specified
-  destination rank through an `async` call. This function will complete on the destination rank at an unspecified time
-  in the future, but YGM does not explicitly make the sender aware of this completion.
-* **Storage Containers** - YGM provides a collection of distributed storage containers with asynchronous
-  interfaces, used for many common distributed memory operations. Containers are designed to partition data, allowing
-insertions to occur from any rank. Data is accessed through collective `for_all` operations that execute a user-provided
-function on every stored object, or, when a particular piece of data's location is known, `visit`-type operations that
-perform a user-provided function only on the desired data. These containers are found
-[here](/include/ygm/container/).
-
-# Getting Started
+## What is YGM?
+
+YGM is an asynchronous communication library written in C++ and designed for high-performance computing (HPC) use cases featuring 
+irregular communication patterns. YGM includes a collection of
+distributed-memory storage containers designed to express common algorithmic and data-munging tasks. These containers
+automatically partition data, allowing insertions and, with most containers, processing of individual elements to be
+initiated from any running YGM process.
+
+Underlying YGM's containers is a communicator abstraction. This communicator asynchronously sends messages spawned by
+senders with receivers needing no knowledge of incoming messages prior to their arrival. YGM communications take the
+form of *active messages*; each message contains a function object to execute (often in the form of C++ lambdas), data
+and/or pointers to data for this function to execute on, and a destination process for the message to be executed at.
+
+YGM also includes a set of I/O primitives for parsing collections of input documents in parallel as independent lines of
+text and streaming output lines to
+large numbers of destination files. Current parsing functionality supports reading input as CSV, ndjson, and
+unstructured lines of data.
+
+## General YGM Operations
+
+YGM is built on its ability to communicate active messages asynchronously between running processes. This does not
+capture every operation that can be useful, for instance collective operations are still widely needed. YGM uses
+prefixes on function names to distinguish their behaviors in terms of the processes involved. These prefixes are:
+   * `async_`: Asynchronous operation initiated on a single process. The execution of the underlying function may
+     occur on a remote process.
+   * `local_`: Function performs only local operations on data of the current process. In uses within YGM containers
+     with partitioning schemes that determine item ownership, care must be taken to ensure the process a `local_`
+     operation is called from aligns with the item's owner. For instance, calling `ygm::container::map::local_insert`
+     will store an item on the process where the call is made, but the `ygm::container::map` may not be able to look
+     up this location if it is on the wrong process.
+   * No Prefix: Collective operation that must be called from all processes.
+
+The primary workhorse functions in YGM fall into the two categories of `async_` and `for_all` operations. In an
+`async_` operation, a lambda is asynchronously sent to a (potentially) remote process for execution. In many cases
+with YGM containers, the lambda being executed is not provided by the user and is instead part of the function itself,
+e.g. `async_insert` calls on most containers. A `for_all` operation is a collective operation in which a lambda is 
+executed locally on every process while iterating over all locally held items of some YGM object. The items iterated
+over can be items in a YGM container, items coming from a map, filter, or flatten applied to a container, or all lines
+in a collection of files in a YGM I/O parser.
+
+### Lambda Capture Rules
+Certain `async_` and `for_all` operations require users to provide lambdas as part of their executions. The lambdas
+that can be accepted by these two classes of functions follow different rules pertaining to the capturing of variables:
+   * `async_` calls cannot capture (most) variables in lambdas. Variables necessary for lambda execution must be
+     provided as arguments to the `async_` call. In the event that the data for the lambda resides on the remote
+     process the lambda will execute on, a `ygm::ygm_ptr` should be passed as an argument to the `async_`.
+   * `for_all` calls assume lambdas take only the arguments inherently provided by the YGM object being iterated over.
+     All other necessary variables *must* be captured. The types of arguments provided to the lambda can be identified
+     by the `for_all_args` type within the YGM object.
+
+These differences in behavior arise from the distinction that `async_` lambdas may execute on a remote process, while
+`for_all` lambdas are guaranteed to execute locally to a process. In the case of `async_` operations, the lambda and
+all arguments must be serialized for communication, but C++ does not provide a method for inspection of variables
+captured in the closure of a lambda. In the case of `for_all` operations, the execution is equivalent to calling
+[`std::for_each`](https://en.cppreference.com/w/cpp/algorithm/for_each) on the entire collection of items held locally.
 
 ## Requirements
-* C++17 - GCC versions 8, 9 and 10 are tested. Your mileage may vary with other compilers.
+* C++20 - GCC versions 11 through 14 are tested. Your mileage may vary with other compilers.
 * [Cereal](https://github.com/USCiLab/cereal) - C++ serialization library
 * MPI
 * Optionally, Boost 1.77 to enable Boost.JSON support.  
 
-
 ## Using YGM with CMake
 YGM is a header-only library that is easy to incorporate into a project through CMake. Adding the following to
 CMakeLists.txt will install YGM and its dependencies as part of your project:
 ```
-set(DESIRED_YGM_VERSION 0.4)
+set(DESIRED_YGM_VERSION 0.6)
 find_package(ygm ${DESIRED_YGM_VERSION} CONFIG)
 if (NOT ygm_FOUND)
     FetchContent_Declare(
@@ -52,62 +89,6 @@ else ()
 endif ()
 ```
 
-# Anatomy of a YGM Program
-Here we will walk through a basic "hello world" YGM program. The [examples directory](/examples/) contains several other
-examples, including many using YGM's storage containers.
-
-To begin, headers for a YGM communicator are needed
-``` C++
-#include <ygm/comm.hpp>
-```
-
-At the beginning of the program, a YGM communicator must be constructed. It will be given `argc` and `argv` like
-`MPI_Init`, and it has an optional third argument that specifies the aggregate size (in bytes) allowed for all send
-buffers before YGM begins flushing sends. Here, we will make a buffer with 32MB of aggregate send buffer space.
-``` C++
-ygm::comm world(&argc, &argv, 32*1024*1024);
-```
-
-Next, we need a lambda to send through YGM. We'll do a simple hello\_world type of lambda.
-``` C++
-auto hello_world_lambda = [](const std::string &name) {
-	std::cout << "Hello " << name << std::endl;
-};
-```
-
-Finally, we use this lambda inside of our `async` calls. In this case, we will have rank 0 send a message to rank 1,
-telling it to greet the world
-``` C++
-if (world.rank0()) {
-	world.async(1, hello_world_lambda, std::string("world"));
-}
-```
-
-The full, compilable version of this example is found [here](/examples/hello_world.cpp). Running it prints a single
-"Hello world".
-
-# Potential Pitfalls
-
-## Allowed Lambdas
-There are two distinct classes of lambdas that can be given to YGM: *remote lambdas* and *local lambdas*, each of which
-has different requirements.
-
-### Remote Lambdas
-A *remote lambda* is any lambda that may potentially be executed on a different rank. These lambdas are identified as
-being those given to a `ygm::comm` or any of the storage containers through a function prefixed by `async_`.
-
-The defining feature of remote lambdas is they **must not** capture any variables; all variables must be provided as
-arguments. This limitation is due to the lack of
-ability for YGM to inspect and extract these arguments when serializing messages to be sent to other ranks.
-
-### Local Lambdas
-A *local lambda* is any lambda that is guaranteed not to be sent to a remote rank. These lambdas are identified as being
-those given to a `for_all` operation on a storage container.
-
-The defining feature of local lambdas is that all arguments besides what is stored in the container must be captured.
-Internally, these lambdas may be given to a [`std::for_each`](https://en.cppreference.com/w/cpp/algorithm/for_each) that
-iterates over the container's elements stored locally on each rank.
-
 # License
 YGM is distributed under the MIT license.
 
diff --git a/cmake/FindArrowParquet.cmake b/cmake/FindArrowParquet.cmake
index a40e6cab..b922d07f 100644
--- a/cmake/FindArrowParquet.cmake
+++ b/cmake/FindArrowParquet.cmake
@@ -1,42 +1,4 @@
-# Find Arrow and Parquet using find_package
-function(find_arrow_parquet_config)
-    # Find Arrow >- 8.0
-    foreach (VERSION 16.0 15.0 14.0 13.0 12.0 11.0 10.0 9.0 8.0)
-        find_package(Arrow ${VERSION} QUIET)
-        if (Arrow_FOUND)
-            break()
-        endif ()
-    endforeach ()
-    set(Arrow_FOUND ${Arrow_FOUND} PARENT_SCOPE)
-
-    # Find Parquet
-    if (Arrow_FOUND)
-        find_package(Parquet QUIET PATHS ${Arrow_DIR})
-    endif ()
-    set(Parquet_FOUND ${Parquet_FOUND} PARENT_SCOPE)
-
-    # Show Arrow and Parquet info
-    if (Arrow_FOUND AND Parquet_FOUND)
-        if (Arrow_FOUND)
-            message(STATUS ${PROJECT_NAME} " found Arrow")
-            message(STATUS "Arrow version: ${ARROW_VERSION}")
-            message(STATUS "Arrow SO version: ${ARROW_FULL_SO_VERSION}")
-        endif ()
-
-        if (Parquet_FOUND)
-            message(STATUS ${PROJECT_NAME} " found Parquet")
-            message(STATUS "Parquet version: ${PARQUET_VERSION}")
-            message(STATUS "Parquet SO version: ${PARQUET_FULL_SO_VERSION}")
-        endif ()
-    else ()
-        if (YGM_REQUIRE_ARROW_PARQUET)
-            message(FATAL_ERROR "${PROJECT_NAME} requires Arrow Parquet >= 8.0 but Arrow Parquet was not found.")
-        else ()
-            message(WARNING "${PROJECT_NAME} did not find Arrow Parquet >= 8.0. Building without Arrow Parquet.")
-        endif ()
-    endif ()
-endfunction()
-
+include(PythonUtilities)
 
 # Find Arrow and Parquet installed along with pyarrow by pip.
 #
@@ -58,7 +20,7 @@ endfunction()
 # If Arrow and Parquet are found, set Arrow_FOUND and Parquet_FOUND to TRUE.
 # Also, Arrow::arrow_shared and Parquet::parquet_shared are created as imported targets.
 # Those targets can be used to link Arrow and Parquet as find_package() is used.
-function(find_pyarrow)
+function(find_pip_installed_pyarrow)
     if (PIP_PYARROW_ROOT)
         # Find libarrow
         file(GLOB Arrow_LIBRARIES LIST_DIRECTORIES false "${PIP_PYARROW_ROOT}/libarrow.so.*")
@@ -114,12 +76,6 @@ function(find_pyarrow)
             endif ()
 
             message(STATUS "Arrow include dir: ${Arrow_INCLUDE_DIRS}")
-        else () # Arrow or Parquet not found
-            if (YGM_REQUIRE_ARROW_PARQUET)
-                message(FATAL_ERROR "${PROJECT_NAME} requires Arrow Parquet but Arrow Parquet was not found.")
-            else ()
-                message(WARNING "${PROJECT_NAME} did not find Arrow Parquet. Building without Arrow Parquet.")
-            endif ()
         endif ()
     else ()
         message(FATAL_ERROR "PIP_PYARROW_ROOT is not set. PIP_PYARROW_ROOT must be set to the root of the pyarrow installation.")
@@ -128,24 +84,149 @@ function(find_pyarrow)
 endfunction()
 
 
-# Find Arrow and Parquet using find_arrow or find_pyarrow
-# If PIP_PYARROW_ROOT is set, find_pyarrow is used.
+# Find the directory where pyarrow is installed.
+# This function executes a Python script to find the pyarrow module and
+# **does not assume that pyarrow is installed by pip**.
 #
 # Output:
-# Arrow_FOUND and Parquet_FOUND are set to TRUE if Arrow and Parquet are found.
-function(find_arrow_parquet)
-    if (PIP_PYARROW_ROOT)
-        find_pyarrow()
-    else ()
-        find_arrow_parquet_config()
+# PYARROW_ROOT is set to the root of the pyarrow installation.
+function(find_pyarrow_package)
+    find_python3_module(pyarrow)
+    if (PYTHON3_MODULE_PATH)
+        get_filename_component(PYARROW_ROOT ${PYTHON3_MODULE_PATH} DIRECTORY)
+        set(PYARROW_ROOT ${PYARROW_ROOT} PARENT_SCOPE)
     endif ()
+endfunction()
+
+# Install pyarrow using pip
+# Output:
+# PIP_PYARROW_ROOT is set to the root of the pyarrow installation.
+function(install_pyarrow_in_venv)
+    setup_python_venv()
+    if (NOT PYTHON_VENV_ROOT)
+        return()
+    endif ()
+
+    activate_python_venv(${PYTHON_VENV_ROOT})
+    if (NOT PYTHON_VENV_ACTIVATED)
+        return()
+    endif ()
+
+    # Use only the Python 3 interpreter in the virtual environment
+    set(Python3_FIND_VIRTUALENV ONLY)
+
+    # Upgrade pip
+    # Ignore the error status as failing to upgrade is not the end of the world
+    upgrade_pip()
+
+    # Install pyarrow
+    pip_install_python_package("pyarrow==16.1.*")
+    if (PIP_INSTALL_SUCCEEDED)
+        find_pyarrow_package()
+        if (PYARROW_ROOT)
+            set(PIP_PYARROW_ROOT ${PYARROW_ROOT} PARENT_SCOPE)
+        endif ()
+    endif ()
+
+    deactivate_python_venv()
+endfunction()
+
+
+# Find Arrow and Parquet using find_package
+# Output:
+# Arrow_FOUND is set to TRUE if Arrow is found.
+# Parquet_FOUND is set to TRUE if Parquet is found.
+function(find_arrow_parquet_config)
+    # Find Arrow >= 8.0, trying majors 100 down to 8 (no edit needed per Arrow release).
+    # foreach(RANGE) documents ascending ranges only, so derive the major from an offset.
+    foreach (VERSION_OFFSET RANGE 0 92)
+        math(EXPR MAJOR_VERSION "100 - ${VERSION_OFFSET}")
+        find_package(Arrow "${MAJOR_VERSION}.0" QUIET)
+        if (Arrow_FOUND)
+            break()
+        endif ()
+    endforeach ()
     set(Arrow_FOUND ${Arrow_FOUND} PARENT_SCOPE)
+
+    # Find Parquet (only searched for next to the Arrow package that was found)
+    if (Arrow_FOUND)
+        find_package(Parquet QUIET PATHS ${Arrow_DIR})
+    endif ()
     set(Parquet_FOUND ${Parquet_FOUND} PARENT_SCOPE)
+
+    # Show Arrow and Parquet info
+    if (Arrow_FOUND AND Parquet_FOUND)
+        if (Arrow_FOUND)
+            message(STATUS ${PROJECT_NAME} " found Arrow")
+            message(STATUS "Arrow version: ${ARROW_VERSION}")
+            message(STATUS "Arrow SO version: ${ARROW_FULL_SO_VERSION}")
+        endif ()
+
+        if (Parquet_FOUND)
+            message(STATUS ${PROJECT_NAME} " found Parquet")
+            message(STATUS "Parquet version: ${PARQUET_VERSION}")
+            message(STATUS "Parquet SO version: ${PARQUET_FULL_SO_VERSION}")
+        endif ()
+    endif ()
+endfunction()
+
+# Find Arrow and Parquet. If not found, install pyarrow using pip in a Python virtual environment.
+# Input:
+#   PIP_PYARROW_ROOT (option) The root directory of a pyarrow installed by pip.
+#   YGM_REQUIRE_ARROW_PARQUET (option) If TRUE, a fatal error is thrown when Arrow Parquet is not found.
+# Output:
+#   Arrow_FOUND and Parquet_FOUND are defined and set to TRUE if Arrow and Parquet are found.
+function(find_or_install_arrow_parquet)
+    if (PIP_PYARROW_ROOT)
+        find_pip_installed_pyarrow()
+        if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
+            if (YGM_REQUIRE_ARROW_PARQUET)
+                message(FATAL_ERROR "${PROJECT_NAME} requires Arrow Parquet but Arrow Parquet was not found in ${PIP_PYARROW_ROOT}.")
+            else ()
+                message(WARNING "${PROJECT_NAME} did not find Arrow Parquet in ${PIP_PYARROW_ROOT}. Building without Arrow Parquet.")
+            endif ()
+            return()
+        endif ()
+    endif ()
+
+    if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
+        find_arrow_parquet_config()
+    endif ()
+
+    if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
+        find_pyarrow_package()
+        if (PYARROW_ROOT)
+            # Assume that the found pyarrow was installed by pip.
+            set(PIP_PYARROW_ROOT ${PYARROW_ROOT})
+            find_pip_installed_pyarrow()
+        endif ()
+    endif ()
+
+    if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
+        install_pyarrow_in_venv()
+        if (PIP_PYARROW_ROOT)
+            find_pip_installed_pyarrow()
+        endif ()
+    endif ()
+
+    if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
+        message(STATUS "${PROJECT_NAME} could not find Arrow Parquet.")
+        message(STATUS "If this is an unexpected result, try the following command to install pyarrow: export Python3_ROOT_DIR=/path/to/python3; /path/to/python3 -m pip install pyarrow")
+        if (YGM_REQUIRE_ARROW_PARQUET)
+            message(FATAL_ERROR "${PROJECT_NAME} requires Arrow Parquet.")
+        else ()
+            message(WARNING "${PROJECT_NAME} keep the build process without Arrow Parquet.")
+        endif ()
+        return()
+    endif ()
+
+    set(Arrow_FOUND TRUE PARENT_SCOPE)
+    set(Parquet_FOUND TRUE PARENT_SCOPE)
 endfunction()
 
 
 # Link Arrow and Parquet to the target
-# This function must be called after find_arrow_parquet().
+# This function must be called after find_or_install_arrow_parquet().
 function(link_arrow_parquet target)
     if (Arrow_FOUND AND Parquet_FOUND)
         target_link_libraries(${target} PUBLIC
@@ -153,4 +234,4 @@ function(link_arrow_parquet target)
     else ()
         message(WARNING "Arrow or Parquet not found. Not linking Arrow or Parquet.")
     endif ()
-endfunction()
\ No newline at end of file
+endfunction()
diff --git a/cmake/FindPython3Module.cmake b/cmake/FindPython3Module.cmake
new file mode 100644
index 00000000..9d3b4c8e
--- /dev/null
+++ b/cmake/FindPython3Module.cmake
@@ -0,0 +1,18 @@
+# Find a Python3 module using CMake's FindPython3 module.
+# Input: module name to find
+# Python3_ROOT_DIR can be used as a hint to find Python3
+# Note: importlib.util is imported explicitly; "import importlib" alone does not guarantee the submodule is loaded.
+# Output: PYTHON3_MODULE_PATH is set to the path of the module if found
+function(find_python3_module module_name)
+    find_package(Python3 COMPONENTS Interpreter REQUIRED)
+
+    execute_process(
+            COMMAND ${Python3_EXECUTABLE} -c "import importlib.util; import sys; module_name = '${module_name}'; spec = importlib.util.find_spec(module_name); print(spec.origin if spec else ''); sys.exit(0 if spec else 1)"
+            OUTPUT_VARIABLE MODULE_PATH
+            OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+
+    if (Python3_FOUND AND MODULE_PATH)
+        set(PYTHON3_MODULE_PATH ${MODULE_PATH} PARENT_SCOPE)
+    endif ()
+endfunction()
\ No newline at end of file
diff --git a/cmake/PythonUtilities.cmake b/cmake/PythonUtilities.cmake
new file mode 100644
index 00000000..709f1bf6
--- /dev/null
+++ b/cmake/PythonUtilities.cmake
@@ -0,0 +1,103 @@
+# Create a Python3 virtual environment (activation is done separately by activate_python_venv)
+#
+# Output: PYTHON_VENV_ROOT is set to the path of the virtual environment
+# if created successfully
+function(setup_python_venv)
+    find_package(Python3 COMPONENTS Interpreter QUIET)
+    if (NOT Python3_Interpreter_FOUND)
+        message(WARNING "Python3 interpreter not found")
+        return()
+    endif()
+
+    set(PYTHON_VENV_ROOT "${CMAKE_BINARY_DIR}/${PROJECT_NAME}-venv")
+    execute_process(
+            COMMAND ${Python3_EXECUTABLE} -m venv ${PYTHON_VENV_ROOT}
+            RESULT_VARIABLE result
+            OUTPUT_QUIET
+    )
+    if (result EQUAL "0")
+        message(STATUS "Created Python virtual environment in ${PYTHON_VENV_ROOT}")
+        set(PYTHON_VENV_ROOT ${PYTHON_VENV_ROOT} PARENT_SCOPE)
+    endif()
+endfunction()
+
+# Activate a Python3 virtual environment
+# Input: A path to the virtual environment
+# Output: PYTHON_VENV_ACTIVATED is set to TRUE (ENV{VIRTUAL_ENV} is set; the path is not validated)
+function(activate_python_venv venv_path)
+    set (ENV{VIRTUAL_ENV} ${venv_path})
+    set(PYTHON_VENV_ACTIVATED TRUE PARENT_SCOPE)
+endfunction()
+
+# Deactivate a Python3 virtual environment
+function(deactivate_python_venv)
+    unset(ENV{VIRTUAL_ENV})
+    set(PYTHON_VENV_ACTIVATED FALSE PARENT_SCOPE)
+endfunction()
+
+# Upgrade pip in the Python3 interpreter
+# Output: PIP_UPGRADE_SUCCEEDED is set to TRUE if pip was upgraded successfully
+function(upgrade_pip)
+    find_package(Python3 COMPONENTS Interpreter QUIET)
+    if (NOT Python3_Interpreter_FOUND)
+        message(WARNING "Python3 interpreter not found")
+        return()
+    endif()
+
+    execute_process(
+            COMMAND ${Python3_EXECUTABLE} -m pip install --upgrade pip
+            RESULT_VARIABLE result
+            OUTPUT_QUIET
+    )
+    if(result EQUAL "0")
+        set(PIP_UPGRADE_SUCCEEDED TRUE PARENT_SCOPE)
+    endif()
+endfunction()
+
+# Install a Python3 package using pip
+#
+# Input: A package name (or pip requirement specifier) to install
+# Output: PIP_INSTALL_SUCCEEDED is set to TRUE
+# if the package was installed successfully
+function(pip_install_python_package package_name)
+    find_package(Python3 COMPONENTS Interpreter QUIET)
+    if (NOT Python3_Interpreter_FOUND)
+        message(WARNING "Python3 interpreter not found")
+        return()
+    endif()
+
+    execute_process(
+            COMMAND ${Python3_EXECUTABLE} -m pip install ${package_name}
+            RESULT_VARIABLE result
+            OUTPUT_QUIET
+    )
+    if(result EQUAL "0")
+        message(STATUS "Installed ${package_name}")
+        set(PIP_INSTALL_SUCCEEDED TRUE PARENT_SCOPE)
+    endif()
+endfunction()
+
+# Find a Python3 module using CMake's FindPython3 module.
+# Input: module name to find
+# Python3_ROOT_DIR can be used as a hint to find Python3
+#
+# Output: PYTHON3_MODULE_PATH is set to the path of the module if found
+function(find_python3_module module_name)
+    find_package(Python3 COMPONENTS Interpreter QUIET)
+    if (NOT Python3_Interpreter_FOUND)
+        message(WARNING "Python3 interpreter not found")
+        return()
+    endif()
+
+    execute_process(
+            COMMAND ${Python3_EXECUTABLE} -c "import importlib.util; import sys; module_name = '${module_name}'; spec = importlib.util.find_spec(module_name); print(spec.origin if spec else ''); sys.exit(0 if spec else 1)"
+            OUTPUT_VARIABLE MODULE_PATH
+            OUTPUT_STRIP_TRAILING_WHITESPACE
+            RESULT_VARIABLE result
+    )
+
+    if (result EQUAL "0")
+        set(PYTHON3_MODULE_PATH ${MODULE_PATH} PARENT_SCOPE)
+        message(STATUS "Found Python module ${module_name} at ${MODULE_PATH}")
+    endif()
+endfunction()
\ No newline at end of file
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index 55a1d082..8265c559 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -29,4 +29,4 @@ doxygen_add_docs(doxygen
         COMMENT "Generate documentation by Doxygen"
         )
 
-add_subdirectory(rtd)
\ No newline at end of file
+add_subdirectory(rtd)
diff --git a/docs/rtd/getting_started.rst b/docs/rtd/getting_started.rst
new file mode 100644
index 00000000..2b88bf84
--- /dev/null
+++ b/docs/rtd/getting_started.rst
@@ -0,0 +1,117 @@
+Getting Started
+***************
+
+What is YGM?
+============
+
+YGM is an asynchronous communication library written in C++ and designed for high-performance computing (HPC) use cases featuring 
+irregular communication patterns. YGM includes a collection of
+distributed-memory storage containers designed to express common algorithmic and data-munging tasks. These containers
+automatically partition data, allowing insertions and, with most containers, processing of individual elements to be
+initiated from any running YGM process.
+
+Underlying YGM's containers is a communicator abstraction. This communicator asynchronously sends messages spawned by
+senders with receivers needing no knowledge of incoming messages prior to their arrival. YGM communications take the
+form of *active messages*; each message contains a function object to execute (often in the form of C++ lambdas), data
+and/or pointers to data for this function to execute on, and a destination process for the message to be executed at.
+
+YGM also includes a set of I/O primitives for parsing collections of input documents in parallel as independent lines of
+text and streaming output lines to
+large numbers of destination files. Current parsing functionality supports reading input as CSV, ndjson, and
+unstructured lines of data.
+
+General YGM Operations
+======================
+
+YGM is built on its ability to communicate active messages asynchronously between running processes. This does not
+capture every operation that can be useful, for instance collective operations are still widely needed. YGM uses
+prefixes on function names to distinguish their behaviors in terms of the processes involved. These prefixes are:
+   * ``async_``: Asynchronous operation initiated on a single process. The execution of the underlying function may
+     occur on a remote process.
+   * ``local_``: Function performs only local operations on data of the current process. In uses within YGM containers
+     with partitioning schemes that determine item ownership, care must be taken to ensure the process a ``local_``
+     operation is called from aligns with the item's owner. For instance, calling ``ygm::container::map::local_insert``
+     will store an item on the process where the call is made, but the ``ygm::container::map`` may not be able to look
+     up this location if it is on the wrong process.
+   * No Prefix: Collective operation that must be called from all processes.
+
+The primary workhorse functions in YGM fall into the two categories of ``async_`` and ``for_all`` operations. In an
+``async_`` operation, a lambda is asynchronously sent to a (potentially) remote process for execution. In many cases
+with YGM containers, the lambda being executed is not provided by the user and is instead part of the function itself,
+e.g. ``async_insert`` calls on most containers. A ``for_all`` operation is a collective operation in which a lambda is 
+executed locally on every process while iterating over all locally held items of some YGM object. The items iterated
+over can be items in a YGM container, items coming from a map, filter, or flatten applied to a container, or all lines
+in a collection of files in a YGM I/O parser.
+
+Lambda Capture Rules
+--------------------
+Certain ``async_`` and ``for_all`` operations require users to provide lambdas as part of their executions. The lambdas
+that can be accepted by these two classes of functions follow different rules pertaining to the capturing of variables:
+   * ``async_`` calls cannot capture (most) variables in lambdas. Variables necessary for lambda execution must be
+     provided as arguments to the ``async_`` call. In the event that the data for the lambda resides on the remote
+     process the lambda will execute on, a ``ygm::ygm_ptr`` should be passed as an argument to the ``async_``.
+   * ``for_all`` calls assume lambdas take only the arguments inherently provided by the YGM object being iterated over.
+     All other necessary variables *must* be captured. The types of arguments provided to the lambda can be identified
+     by the ``for_all_args`` type within the YGM object.
+
+These differences in behavior arise from the distinction that ``async_`` lambdas may execute on a remote process, while
+``for_all`` lambdas are guaranteed to execute locally to a process. In the case of ``async_`` operations, the lambda and
+all arguments must be serialized for communication, but C++ does not provide a method for inspection of variables
+captured in the closure of a lambda. In the case of ``for_all`` operations, the execution is equivalent to calling
+`std::for_each <https://en.cppreference.com/w/cpp/algorithm/for_each>`_ on the entire collection of items held locally.
+
+Requirements
+============
+
+* C++20 - GCC versions 11 through 14 are tested. Your mileage may vary with other compilers.
+* `Cereal <https://github.com/USCiLab/cereal>`_ - C++ serialization library
+* MPI
+* Optionally, Boost 1.77 to enable Boost.JSON support.  
+
+
+Using YGM with CMake
+====================
+YGM is a header-only library that is easy to incorporate into a project through CMake. Adding the following to
+CMakeLists.txt will install YGM and its dependencies as part of your project:
+
+.. code-block:: CMake
+
+   set(DESIRED_YGM_VERSION 0.6)
+   find_package(ygm ${DESIRED_YGM_VERSION} CONFIG)
+   if (NOT ygm_FOUND)
+       FetchContent_Declare(
+           ygm
+           GIT_REPOSITORY https://github.com/LLNL/ygm
+           GIT_TAG v${DESIRED_YGM_VERSION}
+       )
+       FetchContent_GetProperties(ygm)
+       if (ygm_POPULATED)
+           message(STATUS "Found already populated ygm dependency: "
+                          ${ygm_SOURCE_DIR}
+           )
+       else ()
+           set(JUST_INSTALL_YGM ON)
+           set(YGM_INSTALL ON)
+           FetchContent_Populate(ygm)
+           add_subdirectory(${ygm_SOURCE_DIR} ${ygm_BINARY_DIR})
+           message(STATUS "Cloned ygm dependency " ${ygm_SOURCE_DIR})
+       endif ()
+   else ()
+       message(STATUS "Found installed ygm dependency " ${ygm_DIR})
+   endif ()
+
+License
+=======
+YGM is distributed under the MIT license.
+
+All new contributions must be made under the MIT license.
+
+See `LICENSE-MIT <https://github.com/LLNL/ygm/blob/master/LICENSE-MIT>`_, `NOTICE
+<https://github.com/LLNL/ygm/blob/master/NOTICE>`_, and `COPYRIGHT <https://github.com/LLNL/ygm/blob/master/COPYRIGHT>`_ for
+details.
+
+SPDX-License-Identifier: MIT
+
+Release
+=======
+LLNL-CODE-789122
diff --git a/docs/rtd/index.rst b/docs/rtd/index.rst
index a32c2d2b..0f429a5a 100644
--- a/docs/rtd/index.rst
+++ b/docs/rtd/index.rst
@@ -10,6 +10,7 @@ YGM library documentation
    :maxdepth: 2
    :caption: Contents:
 
+   getting_started
    ygm/comm
    ygm/container
 
@@ -26,4 +27,4 @@ Indices and tables
 
 * :ref:`genindex`
 * :ref:`modindex`
-* :ref:`search`
\ No newline at end of file
+* :ref:`search`
diff --git a/docs/rtd/ygm/comm.rst b/docs/rtd/ygm/comm.rst
index 7c4340a7..440073eb 100644
--- a/docs/rtd/ygm/comm.rst
+++ b/docs/rtd/ygm/comm.rst
@@ -3,10 +3,59 @@
 :code:`ygm::comm` class reference.
 ==================================
 
+Communicator Overview
+=====================
+
 The communicator :code:`ygm::comm` is the central object in YGM.
 The communicator controls an interface to an MPI communicator, and its
 functionality can be modified by additional optional parameters.
 
+Communicator Features:
+   * **Message Buffering** - Increases application throughput at the expense of increased message latency.
+   * **Message Routing** - Extends benefits of message buffering to extremely large HPC allocations.
+   * **Fire-and-Forget RPC Semantics** - A sender provides the function and function arguments for execution on a specified
+     destination rank through an ``async`` call. This function will complete on the destination rank at an unspecified time
+     in the future, but YGM does not explicitly make the sender aware of this completion.
+
+Communicator Hello World
+========================
+
+Here we will walk through a basic "hello world" YGM program. The `examples directory </examples/>`_ contains several other
+examples, including many using YGM's storage containers.
+
+To begin, headers for a YGM communicator are needed:
+   
+.. code-block:: C++
+
+   #include <ygm/comm.hpp>
+
+At the beginning of the program, a YGM communicator must be constructed. It will be given ``argc`` and ``argv`` like
+``MPI_Init``.
+
+.. code-block:: C++
+
+   ygm::comm world(&argc, &argv);
+
+Next, we need a lambda to send through YGM. We'll do a simple hello\_world type of lambda.
+
+.. code-block:: C++
+
+   auto hello_world_lambda = [](const std::string &name) {
+     std::cout << "Hello " << name << std::endl;
+   };
+
+Finally, we use this lambda inside of our ``async`` calls. In this case, we will have rank 0 send a message to rank 1,
+telling it to greet the world.
+
+.. code-block:: C++
+
+   if (world.rank0()) {
+     world.async(1, hello_world_lambda, std::string("world"));
+   }
+
+The full, compilable version of this example is found `here </examples/hello_world.cpp>`_. Running it prints a single
+"Hello world".
+
 .. doxygenclass:: ygm::comm
   :members:
-  :undoc-members:
\ No newline at end of file
+  :undoc-members:
diff --git a/docs/rtd/ygm/container.rst b/docs/rtd/ygm/container.rst
index 05b3ba85..758c9b74 100644
--- a/docs/rtd/ygm/container.rst
+++ b/docs/rtd/ygm/container.rst
@@ -11,34 +11,68 @@ operations that need to be performed on the data stored in a container while
 abstracting the locality and access details of said data.
 While insiration is taken from STL, the top priority is to provide expressive
 and performant tools within the YGM framework.
-Interaction with containers occurs in one of two classes of operations:
-:code:`for_all` and `async_visit`.
-
-Both classes expect a function as a primary argument, similar to
-:code:`ygm::comm::async`.
-However, the passed function signature must match the contents of the container.
-Value store containers holding :code:`value_type` objects expect the first
-argument of passed functions to address objects with the syntax
-:code:`[](value_type &data_item){}`.
-Key-value store containers expect these functions instead to support separate
-:code:`key_type` (which must be immutable) and :code:`value_type` arguments with
-the syntax :code:`[](key_type key, value_type &value){}`.
-Although all of these operations agree as to how contained objects are addressed
-by functions, the interfaces are subtly different and support additional
-optional features.
+
+Implemented Storage Containers
+==============================
+
+The currently implemented containers include a mix of distributed versions of familiar containers and
+distributed-specific containers:
+
+   * ``ygm::container::bag`` - An unordered collection of objects partitioned across processes. Ideally suited for
+     iteration over all items with no capability for identifying or searching for an individual item within the bag.
+   * ``ygm::container::set`` - Analogous to ``std::set``. An unordered collection of unique objects with the ability to iterate
+     and search for individual items. Insertion and iteration are slower than a ``ygm::container::bag``.
+   * ``ygm::container::multiset`` - Analogous to ``std::multiset``. A set where multiple instances of the same object
+     may appear.
+   * ``ygm::container::map`` - Analogous to ``std::map``. A collection of keys with assigned values. Keys and values can
+     be inserted and looked up individually or iterated over collectively.
+   * ``ygm::container::multimap`` - Analogous to ``std::multimap``. A map where keys may appear with multiple values.
+   * ``ygm::container::array`` - A collection of items indexed by an integer type. Items can be inserted and looked up
+     by their index values independently or iterated over collectively. Differs from a ``std::array`` in that sizes do
+     not need to be known at compile-time, and a ``ygm::container::array`` can be dynamically resized through a
+     (potentially expensive) function at runtime.
+   * ``ygm::container::counting_set`` - A container for counting occurrences of items. Can be thought of as a
+     ``ygm::container::map`` that maps items to integer counts but optimized for the case of frequent duplication of
+     keys.
+   * ``ygm::container::disjoint_set`` - A distributed disjoint set data structure. Implements an asynchronous union
+     operation for maintaining membership of items within mathematical disjoint sets. Eschews the find operation of most
+     disjoint set data structures and instead allows for execution of user-provided lambdas upon successful completion
+     of set merges.
+
+Typical Container Operations
+============================
+
+Most interaction with containers occurs in one of two classes of operations:
+:code:`for_all` and :code:`async_`.
+
+:code:`for_all` Operations
+--------------------------
 
 :code:`for_all`-class operations are barrier-inducing collectives that direct
-ranks to iteratively apply the passed function to all locally-held data.
+ranks to iteratively apply a user-provided function to all locally-held data.
 Functions passed to the :code:`for_all` interface do not support additional
 variadic parameters.
 However, these functions are stored and executed locally on each rank, and so
 can capture objects in rank-local scope.
 
-:code:`async_visit`-class operations provide a mechanism for executing a
-function at a particular piece of data stored within a container.
-YGM handles the creation and invocation of a YGM communicator :code:`async`
-call, freeing the user to consider algorithmic details.
-Not all containers support :code:`async_visit`-class operations.
+:code:`async_` Operations
+-------------------------
+
+Operations prefixed with ``async_`` perform operations on containers that can be spawned from any process and
+execute on the correct process using YGM's asynchronous runtime. The most common ``async_`` operations are:
+
+   * ``async_insert`` - Inserts an item or a key and value, depending on the container being used. The process responsible
+     for storing the inserted object is determined using the container's partitioner. Depending on the container, this
+     partitioner may determine this location using a hash of the item or by heuristics that attempt to evenly spread
+     data across processes (in the case of ``ygm::container::bag``).
+   * ``async_visit`` - Items within YGM containers will be distributed across the universe of running processes. Instead
+     of providing operations to look up this data directly, which would involve a round-trip communication with the
+     process storing the item of interest, most YGM containers provide ``async_visit``. A call to ``async_visit`` takes
+     a function to execute and arguments to pass to the function and asynchronously executes the provided function with
+     arguments that are the item stored in the container and the additional arguments passed to ``async_visit``.
+
+Specific containers may have additional ``async_`` operations (or may be missing some of the above) based on the
+capabilities of the container. Consult the documentation of individual containers for more details.
 
 .. toctree::
    :maxdepth: 2
@@ -53,7 +87,20 @@ Not all containers support :code:`async_visit`-class operations.
    container/multiset
    container/set
 
-YGM also supports adaptor classes and functions that wrap an existing class to
-either add or modify operation functionality.
+YGM Container Example
+=====================
+
+.. literalinclude:: ../../../examples/container/map_visit.cpp
+   :language: C++
+
+Container Transformation Objects
+================================
+
+``ygm::container`` provides a number of transformation objects that can be applied to containers to alter the appearance
+of items passed to ``for_all`` operations without modifying the items within the container itself. The currently
+supported transformation objects are:
 
-.. doxygenfunction:: ygm::container::reduce_by_key_map
\ No newline at end of file
+   * ``filter`` - Filters items in a container to only execute on the portion of the container satisfying a provided
+     boolean function.
+   * ``flatten`` - Extract the elements from tuple-like objects before passing to the user's ``for_all`` function.
+   * ``transform`` - Apply a generic function to the container's items before passing to the user's ``for_all`` function.
diff --git a/examples/container/bag_filter.cpp b/examples/container/bag_filter.cpp
index 8dae39c3..928bd1cd 100644
--- a/examples/container/bag_filter.cpp
+++ b/examples/container/bag_filter.cpp
@@ -80,7 +80,7 @@ int main(int argc, char **argv) {
   ygm::container::map<std::string, size_t> word_count(world);
 
   bag3.filter([](std::string s) { return s.size() == 3; })
-      .map([](std::string s) { return std::make_pair(s, size_t(1)); })
+      .transform([](std::string s) { return std::make_pair(s, size_t(1)); })
       .reduce_by_key(word_count, std::plus<size_t>());
 
   word_count.for_all(
@@ -92,7 +92,7 @@ int main(int argc, char **argv) {
     ygm::io::line_parser lp(world, {"dummy"});
 
     lp.filter([](std::string s) { return s.size() == 3; })
-        .map([](std::string s) { return std::make_pair(s, size_t(1)); })
+        .transform([](std::string s) { return std::make_pair(s, size_t(1)); })
         .reduce_by_key(word_count, std::plus<size_t>());
   }
 
diff --git a/examples/container/word_counter.cpp b/examples/container/word_counter.cpp
index 2da77508..9fecdc50 100644
--- a/examples/container/word_counter.cpp
+++ b/examples/container/word_counter.cpp
@@ -60,7 +60,7 @@ int main(int argc, char **argv) {
     to_gather = {"freedom"};
   }
 
-  auto counts = word_counter.key_gather(to_gather);
+  auto counts = word_counter.gather_keys(to_gather);
 
   for (auto &word_count : counts) {
     std::cout << word_count.first << " -> " << word_count.second << std::endl;
diff --git a/include/ygm/collective.hpp b/include/ygm/collective.hpp
index 056f2db1..6c4dd9dd 100644
--- a/include/ygm/collective.hpp
+++ b/include/ygm/collective.hpp
@@ -23,7 +23,7 @@ T prefix_sum(const T &value, const comm &c) {
   T to_return{0};
   c.barrier();
   MPI_Comm mpi_comm = c.get_mpi_comm();
-  ASSERT_MPI(MPI_Exscan(&value, &to_return, 1, detail::mpi_typeof(value),
+  YGM_ASSERT_MPI(MPI_Exscan(&value, &to_return, 1, detail::mpi_typeof(value),
                         MPI_SUM, mpi_comm));
   return to_return;
 }
@@ -42,7 +42,7 @@ T sum(const T &value, const comm &c) {
   T to_return;
   c.barrier();
   MPI_Comm mpi_comm = c.get_mpi_comm();
-  ASSERT_MPI(MPI_Allreduce(&value, &to_return, 1, detail::mpi_typeof(T()),
+  YGM_ASSERT_MPI(MPI_Allreduce(&value, &to_return, 1, detail::mpi_typeof(T()),
                            MPI_SUM, mpi_comm));
   return to_return;
 }
@@ -61,7 +61,7 @@ T min(const T &value, const comm &c) {
   T to_return;
   c.barrier();
   MPI_Comm mpi_comm = c.get_mpi_comm();
-  ASSERT_MPI(MPI_Allreduce(&value, &to_return, 1, detail::mpi_typeof(T()),
+  YGM_ASSERT_MPI(MPI_Allreduce(&value, &to_return, 1, detail::mpi_typeof(T()),
                            MPI_MIN, mpi_comm));
   return to_return;
 }
@@ -80,7 +80,7 @@ T max(const T &value, const comm &c) {
   T to_return;
   c.barrier();
   MPI_Comm mpi_comm = c.get_mpi_comm();
-  ASSERT_MPI(MPI_Allreduce(&value, &to_return, 1, detail::mpi_typeof(T()),
+  YGM_ASSERT_MPI(MPI_Allreduce(&value, &to_return, 1, detail::mpi_typeof(T()),
                            MPI_MAX, mpi_comm));
   return to_return;
 }
@@ -98,7 +98,7 @@ inline bool logical_and(bool value, const comm &c) {
   bool to_return;
   c.barrier();
   MPI_Comm mpi_comm = c.get_mpi_comm();
-  ASSERT_MPI(MPI_Allreduce(&value, &to_return, 1, detail::mpi_typeof(bool()),
+  YGM_ASSERT_MPI(MPI_Allreduce(&value, &to_return, 1, detail::mpi_typeof(bool()),
                            MPI_LAND, mpi_comm));
   return to_return;
 }
@@ -116,7 +116,7 @@ inline bool logical_or(bool value, const comm &c) {
   bool to_return;
   c.barrier();
   MPI_Comm mpi_comm = c.get_mpi_comm();
-  ASSERT_MPI(MPI_Allreduce(&value, &to_return, 1, detail::mpi_typeof(bool()),
+  YGM_ASSERT_MPI(MPI_Allreduce(&value, &to_return, 1, detail::mpi_typeof(bool()),
                            MPI_LOR, mpi_comm));
   return to_return;
 }
@@ -133,7 +133,7 @@ template <typename T>
 void bcast(T &to_bcast, int root, const comm &cm) {
   if constexpr (std::is_trivially_copyable<T>::value &&
                 std::is_standard_layout<T>::value) {
-    ASSERT_MPI(
+    YGM_ASSERT_MPI(
         MPI_Bcast(&to_bcast, sizeof(T), MPI_BYTE, root, cm.get_mpi_comm()));
   } else {
     ygm::detail::byte_vector packed;
@@ -142,13 +142,13 @@ void bcast(T &to_bcast, int root, const comm &cm) {
       oarchive(to_bcast);
     }
     size_t packed_size = packed.size();
-    ASSERT_RELEASE(packed_size < 1024 * 1024 * 1024);
-    ASSERT_MPI(MPI_Bcast(&packed_size, 1, ygm::detail::mpi_typeof(packed_size),
+    YGM_ASSERT_RELEASE(packed_size < 1024 * 1024 * 1024);
+    YGM_ASSERT_MPI(MPI_Bcast(&packed_size, 1, ygm::detail::mpi_typeof(packed_size),
                          root, cm.get_mpi_comm()));
     if (cm.rank() != root) {
       packed.resize(packed_size);
     }
-    ASSERT_MPI(MPI_Bcast(packed.data(), packed_size, MPI_BYTE, root,
+    YGM_ASSERT_MPI(MPI_Bcast(packed.data(), packed_size, MPI_BYTE, root,
                          cm.get_mpi_comm()));
 
     if (cm.rank() != root) {
diff --git a/include/ygm/container/array.hpp b/include/ygm/container/array.hpp
index 14e11bae..ab0cf519 100644
--- a/include/ygm/container/array.hpp
+++ b/include/ygm/container/array.hpp
@@ -12,6 +12,7 @@
 #include <ygm/comm.hpp>
 #include <ygm/container/container_traits.hpp>
 #include <ygm/container/detail/base_async_insert.hpp>
+#include <ygm/container/detail/base_async_reduce.hpp>
 #include <ygm/container/detail/base_async_visit.hpp>
 #include <ygm/container/detail/base_concepts.hpp>
 #include <ygm/container/detail/base_iteration.hpp>
@@ -27,8 +28,10 @@ class array
       public detail::base_misc<array<Value, Index>, std::tuple<Index, Value>>,
       public detail::base_async_visit<array<Value, Index>,
                                       std::tuple<Index, Value>>,
-      public detail::base_iteration<array<Value, Index>,
-                                    std::tuple<Index, Value>> {
+      public detail::base_iteration_key_value<array<Value, Index>,
+                                              std::tuple<Index, Value>>,
+      public detail::base_async_reduce<array<Value, Index>,
+                                       std::tuple<Index, Value>> {
   friend class detail::base_misc<array<Value, Index>, std::tuple<Index, Value>>;
 
  public:
@@ -98,7 +101,7 @@ class array
 
     key_type max_index{0};
     for (const auto& [index, value] : l) {
-      ASSERT_RELEASE(index >= 0);
+      YGM_ASSERT_RELEASE(index >= 0);
       max_index = std::max<key_type>(max_index, index);
     }
 
@@ -126,9 +129,10 @@ class array
   }
 
   template <typename T>
-  array(ygm::comm& comm, const T& t) requires detail::HasForAll<T> &&
-      detail::SingleItemTuple<typename T::for_all_args> &&
-      std::same_as<typename T::for_all_args, std::tuple<mapped_type>>
+  array(ygm::comm& comm, const T& t)
+    requires detail::HasForAll<T> &&
+                 detail::SingleItemTuple<typename T::for_all_args> &&
+                 std::same_as<typename T::for_all_args, std::tuple<mapped_type>>
       : m_comm(comm), pthis(this), m_default_value{}, partitioner(comm, 0) {
     pthis.check(m_comm);
 
@@ -144,17 +148,19 @@ class array
   }
 
   template <typename T>
-  array(ygm::comm& comm, const T& t) requires detail::HasForAll<T> &&
-      detail::SingleItemTuple<typename T::for_all_args> && detail::
-          DoubleItemTuple<std::tuple_element_t<0, typename T::for_all_args>> &&
-      std::convertible_to<
-          std::tuple_element_t<
-              0, std::tuple_element_t<0, typename T::for_all_args>>,
-          key_type> &&
-      std::convertible_to<
-          std::tuple_element_t<
-              1, std::tuple_element_t<0, typename T::for_all_args>>,
-          mapped_type>
+  array(ygm::comm& comm, const T& t)
+    requires detail::HasForAll<T> &&
+                 detail::SingleItemTuple<typename T::for_all_args> &&
+                 detail::DoubleItemTuple<
+                     std::tuple_element_t<0, typename T::for_all_args>> &&
+                 std::convertible_to<
+                     std::tuple_element_t<
+                         0, std::tuple_element_t<0, typename T::for_all_args>>,
+                     key_type> &&
+                 std::convertible_to<
+                     std::tuple_element_t<
+                         1, std::tuple_element_t<0, typename T::for_all_args>>,
+                     mapped_type>
       : m_comm(comm), pthis(this), m_default_value{}, partitioner(comm, 0) {
     pthis.check(m_comm);
 
@@ -175,12 +181,15 @@ class array
   }
 
   template <typename T>
-  array(ygm::comm& comm, const T& t) requires detail::HasForAll<T> &&
-      detail::DoubleItemTuple<typename T::for_all_args> && std::convertible_to<
-
-          std::tuple_element_t<0, typename T::for_all_args>, key_type> &&
-      std::convertible_to<std::tuple_element_t<0, typename T::for_all_args>,
-                          mapped_type>
+  array(ygm::comm& comm, const T& t)
+    requires detail::HasForAll<T> &&
+                 detail::DoubleItemTuple<typename T::for_all_args> &&
+                 std::convertible_to<
+                     std::tuple_element_t<0, typename T::for_all_args>,
+                     key_type> &&
+                 std::convertible_to<
+                     std::tuple_element_t<0, typename T::for_all_args>,
+                     mapped_type>
       : m_comm(comm), pthis(this), m_default_value{}, partitioner(comm, 0) {
     pthis.check(m_comm);
 
@@ -201,9 +210,10 @@ class array
   }
 
   template <typename T>
-  array(ygm::comm& comm, const T& t) requires detail::STLContainer<T> &&
-      (not detail::SingleItemTuple<typename T::value_type>)&&std::
-          convertible_to<typename T::value_type, mapped_type>
+  array(ygm::comm& comm, const T& t)
+    requires detail::STLContainer<T> &&
+                 (not detail::SingleItemTuple<typename T::value_type>) &&
+                 std::convertible_to<typename T::value_type, mapped_type>
       : m_comm(comm), pthis(this), m_default_value{}, partitioner(comm, 0) {
     pthis.check(m_comm);
 
@@ -221,11 +231,15 @@ class array
   }
 
   template <typename T>
-  array(ygm::comm& comm, const T& t) requires detail::STLContainer<T> &&
-      detail::DoubleItemTuple<typename T::value_type> && std::convertible_to<
-          std::tuple_element_t<0, typename T::value_type>, key_type> &&
-      std::convertible_to<std::tuple_element_t<1, typename T::value_type>,
-                          mapped_type>
+  array(ygm::comm& comm, const T& t)
+    requires detail::STLContainer<T> &&
+                 detail::DoubleItemTuple<typename T::value_type> &&
+                 std::convertible_to<
+                     std::tuple_element_t<0, typename T::value_type>,
+                     key_type> &&
+                 std::convertible_to<
+                     std::tuple_element_t<1, typename T::value_type>,
+                     mapped_type>
       : m_comm(comm), pthis(this), m_default_value{}, partitioner(comm, 0) {
     pthis.check(m_comm);
 
@@ -278,7 +292,7 @@ class array
   void async_binary_op_update_value(const key_type     index,
                                     const mapped_type& value,
                                     const BinaryOp&    b) {
-    ASSERT_RELEASE(index < m_global_size);
+    YGM_ASSERT_RELEASE(index < m_global_size);
     auto updater = [](const key_type i, mapped_type& v,
                       const mapped_type& new_value) {
       BinaryOp* binary_op;
@@ -326,7 +340,7 @@ class array
 
   template <typename UnaryOp>
   void async_unary_op_update_value(const key_type index, const UnaryOp& u) {
-    ASSERT_RELEASE(index < m_global_size);
+    YGM_ASSERT_RELEASE(index < m_global_size);
     auto updater = [](const key_type i, mapped_type& v) {
       UnaryOp* u;
       v = (*u)(v);
@@ -355,8 +369,10 @@ class array
     std::vector<std::pair<const key_type, const mapped_type>> tmp_values;
     tmp_values.reserve(local_size());
     local_for_all(
-        [&tmp_values](const key_type& index, const mapped_type& value) {
-          tmp_values.push_back(std::make_pair(index, value));
+        [&tmp_values, size](const key_type& index, const mapped_type& value) {
+          if (index < size) {
+            tmp_values.push_back(std::make_pair(index, value));
+          }
         });
 
     m_global_size = size;
@@ -368,14 +384,14 @@ class array
 
     // Repopulate array values
     for (const auto& [index, value] : tmp_values) {
-      if (index < size) {
-        async_set(index, value);
-      }
+      async_set(index, value);
     }
 
     m_comm.barrier();
   }
 
+  void resize(const size_type size) { resize(size, m_default_value); }
+
   size_t local_size() { return partitioner.local_size(); }
 
   size_t size() const {
@@ -383,8 +399,6 @@ class array
     return m_global_size;
   }
 
-  void resize(const size_type size) { resize(size, m_default_value); }
-
   void local_clear() { resize(0); }
 
   void local_swap(self_type& other) {
@@ -413,6 +427,13 @@ class array
     }
   }
 
+  template <typename ReductionOp>
+  void local_reduce(const key_type index, const mapped_type& value,
+                    ReductionOp reducer) {
+    m_local_vec[partitioner.local_index(index)] =
+        reducer(value, m_local_vec[partitioner.local_index(index)]);
+  }
+
   void sort() {
     const key_type samples_per_pivot = std::max<key_type>(
         std::min<key_type>(20, m_global_size / m_comm.size()), 1);
@@ -446,7 +467,8 @@ class array
     }
     m_comm.barrier();
 
-    ASSERT_RELEASE(samples.size() == samples_per_pivot * (m_comm.size() - 1));
+    YGM_ASSERT_RELEASE(samples.size() ==
+                       samples_per_pivot * (m_comm.size() - 1));
     std::sort(samples.begin(), samples.end());
     for (size_t i = samples_per_pivot - 1; i < samples.size();
          i += samples_per_pivot) {
@@ -455,7 +477,7 @@ class array
     samples.clear();
     samples.shrink_to_fit();
 
-    ASSERT_RELEASE(pivots.size() == m_comm.size() - 1);
+    YGM_ASSERT_RELEASE(pivots.size() == m_comm.size() - 1);
 
     //
     // Partition using pivots
diff --git a/include/ygm/container/bag.hpp b/include/ygm/container/bag.hpp
index b5c1ab62..1ed3806c 100644
--- a/include/ygm/container/bag.hpp
+++ b/include/ygm/container/bag.hpp
@@ -21,7 +21,7 @@ template <typename Item>
 class bag : public detail::base_async_insert_value<bag<Item>, std::tuple<Item>>,
             public detail::base_count<bag<Item>, std::tuple<Item>>,
             public detail::base_misc<bag<Item>, std::tuple<Item>>,
-            public detail::base_iteration<bag<Item>, std::tuple<Item>> {
+            public detail::base_iteration_value<bag<Item>, std::tuple<Item>> {
   friend class detail::base_misc<bag<Item>, std::tuple<Item>>;
 
  public:
@@ -37,24 +37,38 @@ class bag : public detail::base_async_insert_value<bag<Item>, std::tuple<Item>>,
 
   bag(ygm::comm &comm, std::initializer_list<Item> l)
       : m_comm(comm), pthis(this), partitioner(comm) {
-    m_comm.cout0("initializer_list assumes all ranks are equal");
     pthis.check(m_comm);
     if (m_comm.rank0()) {
       for (const Item &i : l) {
         async_insert(i);
       }
     }
+    m_comm.barrier();
   }
 
   template <typename STLContainer>
-  bag(ygm::comm &comm, const STLContainer &cont)
+  bag(ygm::comm          &comm,
+      const STLContainer &cont) requires detail::STLContainer<STLContainer> &&
+      std::convertible_to<typename STLContainer::value_type, Item>
       : m_comm(comm), pthis(this), partitioner(comm) {
-    m_comm.cout0("STLContainer assumes all ranks are different");
     pthis.check(m_comm);
 
     for (const Item &i : cont) {
-      async_insert(i);
+      this->async_insert(i);
     }
+    m_comm.barrier();
+  }
+
+  template <typename YGMContainer>
+  bag(ygm::comm          &comm,
+      const YGMContainer &yc) requires detail::HasForAll<YGMContainer> &&
+      detail::SingleItemTuple<typename YGMContainer::for_all_args>
+      : m_comm(comm), pthis(this), partitioner(comm) {
+    pthis.check(m_comm);
+
+    yc.for_all([this](const Item &value) { this->async_insert(value); });
+
+    m_comm.barrier();
   }
 
   ~bag() { m_comm.barrier(); }
@@ -228,7 +242,7 @@ class bag : public detail::base_async_insert_value<bag<Item>, std::tuple<Item>>,
 
  private:
   std::vector<value_type> local_pop(int n) {
-    ASSERT_RELEASE(n <= local_size());
+    YGM_ASSERT_RELEASE(n <= local_size());
 
     size_t                  new_size  = local_size() - n;
     auto                    pop_start = m_local_bag.begin() + new_size;
@@ -245,4 +259,4 @@ class bag : public detail::base_async_insert_value<bag<Item>, std::tuple<Item>>,
   typename ygm::ygm_ptr<self_type> pthis;
 };
 
-}  // namespace ygm::container
\ No newline at end of file
+}  // namespace ygm::container
diff --git a/include/ygm/container/bag_orig.hpp b/include/ygm/container/bag_orig.hpp
deleted file mode 100644
index 2ecd6795..00000000
--- a/include/ygm/container/bag_orig.hpp
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
-// Project Developers. See the top-level COPYRIGHT file for details.
-//
-// SPDX-License-Identifier: MIT
-
-#pragma once
-
-#include <ygm/container/container_traits.hpp>
-#include <ygm/random.hpp>
-
-namespace ygm::container {
-template <typename Item, typename Alloc = std::allocator<Item>>
-class bag {
- public:
-  using self_type         = bag<Item, Alloc>;
-  using value_type        = Item;
-  using size_type         = size_t;
-  using ygm_for_all_types = std::tuple<Item>;
-  using container_type    = ygm::container::bag_tag;
-
-  bag(ygm::comm &comm);
-  ~bag();
-
-  void async_insert(const value_type &item);
-  void async_insert(const value_type &item, int dest);
-  void async_insert(const std::vector<value_type> &items, int dest);
-
-  template <typename Function>
-  void for_all(Function fn);
-
-  void clear();
-
-  size_type size();
-  size_type local_size();
-
-  void rebalance();
-
-  void swap(self_type &s);
-
-  template <typename RandomFunc>
-  void local_shuffle(RandomFunc &r);
-  void local_shuffle();
-
-  template <typename RandomFunc>
-  void global_shuffle(RandomFunc &r);
-  void global_shuffle();
-
-  template <typename Function>
-  void local_for_all(Function fn);
-
-  ygm::comm &comm();
-
-  void                    serialize(const std::string &fname);
-  void                    deserialize(const std::string &fname);
-  std::vector<value_type> gather_to_vector(int dest);
-  std::vector<value_type> gather_to_vector();
-
- private:
-  std::vector<value_type> local_pop(int n);
-
-  template <typename Function>
-  void local_for_all_pair_types(Function fn);
-
- private:
-  size_t                           m_round_robin = 0;
-  ygm::comm                       &m_comm;
-  std::vector<value_type>          m_local_bag;
-  typename ygm::ygm_ptr<self_type> pthis;
-};
-}  // namespace ygm::container
-
-#include <ygm/container/detail/bag.ipp>
diff --git a/include/ygm/container/counting_set.hpp b/include/ygm/container/counting_set.hpp
index cb1e364b..4bab7ec5 100644
--- a/include/ygm/container/counting_set.hpp
+++ b/include/ygm/container/counting_set.hpp
@@ -19,8 +19,8 @@ template <typename Key>
 class counting_set
     : public detail::base_count<counting_set<Key>, std::tuple<Key, size_t>>,
       public detail::base_misc<counting_set<Key>, std::tuple<Key, size_t>>,
-      public detail::base_iteration<counting_set<Key>,
-                                    std::tuple<Key, size_t>> {
+      public detail::base_iteration_key_value<counting_set<Key>,
+                                              std::tuple<Key, size_t>> {
   friend class detail::base_misc<counting_set<Key>, std::tuple<Key, size_t>>;
 
  public:
@@ -34,24 +34,62 @@ class counting_set
   const size_type count_cache_size = 1024 * 1024;
 
   counting_set(ygm::comm &comm)
-      : m_map(comm /*, mapped_type(0)*/),
-        m_comm(comm),
-        partitioner(m_map.partitioner),
-        pthis(this) {
+      : m_map(comm), m_comm(comm), partitioner(m_map.partitioner), pthis(this) {
+    pthis.check(m_comm);
     m_count_cache.resize(count_cache_size, {key_type(), -1});
   }
 
   counting_set() = delete;
 
-  void async_insert(const key_type &key) { cache_insert(key); }
+  counting_set(ygm::comm &comm, std::initializer_list<Key> l)
+      : m_map(comm), m_comm(comm), partitioner(m_map.partitioner), pthis(this) {
+    pthis.check(m_comm);
+    m_count_cache.resize(count_cache_size, {key_type(), -1});
+    if (m_comm.rank0()) {
+      for (const Key &i : l) {
+        async_insert(i);
+      }
+    }
+    m_comm.barrier();
+  }
 
-  // void async_erase(const key_type& key) { cache_erase(key); }
+  template <typename STLContainer>
+  counting_set(ygm::comm &comm, const STLContainer &cont) requires
+      detail::STLContainer<STLContainer> &&
+      std::convertible_to<typename STLContainer::value_type, Key>
+      : m_map(comm), m_comm(comm), pthis(this), partitioner(comm) {
+    pthis.check(m_comm);
+    m_count_cache.resize(count_cache_size, {key_type(), -1});
+    for (const Key &i : cont) {
+      this->async_insert(i);
+    }
+    m_comm.barrier();
+  }
+
+  template <typename YGMContainer>
+  counting_set(ygm::comm &comm, const YGMContainer &yc) requires
+      detail::HasForAll<YGMContainer> &&
+      detail::SingleItemTuple<typename YGMContainer::for_all_args>
+      : m_map(comm), m_comm(comm), pthis(this), partitioner(comm) {
+    pthis.check(m_comm);
+    m_count_cache.resize(count_cache_size, {key_type(), -1});
+    yc.for_all([this](const Key &value) { this->async_insert(value); });
+
+    m_comm.barrier();
+  }
+
+  void async_insert(const key_type &key) { cache_insert(key); }
 
   template <typename Function>
   void local_for_all(Function fn) {
     m_map.local_for_all(fn);
   }
 
+  template <typename Function>
+  void local_for_all(Function fn) const {
+    m_map.local_for_all(fn);
+  }
+
   void local_clear() {  // What to do here
     m_map.local_clear();
     clear_cache();
@@ -95,9 +133,9 @@ class counting_set
   //   return m_map.all_gather(keys);
   // }
 
-  std::map<key_type, mapped_type> key_gather(
+  std::map<key_type, mapped_type> gather_keys(
       const std::vector<key_type> &keys) {
-    return m_map.key_gather(keys);
+    return m_map.gather_keys(keys);
   }
 
   typename ygm::ygm_ptr<self_type> get_ygm_ptr() const { return pthis; }
@@ -130,12 +168,12 @@ class counting_set
       m_count_cache[slot].second = 1;
     } else {
       // flush slot, fill with key
-      ASSERT_DEBUG(m_count_cache[slot].second > 0);
+      YGM_ASSERT_DEBUG(m_count_cache[slot].second > 0);
       if (m_count_cache[slot].first == key) {
         m_count_cache[slot].second++;
       } else {
         count_cache_flush(slot);
-        ASSERT_DEBUG(m_count_cache[slot].second == -1);
+        YGM_ASSERT_DEBUG(m_count_cache[slot].second == -1);
         m_count_cache[slot].first  = key;
         m_count_cache[slot].second = 1;
       }
@@ -148,7 +186,7 @@ class counting_set
   void count_cache_flush(size_t slot) {
     auto key          = m_count_cache[slot].first;
     auto cached_count = m_count_cache[slot].second;
-    ASSERT_DEBUG(cached_count > 0);
+    YGM_ASSERT_DEBUG(cached_count > 0);
     m_map.async_visit(
         key,
         [](const key_type &key, size_t &count, int32_t to_add) {
diff --git a/include/ygm/container/detail/bag_orig.ipp b/include/ygm/container/detail/bag_orig.ipp
deleted file mode 100644
index f64f1e9c..00000000
--- a/include/ygm/container/detail/bag_orig.ipp
+++ /dev/null
@@ -1,271 +0,0 @@
-// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
-// Project Developers. See the top-level COPYRIGHT file for details.
-//
-// SPDX-License-Identifier: MIT
-
-#pragma once
-
-#include <cereal/archives/json.hpp>
-#include <ygm/collective.hpp>
-#include <ygm/detail/std_traits.hpp>
-
-namespace ygm::container {
-
-template <typename Item, typename Alloc>
-bag<Item, Alloc>::bag(ygm::comm &comm) : m_comm(comm), pthis(this) {
-  pthis.check(m_comm);
-}
-
-template <typename Item, typename Alloc>
-bag<Item, Alloc>::~bag() {
-  m_comm.barrier();
-}
-
-template <typename Item, typename Alloc>
-void bag<Item, Alloc>::async_insert(const value_type &item) {
-  auto inserter = [](auto mailbox, auto map, const value_type &item) {
-    map->m_local_bag.push_back(item);
-  };
-  int dest = (m_round_robin++ + m_comm.rank()) % m_comm.size();
-  m_comm.async(dest, inserter, pthis, item);
-}
-
-template <typename Item, typename Alloc>
-void bag<Item, Alloc>::async_insert(const value_type &item, int dest) {
-  auto inserter = [](auto mailbox, auto map, const value_type &item) {
-    map->m_local_bag.push_back(item);
-  };
-  m_comm.async(dest, inserter, pthis, item);
-}
-
-template <typename Item, typename Alloc>
-void bag<Item, Alloc>::async_insert(const std::vector<value_type> &items,
-                                    int                            dest) {
-  auto inserter = [](auto mailbox, auto map,
-                     const std::vector<value_type> &item) {
-    map->m_local_bag.insert(map->m_local_bag.end(), item.begin(), item.end());
-  };
-  m_comm.async(dest, inserter, pthis, items);
-}
-
-template <typename Item, typename Alloc>
-template <typename Function>
-void bag<Item, Alloc>::for_all(Function fn) {
-  m_comm.barrier();
-  local_for_all(fn);
-}
-
-template <typename Item, typename Alloc>
-void bag<Item, Alloc>::clear() {
-  m_comm.barrier();
-  m_local_bag.clear();
-}
-
-template <typename Item, typename Alloc>
-typename bag<Item, Alloc>::size_type bag<Item, Alloc>::size() {
-  m_comm.barrier();
-  return m_comm.all_reduce_sum(m_local_bag.size());
-}
-
-template <typename Item, typename Alloc>
-typename bag<Item, Alloc>::size_type bag<Item, Alloc>::local_size() {
-  return m_local_bag.size();
-}
-
-template <typename Item, typename Alloc>
-void bag<Item, Alloc>::rebalance() {
-  m_comm.barrier();
-
-  // Find current rank's prefix val and desired target size
-  size_t prefix_val  = ygm::prefix_sum(local_size(), m_comm);
-  size_t target_size = std::ceil((size() * 1.0) / m_comm.size());
-
-  // Init to_send array where index is dest and value is the num to send
-  // int to_send[m_comm.size()] = {0};
-  std::unordered_map<size_t, size_t> to_send;
-
-  auto   global_size      = size();
-  size_t small_block_size = global_size / m_comm.size();
-  size_t large_block_size =
-      global_size / m_comm.size() + ((global_size / m_comm.size()) > 0);
-
-  for (size_t i = 0; i < local_size(); i++) {
-    size_t idx = prefix_val + i;
-    size_t target_rank;
-
-    // Determine target rank to match partitioning in ygm::container::array
-    if (idx < (global_size % m_comm.size()) * large_block_size) {
-      target_rank = idx / large_block_size;
-    } else {
-      target_rank = (global_size % m_comm.size()) +
-                    (idx - (global_size % m_comm.size()) * large_block_size) /
-                        small_block_size;
-    }
-
-    if (target_rank != m_comm.rank()) {
-      to_send[target_rank]++;
-    }
-  }
-  m_comm.barrier();
-
-  // Build and send bag indexes as calculated by to_send
-  for (auto &kv_pair : to_send) {
-    async_insert(local_pop(kv_pair.second), kv_pair.first);
-  }
-
-  m_comm.barrier();
-}
-
-template <typename Item, typename Alloc>
-void bag<Item, Alloc>::swap(self_type &s) {
-  m_comm.barrier();
-  m_local_bag.swap(s.m_local_bag);
-}
-
-template <typename Item, typename Alloc>
-template <typename RandomFunc>
-void bag<Item, Alloc>::local_shuffle(RandomFunc &r) {
-  m_comm.barrier();
-  std::shuffle(m_local_bag.begin(), m_local_bag.end(), r);
-}
-
-template <typename Item, typename Alloc>
-void bag<Item, Alloc>::local_shuffle() {
-  ygm::default_random_engine<> r(m_comm, std::random_device()());
-  local_shuffle(r);
-}
-
-template <typename Item, typename Alloc>
-template <typename RandomFunc>
-void bag<Item, Alloc>::global_shuffle(RandomFunc &r) {
-  m_comm.barrier();
-  std::vector<value_type> old_local_bag;
-  std::swap(old_local_bag, m_local_bag);
-
-  auto send_item = [](auto bag, const value_type &item) {
-    bag->m_local_bag.push_back(item);
-  };
-
-  std::uniform_int_distribution<> distrib(0, m_comm.size() - 1);
-  for (value_type i : old_local_bag) {
-    m_comm.async(distrib(r), send_item, pthis, i);
-  }
-}
-
-template <typename Item, typename Alloc>
-void bag<Item, Alloc>::global_shuffle() {
-  ygm::default_random_engine<> r(m_comm, std::random_device()());
-  global_shuffle(r);
-}
-
-template <typename Item, typename Alloc>
-ygm::comm &bag<Item, Alloc>::comm() {
-  return m_comm;
-}
-
-template <typename Item, typename Alloc>
-void bag<Item, Alloc>::serialize(const std::string &fname) {
-  m_comm.barrier();
-  std::string               rank_fname = fname + std::to_string(m_comm.rank());
-  std::ofstream             os(rank_fname, std::ios::binary);
-  cereal::JSONOutputArchive oarchive(os);
-  oarchive(m_local_bag, m_round_robin, m_comm.size());
-}
-
-template <typename Item, typename Alloc>
-void bag<Item, Alloc>::deserialize(const std::string &fname) {
-  m_comm.barrier();
-
-  std::string   rank_fname = fname + std::to_string(m_comm.rank());
-  std::ifstream is(rank_fname, std::ios::binary);
-
-  cereal::JSONInputArchive iarchive(is);
-  int                      comm_size;
-  iarchive(m_local_bag, m_round_robin, comm_size);
-
-  if (comm_size != m_comm.size()) {
-    m_comm.cerr0(
-        "Attempting to deserialize bag_impl using communicator of "
-        "different size than serialized with");
-  }
-}
-
-template <typename Item, typename Alloc>
-template <typename Function>
-void bag<Item, Alloc>::local_for_all(Function fn) {
-  if constexpr (ygm::detail::is_std_pair<Item>) {
-    local_for_all_pair_types(fn);  // pairs get special handling
-  } else {
-    if constexpr (std::is_invocable<decltype(fn), Item &>()) {
-      std::for_each(m_local_bag.begin(), m_local_bag.end(), fn);
-    } else {
-      static_assert(ygm::detail::always_false<>,
-                    "local bag lambdas must be invocable with (value_type &) "
-                    "signatures");
-    }
-  }
-}
-
-template <typename Item, typename Alloc>
-std::vector<typename bag<Item, Alloc>::value_type>
-bag<Item, Alloc>::gather_to_vector(int dest) {
-  std::vector<value_type> result;
-  auto                    p_res = m_comm.make_ygm_ptr(result);
-  m_comm.barrier();
-  auto gatherer = [](auto res, const std::vector<value_type> &outer_data) {
-    res->insert(res->end(), outer_data.begin(), outer_data.end());
-  };
-  m_comm.async(dest, gatherer, p_res, m_local_bag);
-  m_comm.barrier();
-  return result;
-}
-
-template <typename Item, typename Alloc>
-std::vector<typename bag<Item, Alloc>::value_type>
-bag<Item, Alloc>::gather_to_vector() {
-  std::vector<value_type> result;
-  auto                    p_res = m_comm.make_ygm_ptr(result);
-  m_comm.barrier();
-  auto result0 = gather_to_vector(0);
-  if (m_comm.rank0()) {
-    auto distribute = [](auto res, const std::vector<value_type> &data) {
-      res->insert(res->end(), data.begin(), data.end());
-    };
-    m_comm.async_bcast(distribute, p_res, result0);
-  }
-  m_comm.barrier();
-  return result;
-}
-
-template <typename Item, typename Alloc>
-std::vector<typename bag<Item, Alloc>::value_type> bag<Item, Alloc>::local_pop(
-    int n) {
-  ASSERT_RELEASE(n <= local_size());
-
-  size_t                  new_size  = local_size() - n;
-  auto                    pop_start = m_local_bag.begin() + new_size;
-  std::vector<value_type> ret;
-  ret.assign(pop_start, m_local_bag.end());
-  m_local_bag.resize(new_size);
-  return ret;
-}
-
-template <typename Item, typename Alloc>
-template <typename Function>
-void bag<Item, Alloc>::local_for_all_pair_types(Function fn) {
-  if constexpr (std::is_invocable<decltype(fn), Item &>()) {
-    std::for_each(m_local_bag.begin(), m_local_bag.end(), fn);
-  } else if constexpr (std::is_invocable<decltype(fn),
-                                         typename Item::first_type &,
-                                         typename Item::second_type &>()) {
-    for (auto &kv : m_local_bag) {
-      fn(kv.first, kv.second);
-    }
-  } else {
-    static_assert(ygm::detail::always_false<>,
-                  "local bag<pair> lambdas must be invocable with (pair &) "
-                  "or (pair::first_type &, pair::second_type &) signatures");
-  }
-}
-
-}  // namespace ygm::container
diff --git a/include/ygm/container/detail/base_async_contains.hpp b/include/ygm/container/detail/base_async_contains.hpp
index 6e8fd3e6..2565052c 100644
--- a/include/ygm/container/detail/base_async_contains.hpp
+++ b/include/ygm/container/detail/base_async_contains.hpp
@@ -7,33 +7,35 @@
 
 #include <tuple>
 #include <utility>
+#include <ygm/detail/lambda_compliance.hpp>
+#include <ygm/detail/meta/functional.hpp>
 
 namespace ygm::container::detail {
 
 template <typename derived_type, typename for_all_args>
 struct base_async_contains {
-
-  template<typename Function, typename... FuncArgs>
+  template <typename Function, typename... FuncArgs>
   void async_contains(const std::tuple_element<0, for_all_args>::type& value,
-                             Function fn, const FuncArgs&... args) {
+                      Function fn, const FuncArgs&... args) {
+    YGM_CHECK_ASYNC_LAMBDA_COMPLIANCE(Function,
+                                      "ygm::container::async_contains()");
 
     derived_type* derived_this = static_cast<derived_type*>(this);
 
     int dest = derived_this->partitioner.owner(value);
 
-    auto lambda = [](auto pcont,
-                      const std::tuple_element<0, for_all_args>::type& value,
-                      const FuncArgs&... args) {
-
-      Function* fn = nullptr;
+    auto lambda = [fn](auto                                             pcont,
+                       const std::tuple_element<0, for_all_args>::type& value,
+                       const FuncArgs&... args) mutable {
       bool contains = static_cast<bool>(pcont->local_count(value));
-      ygm::meta::apply_optional(*fn, std::make_tuple(pcont),
-                                std::forward_as_tuple(contains, value, args...));
+      ygm::meta::apply_optional(
+          fn, std::make_tuple(pcont),
+          std::forward_as_tuple(contains, value, args...));
     };
 
-    derived_this->comm().async(dest, lambda, derived_this->get_ygm_ptr(),
-                              value, args...);
+    derived_this->comm().async(dest, lambda, derived_this->get_ygm_ptr(), value,
+                               args...);
   }
 };
 
-}  // namespace ygm::container::detail
\ No newline at end of file
+}  // namespace ygm::container::detail
diff --git a/include/ygm/container/detail/base_async_insert_contains.hpp b/include/ygm/container/detail/base_async_insert_contains.hpp
index de17cc14..6c26a641 100644
--- a/include/ygm/container/detail/base_async_insert_contains.hpp
+++ b/include/ygm/container/detail/base_async_insert_contains.hpp
@@ -7,37 +7,40 @@
 
 #include <tuple>
 #include <utility>
+#include <ygm/detail/lambda_compliance.hpp>
+#include <ygm/detail/meta/functional.hpp>
 
 namespace ygm::container::detail {
 
 template <typename derived_type, typename for_all_args>
 struct base_async_insert_contains {
-
-  template<typename Function, typename... FuncArgs>
-  void async_insert_contains(const std::tuple_element<0, for_all_args>::type& value,
-                             Function fn, const FuncArgs&... args) {
+  template <typename Function, typename... FuncArgs>
+  void async_insert_contains(
+      const std::tuple_element<0, for_all_args>::type& value, Function fn,
+      const FuncArgs&... args) {
+    YGM_CHECK_ASYNC_LAMBDA_COMPLIANCE(
+        Function, "ygm::container::async_insert_contains()");
 
     derived_type* derived_this = static_cast<derived_type*>(this);
 
     int dest = derived_this->partitioner.owner(value);
 
-    auto lambda = [](auto pcont,
-                      const std::tuple_element<0, for_all_args>::type& value,
-                      const FuncArgs&... args) {
-
-      Function* fn = nullptr;
+    auto lambda = [fn](auto                                             pcont,
+                       const std::tuple_element<0, for_all_args>::type& value,
+                       const FuncArgs&... args) mutable {
       bool contains = static_cast<bool>(pcont->local_count(value));
       if (!contains) {
         pcont->local_insert(value);
-      } 
+      }
 
-      ygm::meta::apply_optional(*fn, std::make_tuple(pcont),
-                                std::forward_as_tuple(contains, value, args...));
+      ygm::meta::apply_optional(
+          fn, std::make_tuple(pcont),
+          std::forward_as_tuple(contains, value, args...));
     };
 
-    derived_this->comm().async(dest, lambda, derived_this->get_ygm_ptr(),
-                              value, args...);
+    derived_this->comm().async(dest, lambda, derived_this->get_ygm_ptr(), value,
+                               args...);
   }
 };
 
-}  // namespace ygm::container::detail
\ No newline at end of file
+}  // namespace ygm::container::detail
diff --git a/include/ygm/container/detail/base_async_reduce.hpp b/include/ygm/container/detail/base_async_reduce.hpp
index e2d3d2bf..6ae652fd 100644
--- a/include/ygm/container/detail/base_async_reduce.hpp
+++ b/include/ygm/container/detail/base_async_reduce.hpp
@@ -8,6 +8,7 @@
 #include <tuple>
 #include <utility>
 #include <ygm/container/detail/base_concepts.hpp>
+#include <ygm/detail/lambda_compliance.hpp>
 
 namespace ygm::container::detail {
 
@@ -18,20 +19,23 @@ struct base_async_reduce {
       const typename std::tuple_element<0, for_all_args>::type& key,
       const typename std::tuple_element<1, for_all_args>::type& value,
       ReductionOp                                               reducer) {
+    YGM_CHECK_ASYNC_LAMBDA_COMPLIANCE(ReductionOp,
+                                      "ygm::container::async_reduce()");
+
     derived_type* derived_this = static_cast<derived_type*>(this);
 
     int dest = derived_this->partitioner.owner(key);
 
-    auto rlambda = [reducer](
-                       auto                                             pcont,
-                       const std::tuple_element<0, for_all_args>::type& key,
-                       const std::tuple_element<1, for_all_args>::type& value) {
-      pcont->local_reduce(key, value, reducer);
-    };
+    auto rlambda =
+        [reducer](
+            auto pcont, const std::tuple_element<0, for_all_args>::type& key,
+            const std::tuple_element<1, for_all_args>::type& value) mutable {
+          pcont->local_reduce(key, value, reducer);
+        };
 
     derived_this->comm().async(dest, rlambda, derived_this->get_ygm_ptr(), key,
                                value);
   }
 };
 
-}  // namespace ygm::container::detail
\ No newline at end of file
+}  // namespace ygm::container::detail
diff --git a/include/ygm/container/detail/base_async_visit.hpp b/include/ygm/container/detail/base_async_visit.hpp
index 9a611818..6f9c22c8 100644
--- a/include/ygm/container/detail/base_async_visit.hpp
+++ b/include/ygm/container/detail/base_async_visit.hpp
@@ -9,6 +9,7 @@
 #include <utility>
 #include <ygm/container/detail/base_concepts.hpp>
 #include <ygm/detail/interrupt_mask.hpp>
+#include <ygm/detail/lambda_compliance.hpp>
 
 namespace ygm::container::detail {
 
@@ -19,15 +20,17 @@ struct base_async_visit {
                    Visitor visitor, const VisitorArgs&... args)
     requires DoubleItemTuple<for_all_args>
   {
+    YGM_CHECK_ASYNC_LAMBDA_COMPLIANCE(Visitor, "ygm::container::async_visit()");
+
     derived_type* derived_this = static_cast<derived_type*>(this);
 
     int dest = derived_this->partitioner.owner(key);
 
-    auto vlambda = [](auto                                             pcont,
-                      const std::tuple_element<0, for_all_args>::type& key,
-                      const VisitorArgs&... args) {
-      Visitor* vis = nullptr;
-      pcont->local_visit(key, *vis, args...);
+    auto vlambda = [visitor](
+                       auto                                             pcont,
+                       const std::tuple_element<0, for_all_args>::type& key,
+                       const VisitorArgs&... args) mutable {
+      pcont->local_visit(key, visitor, args...);
     };
 
     derived_this->comm().async(dest, vlambda, derived_this->get_ygm_ptr(), key,
@@ -40,15 +43,18 @@ struct base_async_visit {
       const VisitorArgs&... args)
     requires DoubleItemTuple<for_all_args>
   {
+    YGM_CHECK_ASYNC_LAMBDA_COMPLIANCE(
+        Visitor, "ygm::container::async_visit_if_contains()");
+
     derived_type* derived_this = static_cast<derived_type*>(this);
 
     int dest = derived_this->partitioner.owner(key);
 
-    auto vlambda = [](auto                                             pcont,
-                      const std::tuple_element<0, for_all_args>::type& key,
-                      const VisitorArgs&... args) {
-      Visitor* vis = nullptr;
-      pcont->local_visit_if_contains(key, *vis, args...);
+    auto vlambda = [visitor](
+                       auto                                             pcont,
+                       const std::tuple_element<0, for_all_args>::type& key,
+                       const VisitorArgs&... args) mutable {
+      pcont->local_visit_if_contains(key, visitor, args...);
     };
 
     derived_this->comm().async(dest, vlambda, derived_this->get_ygm_ptr(), key,
@@ -61,15 +67,18 @@ struct base_async_visit {
       const VisitorArgs&... args) const
     requires DoubleItemTuple<for_all_args>
   {
+    YGM_CHECK_ASYNC_LAMBDA_COMPLIANCE(
+        Visitor, "ygm::container::async_visit_if_contains()");
+
     const derived_type* derived_this = static_cast<const derived_type*>(this);
 
     int dest = derived_this->partitioner.owner(key);
 
-    auto vlambda = [](const auto                                       pcont,
-                      const std::tuple_element<0, for_all_args>::type& key,
-                      const VisitorArgs&... args) {
-      Visitor* vis = nullptr;
-      pcont->local_visit_if_contains(key, *vis, args...);
+    auto vlambda = [visitor](
+                       const auto                                       pcont,
+                       const std::tuple_element<0, for_all_args>::type& key,
+                       const VisitorArgs&... args) mutable {
+      pcont->local_visit_if_contains(key, visitor, args...);
     };
 
     derived_this->comm().async(dest, vlambda, derived_this->get_ygm_ptr(), key,
@@ -79,4 +88,4 @@ struct base_async_visit {
   // todo:   async_insert_visit()
 };
 
-}  // namespace ygm::container::detail
\ No newline at end of file
+}  // namespace ygm::container::detail
diff --git a/include/ygm/container/detail/base_batch_erase.hpp b/include/ygm/container/detail/base_batch_erase.hpp
new file mode 100644
index 00000000..2ae842cd
--- /dev/null
+++ b/include/ygm/container/detail/base_batch_erase.hpp
@@ -0,0 +1,135 @@
+// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
+// Project Developers. See the top-level COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <tuple>
+#include <utility>
+#include <ygm/container/detail/base_concepts.hpp>
+
+namespace ygm::container::detail {
+
+template <typename derived_type, typename for_all_args>
+struct base_batch_erase_key {
+  using key_type = std::tuple_element_t<0, for_all_args>;
+  // CRTP mix-in: erase(cont) calls derived_type::async_erase per key in cont.
+  template <typename Container>
+  void erase(const Container &cont) requires detail::HasForAll<Container> &&
+      SingleItemTuple<typename Container::for_all_args> && std::convertible_to<
+          std::tuple_element_t<0, typename Container::for_all_args>, key_type> {
+    derived_type *derived_this = static_cast<derived_type *>(this);
+    // Source declares for_all_args: visit its items and async_erase each key.
+    cont.for_all(
+        [derived_this](const auto &key) { derived_this->async_erase(key); });
+
+    derived_this->comm().barrier();
+  }
+
+  template <typename Container>
+  void erase(const Container &cont) requires STLContainer<Container> &&
+      AtLeastOneItemTuple<for_all_args> &&
+      std::convertible_to<typename Container::value_type, key_type> {
+    derived_type *derived_this = static_cast<derived_type *>(this);
+    // STL overload: iterate cont directly, then barrier on the communicator.
+    for (const auto &key : cont) {
+      derived_this->async_erase(key);
+    }
+
+    derived_this->comm().barrier();
+  }
+};
+
+template <typename derived_type, typename for_all_args>
+struct base_batch_erase_key_value {
+  using key_type    = std::tuple_element_t<0, for_all_args>;
+  using mapped_type = std::tuple_element_t<1, for_all_args>;
+  // CRTP mix-in: erase(cont) overloads for (key, value) or key-only sources.
+  template <typename Container>
+  void erase(const Container &cont) requires HasForAll<Container> &&
+      DoubleItemTuple<typename Container::for_all_args> && std::convertible_to<
+          std::tuple_element_t<0, typename Container::for_all_args>,
+          key_type> &&
+      std::convertible_to<
+          std::tuple_element_t<1, typename Container::for_all_args>,
+          mapped_type> {
+    derived_type *derived_this = static_cast<derived_type *>(this);
+    // Source's for_all passes key and value as two separate arguments.
+    cont.for_all([derived_this](const auto &key, const auto &value) {
+      derived_this->async_erase(key, value);
+    });
+
+    derived_this->comm().barrier();
+  }
+
+  template <typename Container>
+  void erase(const Container &cont) requires HasForAll<Container> &&
+      SingleItemTuple<typename Container::for_all_args> && DoubleItemTuple<
+          std::tuple_element_t<0, typename Container::for_all_args>> &&
+      std::convertible_to<
+          std::tuple_element_t<
+              0, std::tuple_element_t<0, typename Container::for_all_args>>,
+          key_type> &&
+      std::convertible_to<
+          std::tuple_element_t<
+              1, std::tuple_element_t<0, typename Container::for_all_args>>,
+          mapped_type> {
+    derived_type *derived_this = static_cast<derived_type *>(this);
+    // Source's for_all passes one two-element item; unpack it before erasing.
+    cont.for_all([derived_this](const auto &key_value) {
+      const auto &[key, value] = key_value;
+
+      derived_this->async_erase(key, value);
+    });
+
+    derived_this->comm().barrier();
+  }
+
+  template <typename Container>
+  void erase(const Container &cont) requires STLContainer<Container> &&
+      DoubleItemTuple<typename Container::value_type> && std::convertible_to<
+          std::tuple_element_t<0, typename Container::value_type>, key_type> &&
+      std::convertible_to<
+          std::tuple_element_t<1, typename Container::value_type>,
+          mapped_type> {
+    derived_type *derived_this = static_cast<derived_type *>(this);
+    // NOTE(review): only this overload barriers before erasing; confirm intent.
+    derived_this->comm().barrier();
+    // STL container of two-element items: unpack and async_erase each one.
+    for (const auto &key_value : cont) {
+      const auto &[key, value] = key_value;
+      derived_this->async_erase(key, value);
+    }
+
+    derived_this->comm().barrier();
+  }
+
+  // Copies of base_batch_erase_key functions to allow deletions from keys alone
+  template <typename Container>
+  void erase(const Container &cont) requires detail::HasForAll<Container> &&
+      SingleItemTuple<typename Container::for_all_args> && std::convertible_to<
+          std::tuple_element_t<0, typename Container::for_all_args>, key_type> {
+    derived_type *derived_this = static_cast<derived_type *>(this);
+    // Key-only source with for_all: async_erase(key) for each visited key.
+    cont.for_all(
+        [derived_this](const auto &key) { derived_this->async_erase(key); });
+
+    derived_this->comm().barrier();
+  }
+
+  template <typename Container>
+  void erase(const Container &cont) requires STLContainer<Container> &&
+      AtLeastOneItemTuple<for_all_args> &&
+      std::convertible_to<typename Container::value_type, key_type> {
+    derived_type *derived_this = static_cast<derived_type *>(this);
+    // Key-only STL source: async_erase(key) for each element of cont.
+    for (const auto &key : cont) {
+      derived_this->async_erase(key);
+    }
+
+    derived_this->comm().barrier();
+  }
+};
+
+}  // namespace ygm::container::detail
diff --git a/include/ygm/container/detail/base_concepts.hpp b/include/ygm/container/detail/base_concepts.hpp
index 3881a832..9f82abc2 100644
--- a/include/ygm/container/detail/base_concepts.hpp
+++ b/include/ygm/container/detail/base_concepts.hpp
@@ -11,23 +11,44 @@
 namespace ygm::container::detail {
 
 template <typename T>
-concept SingleItemTuple = requires(T v) {
-  requires std::tuple_size<T>::value == 1;
-};
+concept SingleItemTuple =
+    requires(T v) { requires std::tuple_size<T>::value == 1; };
+
+template <typename T>
+concept DoubleItemTuple =
+    requires(T v) { requires std::tuple_size<T>::value == 2; };
+
+template <typename T>
+concept AtLeastOneItemTuple =
+    requires(T v) { requires std::tuple_size<T>::value >= 1; };
+
+template <typename T>
+concept HasForAll = requires(T v) { typename T::for_all_args; };
 
 template <typename T>
-concept DoubleItemTuple = requires(T v) {
-  requires std::tuple_size<T>::value == 2;
+concept HasAsyncReduceWithReductionOp = requires(T v) {
+  {
+    std::declval<T>().async_reduce(
+        std::declval<typename T::key_type>(),
+        std::declval<typename T::mapped_type>(),
+        [](const typename T::mapped_type a, const typename T::mapped_type b) {
+          return a;
+        })
+  } -> std::same_as<void>;
 };
 
 template <typename T>
-concept AtLeastOneItemTuple = requires(T v) {
-  requires std::tuple_size<T>::value >= 1;
+concept HasAsyncReduceWithoutReductionOp = requires(T v) {
+  {
+    std::declval<T>().async_reduce(std::declval<typename T::key_type>(),
+                                   std::declval<typename T::mapped_type>())
+  } -> std::same_as<void>;
 };
 
 template <typename T>
-concept HasForAll = requires(T v) {
-  typename T::for_all_args;
+concept HasAsyncReduce = requires(T v) {
+  requires HasAsyncReduceWithReductionOp<T> or
+               HasAsyncReduceWithoutReductionOp<T>;
 };
 
 // Copied solution for an STL container concept from
@@ -61,5 +82,4 @@ concept STLContainer = requires(ContainerType a, const ContainerType b) {
   { a.max_size() } -> std::same_as<typename ContainerType::size_type>;
   { a.empty() } -> std::same_as<bool>;
 };
-
 }  // namespace ygm::container::detail
diff --git a/include/ygm/container/detail/base_iteration.hpp b/include/ygm/container/detail/base_iteration.hpp
index 7f0e7cd7..51434b09 100644
--- a/include/ygm/container/detail/base_iteration.hpp
+++ b/include/ygm/container/detail/base_iteration.hpp
@@ -6,43 +6,54 @@
 #pragma once
 
 #include <tuple>
+#include <vector>
 #include <ygm/collective.hpp>
 #include <ygm/container/detail/base_concepts.hpp>
 
 namespace ygm::container::detail {
 
 template <typename derived_type, typename FilterFunction>
-class filter_proxy;
+class filter_proxy_value;
+template <typename derived_type, typename FilterFunction>
+class filter_proxy_key_value;
 
-template <typename derived_type, typename MapFunction>
-class map_proxy;
+template <typename derived_type, typename TransformFunction>
+class transform_proxy_value;
+template <typename derived_type, typename TransformFunction>
+class transform_proxy_key_value;
 
 template <typename derived_type>
-class flatten_proxy;
+class flatten_proxy_value;
+template <typename derived_type>
+class flatten_proxy_key_value;
+
+template <typename derived_type, SingleItemTuple for_all_args>
+struct base_iteration_value {
+  using value_type = typename std::tuple_element<0, for_all_args>::type;
 
-template <typename derived_type, typename for_all_args>
-struct base_iteration {
   template <typename Function>
   void for_all(Function fn) {
-    derived_type* derived_this = static_cast<derived_type*>(this);
+    auto* derived_this = static_cast<derived_type*>(this);
     derived_this->comm().barrier();
     derived_this->local_for_all(fn);
   }
 
   template <typename Function>
   void for_all(Function fn) const {
-    const derived_type* derived_this = static_cast<const derived_type*>(this);
+    const auto* derived_this = static_cast<const derived_type*>(this);
     derived_this->comm().barrier();
     derived_this->local_for_all(fn);
   }
 
   template <typename STLContainer>
   void gather(STLContainer& gto, int rank) const {
+    static_assert(
+        std::is_same_v<typename STLContainer::value_type, value_type>);
     // TODO, make an all gather version that defaults to rank = -1 & uses a temp
     // container.
     bool                 all_gather   = (rank == -1);
     static STLContainer* spgto        = &gto;
-    const derived_type*  derived_this = static_cast<const derived_type*>(this);
+    const auto*          derived_this = static_cast<const derived_type*>(this);
     const ygm::comm&     mycomm       = derived_this->comm();
 
     auto glambda = [&mycomm, rank](const auto& value) {
@@ -51,78 +62,284 @@ struct base_iteration {
           value);
     };
 
-    for_all(glambda);
+    derived_this->for_all(glambda);
 
     derived_this->comm().barrier();
   }
 
-  template <typename MergeFunction>
-  std::tuple_element<0, for_all_args>::type reduce(MergeFunction merge) const
+  // Returns up to k items, best first under comp (default: largest first),
+  // merging each rank's local top-k through comm().all_reduce.
+  template <typename Compare = std::greater<value_type>>
+  std::vector<value_type> gather_topk(size_t k, Compare comp = Compare()) const
     requires SingleItemTuple<for_all_args>
   {
-    const derived_type* derived_this = static_cast<const derived_type*>(this);
+    const auto*      derived_this = static_cast<const derived_type*>(this);
+    const ygm::comm& mycomm       = derived_this->comm();
+    std::vector<value_type> local_topk;
+
+    // Local top-k. Dispatch through derived_type so an overriding for_all
+    // (e.g. a proxy's) is used, as gather/reduce/collect already do.
+    derived_this->for_all([&local_topk, comp, k](const value_type& value) {
+      local_topk.push_back(value);
+      std::sort(local_topk.begin(), local_topk.end(), comp);
+      if (local_topk.size() > k) {
+        local_topk.pop_back();
+      }
+    });
+
+    // Global top-k: concatenate two candidate lists, sort by comp, and keep
+    // the first k. This merge is the operator handed to all_reduce.
+    return mycomm.all_reduce(
+        local_topk, [comp, k](const std::vector<value_type>& va,
+                              const std::vector<value_type>& vb) {
+          std::vector<value_type> out(va.begin(), va.end());
+          out.insert(out.end(), vb.begin(), vb.end());
+          std::sort(out.begin(), out.end(), comp);
+          while (out.size() > k) {
+            out.pop_back();
+          }
+          return out;
+        });
+  }
+
+  template <typename MergeFunction>
+  value_type reduce(MergeFunction merge) const {
+    const auto* derived_this = static_cast<const derived_type*>(this);
     derived_this->comm().barrier();
-    ASSERT_RELEASE(derived_this->local_size() >
-                   0);  // empty partition not handled yet
 
     using value_type = typename std::tuple_element<0, for_all_args>::type;
     bool first       = true;
 
-    value_type to_return;
+    value_type local_reduce;
 
-    auto rlambda = [&to_return, &first, &merge](const value_type& value) {
+    auto rlambda = [&local_reduce, &first, &merge](const value_type& value) {
       if (first) {
-        to_return = value;
-        first     = false;
+        local_reduce = value;
+        first        = false;
       } else {
-        to_return = merge(to_return, value);
+        local_reduce = merge(local_reduce, value);
       }
     };
 
     derived_this->for_all(rlambda);
 
-    derived_this->comm().barrier();
+    std::optional<value_type> to_reduce;
+    if (!first) {
+      to_reduce = local_reduce;
+    }
 
-    return ::ygm::all_reduce(to_return, merge, derived_this->comm());
+    std::optional<value_type> to_return =
+        ::ygm::all_reduce(to_reduce, merge, derived_this->comm());
+    YGM_ASSERT_RELEASE(to_return.has_value());
+    return to_return.value();
   }
 
   template <typename YGMContainer>
   void collect(YGMContainer& c) const {
+    const auto* derived_this = static_cast<const derived_type*>(this);
+    auto clambda = [&c](const value_type& item) { c.async_insert(item); };
+    derived_this->for_all(clambda);
+  }
+
+  // Feeds each (key, value) item to map.async_reduce(key, value, reducer).
+  template <typename MapType, typename ReductionOp>
+  void reduce_by_key(MapType& map, ReductionOp reducer) const {
+    // TODO:  static_assert MapType is ygm::container::map
+    const auto* derived_this = static_cast<const derived_type*>(this);
+    using reduce_key_type    = typename MapType::key_type;
+    using reduce_value_type  = typename MapType::mapped_type;
+    static_assert(std::is_same_v<value_type,
+                                 std::pair<reduce_key_type, reduce_value_type>>,
+                  "value_type must be a std::pair");
+
+    auto rbklambda = [&map, reducer](const value_type& kvp) {
+      map.async_reduce(kvp.first, kvp.second, reducer);
+    };
+    derived_this->for_all(rbklambda);
+  }
+
+  template <typename TransformFunction>
+  transform_proxy_value<derived_type, TransformFunction> transform(
+      TransformFunction ffn);
+
+  flatten_proxy_value<derived_type> flatten();
+
+  template <typename FilterFunction>
+  filter_proxy_value<derived_type, FilterFunction> filter(FilterFunction ffn);
+
+ private:
+  template <typename STLContainer, typename Value>
+    requires requires(STLContainer stc, Value v) { stc.push_back(v); }
+  static void generic_insert(STLContainer& stc, const Value& value) {
+    stc.push_back(value);
+  }
+
+  template <typename STLContainer, typename Value>
+    requires requires(STLContainer stc, Value v) { stc.insert(v); }
+  static void generic_insert(STLContainer& stc, const Value& value) {
+    stc.insert(value);
+  }
+};
+
+// For Associative Containers
+template <typename derived_type, DoubleItemTuple for_all_args>
+struct base_iteration_key_value {
+  using key_type    = typename std::tuple_element<0, for_all_args>::type;
+  using mapped_type = typename std::tuple_element<1, for_all_args>::type;
+
+  template <typename Function>
+  void for_all(Function fn) {
+    auto* derived_this = static_cast<derived_type*>(this);
+    derived_this->comm().barrier();
+    derived_this->local_for_all(fn);
+  }
+
+  template <typename Function>
+  void for_all(Function fn) const {
+    const auto* derived_this = static_cast<const derived_type*>(this);
+    derived_this->comm().barrier();
+    derived_this->local_for_all(fn);
+  }
+
+  template <typename STLContainer>
+  void gather(STLContainer& gto, int rank) const {
+    static_assert(std::is_same_v<typename STLContainer::value_type,
+                                 std::pair<key_type, mapped_type>>);
+    // TODO, make an all gather version that defaults to rank = -1 & uses a temp
+    // container.
+    // The captureless handler below reaches its target through this static;
+    // assign per call: a static initializer would bind only the first gto.
+    static STLContainer* spgto = nullptr;
+    spgto                      = &gto;
+    const auto*      derived_this = static_cast<const derived_type*>(this);
+    const ygm::comm& mycomm       = derived_this->comm();
+
+    auto glambda = [&mycomm, rank](const key_type&    key,
+                                   const mapped_type& value) {
+      mycomm.async(
+          rank,
+          [](const key_type& key, const mapped_type& value) {
+            generic_insert(*spgto, std::make_pair(key, value));
+          },
+          key, value);
+    };
+    derived_this->for_all(glambda);
+    derived_this->comm().barrier();
+  }
+
+  // Returns up to k (key, mapped) pairs, best first under comp (default:
+  // largest first), merging each rank's local top-k via comm().all_reduce.
+  template <typename Compare = std::greater<std::pair<key_type, mapped_type>>>
+  std::vector<std::pair<key_type, mapped_type>> gather_topk(
+      size_t k, Compare comp = Compare()) const {
+    const auto*      derived_this = static_cast<const derived_type*>(this);
+    const ygm::comm& mycomm       = derived_this->comm();
+    using vec_type = std::vector<std::pair<key_type, mapped_type>>;
+    vec_type local_topk;
+
+    // Local top-k. Dispatch through derived_type so an overriding for_all
+    // (e.g. a proxy's) is used, as gather/collect/reduce_by_key already do.
+    derived_this->for_all(
+        [&local_topk, comp, k](const key_type& key, const mapped_type& mapped) {
+          local_topk.push_back(std::make_pair(key, mapped));
+          std::sort(local_topk.begin(), local_topk.end(), comp);
+          if (local_topk.size() > k) {
+            local_topk.pop_back();
+          }
+        });
+
+    // Merge step handed to all_reduce: concatenate, sort by comp, keep first k.
+    return mycomm.all_reduce(
+        local_topk, [comp, k](const vec_type& va, const vec_type& vb) {
+          vec_type out(va.begin(), va.end());
+          out.insert(out.end(), vb.begin(), vb.end());
+          std::sort(out.begin(), out.end(), comp);
+          while (out.size() > k) {
+            out.pop_back();
+          }
+          return out;
+        });
+  }
+
+  /* It's unclear whether this makes sense for an associative container.
+  template <typename MergeFunction>
+  std::pair<key_type, mapped_type> reduce(MergeFunction merge) const {
     const derived_type* derived_this = static_cast<const derived_type*>(this);
-    auto clambda = [&c](const std::tuple_element<0, for_all_args>::type& item) {
-      c.async_insert(item);
+    derived_this->comm().barrier();
+
+    bool first = true;
+
+    std::pair<key_type, mapped_type> local_reduce;
+
+    auto rlambda = [&local_reduce, &first,
+                    &merge](const std::pair<key_type, mapped_type>& value) {
+      if (first) {
+        local_reduce = value;
+        first        = false;
+      } else {
+        local_reduce = merge(local_reduce, value);
+      }
+    };
+
+    derived_this->for_all(rlambda);
+
+    std::optional<std::pair<key_type, mapped_type>> to_reduce;
+    if (!first) {  // local partition was empty!
+      to_reduce = std::move(local_reduce);
+    }
+
+    std::optional<std::pair<key_type, mapped_type>> to_return =
+        ::ygm::all_reduce(to_reduce, merge, derived_this->comm());
+    YGM_ASSERT_RELEASE(to_return.has_value());
+    return to_return.value();
+  }
+  */
+
+  template <typename YGMContainer>
+  void collect(YGMContainer& c) const {
+    const auto* derived_this = static_cast<const derived_type*>(this);
+    auto        clambda = [&c](const key_type& key, const mapped_type& value) {
+      c.async_insert(std::make_pair(key, value));
     };
     derived_this->for_all(clambda);
   }
 
   template <typename MapType, typename ReductionOp>
   void reduce_by_key(MapType& map, ReductionOp reducer) const {
-    const derived_type* derived_this = static_cast<const derived_type*>(this);
+    const auto* derived_this = static_cast<const derived_type*>(this);
     // static_assert ygm::map
     using reduce_key_type   = typename MapType::key_type;
     using reduce_value_type = typename MapType::mapped_type;
-    if constexpr (std::tuple_size<for_all_args>::value == 1) {
-      // must be a std::pair
-      auto rbklambda = [&map, reducer](std::pair<reduce_key_type, reduce_value_type> kvp) {
-        map.async_reduce(kvp.first, kvp.second, reducer);
-      };
-      derived_this->for_all(rbklambda);
-    } else {
-      static_assert(std::tuple_size<for_all_args>::value == 2);
-      auto rbklambda = [&map, reducer](const reduce_key_type& key, const reduce_value_type& value) {
-        map.async_reduce(key, value, reducer);
-      };
-      derived_this->for_all(rbklambda);
-    }
+
+    static_assert(std::tuple_size<for_all_args>::value == 2);
+    auto rbklambda = [&map, reducer](const reduce_key_type&   key,
+                                     const reduce_value_type& value) {
+      map.async_reduce(key, value, reducer);
+    };
+    derived_this->for_all(rbklambda);
+  }
+
+  template <typename TransformFunction>
+  transform_proxy_key_value<derived_type, TransformFunction> transform(
+      TransformFunction ffn);
+
+  auto keys() {
+    return transform([](const key_type&    key,
+                        const mapped_type& value) -> key_type { return key; });
   }
 
-  template <typename MapFunction>
-  map_proxy<derived_type, MapFunction> map(MapFunction ffn);
+  auto values() {
+    return transform(
+        [](const key_type& key, const mapped_type& value) -> mapped_type {
+          return value;
+        });
+  }
 
-  flatten_proxy<derived_type> flatten();
+  flatten_proxy_key_value<derived_type> flatten();
 
   template <typename FilterFunction>
-  filter_proxy<derived_type, FilterFunction> filter(FilterFunction ffn);
+  filter_proxy_key_value<derived_type, FilterFunction> filter(
+      FilterFunction ffn);
 
  private:
   template <typename STLContainer, typename Value>
@@ -142,33 +359,64 @@ struct base_iteration {
 
 #include <ygm/container/detail/filter_proxy.hpp>
 #include <ygm/container/detail/flatten_proxy.hpp>
-#include <ygm/container/detail/map_proxy.hpp>
+#include <ygm/container/detail/transform_proxy.hpp>
 
 namespace ygm::container::detail {
 
-template <typename derived_type, typename for_all_args>
-template <typename MapFunction>
-map_proxy<derived_type, MapFunction>
-base_iteration<derived_type, for_all_args>::map(MapFunction ffn) {
-  derived_type* derived_this = static_cast<derived_type*>(this);
-  return map_proxy<derived_type, MapFunction>(*derived_this, ffn);
+template <typename derived_type, SingleItemTuple for_all_args>
+template <typename TransformFunction>
+transform_proxy_value<derived_type, TransformFunction>
+base_iteration_value<derived_type, for_all_args>::transform(
+    TransformFunction ffn) {
+  auto* derived_this = static_cast<derived_type*>(this);
+  return transform_proxy_value<derived_type, TransformFunction>(*derived_this,
+                                                                ffn);
 }
 
-template <typename derived_type, typename for_all_args>
-inline flatten_proxy<derived_type>
-base_iteration<derived_type, for_all_args>::flatten() {
+template <typename derived_type, SingleItemTuple for_all_args>
+inline flatten_proxy_value<derived_type>
+base_iteration_value<derived_type, for_all_args>::flatten() {
   // static_assert(
   //     type_traits::is_vector<std::tuple_element<0, for_all_args>>::value);
-  derived_type* derived_this = static_cast<derived_type*>(this);
-  return flatten_proxy<derived_type>(*derived_this);
+  auto* derived_this = static_cast<derived_type*>(this);
+  return flatten_proxy_value<derived_type>(*derived_this);
 }
 
-template <typename derived_type, typename for_all_args>
+template <typename derived_type, SingleItemTuple for_all_args>
 template <typename FilterFunction>
-filter_proxy<derived_type, FilterFunction>
-base_iteration<derived_type, for_all_args>::filter(FilterFunction ffn) {
-  derived_type* derived_this = static_cast<derived_type*>(this);
-  return filter_proxy<derived_type, FilterFunction>(*derived_this, ffn);
+filter_proxy_value<derived_type, FilterFunction>
+base_iteration_value<derived_type, for_all_args>::filter(FilterFunction ffn) {
+  auto* derived_this = static_cast<derived_type*>(this);
+  return filter_proxy_value<derived_type, FilterFunction>(*derived_this, ffn);
 }
 
-}  // namespace ygm::container::detail
\ No newline at end of file
+template <typename derived_type, DoubleItemTuple for_all_args>
+template <typename TransformFunction>
+transform_proxy_key_value<derived_type, TransformFunction>
+base_iteration_key_value<derived_type, for_all_args>::transform(
+    TransformFunction ffn) {
+  auto* derived_this = static_cast<derived_type*>(this);
+  return transform_proxy_key_value<derived_type, TransformFunction>(
+      *derived_this, ffn);
+}
+
+template <typename derived_type, DoubleItemTuple for_all_args>
+inline flatten_proxy_key_value<derived_type>
+base_iteration_key_value<derived_type, for_all_args>::flatten() {
+  // static_assert(
+  //     type_traits::is_vector<std::tuple_element<0, for_all_args>>::value);
+  auto* derived_this = static_cast<derived_type*>(this);
+  return flatten_proxy_key_value<derived_type>(*derived_this);
+}
+
+template <typename derived_type, DoubleItemTuple for_all_args>
+template <typename FilterFunction>
+filter_proxy_key_value<derived_type, FilterFunction>
+base_iteration_key_value<derived_type, for_all_args>::filter(
+    FilterFunction ffn) {
+  auto* derived_this = static_cast<derived_type*>(this);
+  return filter_proxy_key_value<derived_type, FilterFunction>(*derived_this,
+                                                              ffn);
+}
+
+}  // namespace ygm::container::detail
diff --git a/include/ygm/container/detail/block_partitioner.hpp b/include/ygm/container/detail/block_partitioner.hpp
index 83e88a7b..3ff26914 100644
--- a/include/ygm/container/detail/block_partitioner.hpp
+++ b/include/ygm/container/detail/block_partitioner.hpp
@@ -48,7 +48,7 @@ struct block_partitioner {
     int to_return;
     // Owner depends on whether index is before switching to small blocks
     if (index < (m_partitioned_size % m_comm_size) * m_large_block_size) {
-      ASSERT_RELEASE(m_large_block_size > 0);
+      YGM_ASSERT_RELEASE(m_large_block_size > 0);
       to_return = index / m_large_block_size;
     } else {
       if (m_small_block_size == 0) {
@@ -56,26 +56,26 @@ struct block_partitioner {
                   << m_partitioned_size << "\t" << m_comm_size << "\t" << index
                   << std::endl;
       }
-      ASSERT_RELEASE(m_small_block_size > 0);
+      YGM_ASSERT_RELEASE(m_small_block_size > 0);
       to_return =
           (m_partitioned_size % m_comm_size) +
           (index - (m_partitioned_size % m_comm_size) * m_large_block_size) /
               m_small_block_size;
     }
-    ASSERT_RELEASE((to_return >= 0) && (to_return < m_comm_size));
+    YGM_ASSERT_RELEASE((to_return >= 0) && (to_return < m_comm_size));
 
     return to_return;
   }
 
   index_type local_index(const index_type &global_index) {
     index_type to_return = global_index - m_local_start_index;
-    ASSERT_RELEASE((to_return >= 0) && (to_return <= m_small_block_size));
+    YGM_ASSERT_RELEASE((to_return >= 0) && (to_return < m_local_size));
     return to_return;
   }
 
   index_type global_index(const index_type &local_index) {
     index_type to_return = m_local_start_index + local_index;
-    ASSERT_RELEASE(to_return < m_partitioned_size);
+    YGM_ASSERT_RELEASE(to_return < m_partitioned_size);
     return to_return;
   }
 
diff --git a/include/ygm/container/detail/disjoint_set_impl.hpp b/include/ygm/container/detail/disjoint_set_impl.hpp
index 0748ced8..f4a79798 100644
--- a/include/ygm/container/detail/disjoint_set_impl.hpp
+++ b/include/ygm/container/detail/disjoint_set_impl.hpp
@@ -50,7 +50,7 @@ class disjoint_set_impl {
 
    private:
     void increase_rank(const rank_type new_rank) {
-      ASSERT_RELEASE(m_rank < new_rank);
+      YGM_ASSERT_RELEASE(m_rank < new_rank);
       m_rank = new_rank;
 
       // Only called on roots
@@ -184,12 +184,12 @@ class disjoint_set_impl {
       const auto &my_parent = item_data.second.get_parent();
       const auto  my_parent_rank_est =
           item_data.second.get_parent_rank_estimate();
-      ASSERT_RELEASE(my_rank >= merging_rank);
+      YGM_ASSERT_RELEASE(my_rank >= merging_rank);
 
       if (my_rank > merging_rank) {
         return;
       } else {
-        ASSERT_RELEASE(my_rank == merging_rank);
+        YGM_ASSERT_RELEASE(my_rank == merging_rank);
         if (my_parent ==
             my_item) {  // Merging new item onto root. Need to increase rank.
           item_data.second.increase_rank(merging_rank + 1);
@@ -331,12 +331,12 @@ class disjoint_set_impl {
       const auto &my_parent = item_data.second.get_parent();
       const auto  my_parent_rank_est =
           item_data.second.get_parent_rank_estimate();
-      ASSERT_RELEASE(my_rank >= merging_rank);
+      YGM_ASSERT_RELEASE(my_rank >= merging_rank);
 
       if (my_rank > merging_rank) {
         return;
       } else {
-        ASSERT_RELEASE(my_rank == merging_rank);
+        YGM_ASSERT_RELEASE(my_rank == merging_rank);
         if (my_parent == my_item) {  // Has not found new parent
           item_data.second.increase_rank(merging_rank + 1);
         } else {  // Tell merging item about new parent
diff --git a/include/ygm/container/detail/filter_proxy.hpp b/include/ygm/container/detail/filter_proxy.hpp
index 44e3725c..80714a4f 100644
--- a/include/ygm/container/detail/filter_proxy.hpp
+++ b/include/ygm/container/detail/filter_proxy.hpp
@@ -5,17 +5,60 @@
 
 #pragma once
 
-
 namespace ygm::container::detail {
 
 template <typename Container, typename FilterFunction>
-class filter_proxy
-    : public base_iteration<filter_proxy<Container, FilterFunction>,
-                            typename Container::for_all_args> {
+class filter_proxy_value
+    : public base_iteration_value<filter_proxy_value<Container, FilterFunction>,
+                                  typename Container::for_all_args> {
+ public:
+  using for_all_args = typename Container::for_all_args;
+
+  filter_proxy_value(Container& rc, FilterFunction filter)
+      : m_rcontainer(rc), m_filter_fn(filter) {}
+
+  template <typename Function>
+  void for_all(Function fn) {
+    auto flambda = [fn, this](auto&... xs) {
+      bool b = m_filter_fn(std::forward<decltype(xs)>(xs)...);
+      if (b) {
+        fn(std::forward<decltype(xs)>(xs)...);
+      }
+    };
+
+    m_rcontainer.for_all(flambda);
+  }
+
+  template <typename Function>
+  void for_all(Function fn) const {
+    auto flambda = [fn, this](const auto&... xs) {
+      bool b = m_filter_fn(std::forward<decltype(xs)>(xs)...);
+      if (b) {
+        fn(std::forward<decltype(xs)>(xs)...);
+      }
+    };
+
+    m_rcontainer.for_all(flambda);
+  }
+
+  ygm::comm& comm() { return m_rcontainer.comm(); }
+
+  const ygm::comm& comm() const { return m_rcontainer.comm(); }
+
+ private:
+  Container&     m_rcontainer;
+  FilterFunction m_filter_fn;
+};
+
+template <typename Container, typename FilterFunction>
+class filter_proxy_key_value
+    : public base_iteration_key_value<
+          filter_proxy_key_value<Container, FilterFunction>,
+          typename Container::for_all_args> {
  public:
   using for_all_args = typename Container::for_all_args;
 
-  filter_proxy(Container& rc, FilterFunction filter)
+  filter_proxy_key_value(Container& rc, FilterFunction filter)
       : m_rcontainer(rc), m_filter_fn(filter) {}
 
   template <typename Function>
@@ -51,4 +94,4 @@ class filter_proxy
   FilterFunction m_filter_fn;
 };
 
-}
\ No newline at end of file
+}  // namespace ygm::container::detail
diff --git a/include/ygm/container/detail/flatten_proxy.hpp b/include/ygm/container/detail/flatten_proxy.hpp
index d8ee4000..8dde155e 100644
--- a/include/ygm/container/detail/flatten_proxy.hpp
+++ b/include/ygm/container/detail/flatten_proxy.hpp
@@ -8,15 +8,15 @@
 namespace ygm::container::detail {
 
 template <typename Container>
-class flatten_proxy
-    : public base_iteration<flatten_proxy<Container>,
-                            std::tuple<std::tuple_element_t<
-                                0, typename Container::for_all_args>>> {
+class flatten_proxy_value
+    : public base_iteration_value<flatten_proxy_value<Container>,
+                                  std::tuple<std::tuple_element_t<
+                                      0, typename Container::for_all_args>>> {
  public:
   using for_all_args =
       std::tuple<std::tuple_element_t<0, typename Container::for_all_args>>;
 
-  flatten_proxy(Container& rc) : m_rcontainer(rc) {}
+  flatten_proxy_value(Container& rc) : m_rcontainer(rc) {}
 
   template <typename Function>
   void for_all(Function fn) {
@@ -52,4 +52,55 @@ class flatten_proxy
   Container& m_rcontainer;
 };
 
+// Key/value counterpart of flatten_proxy_value: for_all visits the elements of
+// each STL container produced by the wrapped container's for_all.
+// NOTE(review): for_all_args here is a one-element tuple, which does not
+// satisfy the DoubleItemTuple constraint on base_iteration_key_value, and the
+// for_all lambdas take a single argument -- confirm the intended key/value
+// semantics before instantiating this class.
+template <typename Container>
+class flatten_proxy_key_value
+    : public base_iteration_key_value<
+          flatten_proxy_key_value<Container>,
+          std::tuple<
+              std::tuple_element_t<0, typename Container::for_all_args>>> {
+ public:
+  using for_all_args =
+      std::tuple<std::tuple_element_t<0, typename Container::for_all_args>>;
+
+  flatten_proxy_key_value(Container& rc) : m_rcontainer(rc) {}
+
+  template <typename Function>
+  void for_all(Function fn) {
+    auto flambda =
+        [fn](std::tuple_element_t<0, typename Container::for_all_args>&
+                 stlcont) {
+          for (auto& v : stlcont) {
+            fn(v);
+          }
+        };
+
+    m_rcontainer.for_all(flambda);
+  }
+
+  template <typename Function>
+  void for_all(Function fn) const {
+    auto flambda =
+        [fn](const std::tuple_element_t<0, typename Container::for_all_args>&
+                 stlcont) {
+          for (const auto& v : stlcont) {
+            fn(v);
+          }
+        };
+
+    m_rcontainer.for_all(flambda);
+  }
+
+  ygm::comm& comm() { return m_rcontainer.comm(); }
+
+  const ygm::comm& comm() const { return m_rcontainer.comm(); }
+
+ private:
+  Container& m_rcontainer;
+};
 }  // namespace ygm::container::detail
diff --git a/include/ygm/container/detail/map_impl.hpp b/include/ygm/container/detail/map_impl.hpp
deleted file mode 100644
index 40d617a3..00000000
--- a/include/ygm/container/detail/map_impl.hpp
+++ /dev/null
@@ -1,379 +0,0 @@
-// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
-// Project Developers. See the top-level COPYRIGHT file for details.
-//
-// SPDX-License-Identifier: MIT
-
-#pragma once
-#include <cereal/archives/json.hpp>
-#include <cereal/types/utility.hpp>
-#include <fstream>
-#include <map>
-#include <ygm/comm.hpp>
-#include <ygm/container/container_traits.hpp>
-#include <ygm/container/detail/hash_partitioner.hpp>
-#include <ygm/detail/interrupt_mask.hpp>
-#include <ygm/detail/ygm_ptr.hpp>
-#include <ygm/detail/ygm_traits.hpp>
-
-namespace ygm::container::detail {
-
-template <typename Key, typename Value,
-          typename Partitioner = detail::old_hash_partitioner<Key>,
-          typename Compare     = std::less<Key>,
-          class Alloc          = std::allocator<std::pair<const Key, Value>>>
-class map_impl {
- public:
-  using self_type         = map_impl<Key, Value, Partitioner, Compare, Alloc>;
-  using ptr_type          = typename ygm::ygm_ptr<self_type>;
-  using mapped_type       = Value;
-  using key_type          = Key;
-  using size_type         = size_t;
-  using ygm_for_all_types = std::tuple<Key, Value>;
-  using container_type    = ygm::container::map_tag;
-
-  Partitioner partitioner;
-
-  map_impl(ygm::comm &comm) : m_default_value{}, m_comm(comm), pthis(this) {
-    pthis.check(m_comm);
-  }
-
-  map_impl(ygm::comm &comm, const mapped_type &dv)
-      : m_default_value(dv), m_comm(comm), pthis(this) {
-    pthis.check(m_comm);
-  }
-
-  map_impl(const self_type &rhs)
-      : m_default_value(rhs.m_default_value), m_comm(rhs.m_comm), pthis(this) {
-    m_local_map.insert(std::begin(rhs.m_local_map), std::end(rhs.m_local_map));
-    pthis.check(m_comm);
-  }
-
-  ~map_impl() { m_comm.barrier(); }
-
-  void async_insert_unique(const key_type &key, const mapped_type &value) {
-    auto inserter = [](auto mailbox, auto map, const key_type &key,
-                       const mapped_type &value) {
-      auto itr = map->m_local_map.find(key);
-      if (itr != map->m_local_map.end()) {
-        itr->second = value;
-      } else {
-        map->m_local_map.insert(std::make_pair(key, value));
-      }
-    };
-    int dest = owner(key);
-    m_comm.async(dest, inserter, pthis, key, value);
-  }
-
-  void async_insert_if_missing(const key_type &key, const mapped_type &value) {
-    async_insert_if_missing_else_visit(
-        key, value,
-        [](const key_type &k, const mapped_type &v,
-           const mapped_type &new_value) {});
-  }
-
-  void async_insert_multi(const key_type &key, const mapped_type &value) {
-    auto inserter = [](auto mailbox, auto map, const key_type &key,
-                       const mapped_type &value) {
-      map->m_local_map.insert(std::make_pair(key, value));
-    };
-    int dest = owner(key);
-    m_comm.async(dest, inserter, pthis, key, value);
-  }
-
-  template <typename Visitor, typename... VisitorArgs>
-  void async_visit(const key_type &key, Visitor visitor,
-                   const VisitorArgs &...args) {
-    int  dest          = owner(key);
-    auto visit_wrapper = [](auto pcomm, auto pmap, const key_type &key,
-                            const VisitorArgs &...args) {
-      auto range = pmap->m_local_map.equal_range(key);
-      if (range.first == range.second) {  // check if not in range
-        pmap->m_local_map.insert(std::make_pair(key, pmap->m_default_value));
-        range = pmap->m_local_map.equal_range(key);
-        ASSERT_DEBUG(range.first != range.second);
-      }
-      Visitor *vis = nullptr;
-      pmap->local_visit(key, *vis, args...);
-    };
-
-    m_comm.async(dest, visit_wrapper, pthis, key,
-                 std::forward<const VisitorArgs>(args)...);
-  }
-
-  template <typename Visitor, typename... VisitorArgs>
-  void async_visit_group(const key_type &key, Visitor visitor,
-                         const VisitorArgs &...args) {
-    int  dest          = owner(key);
-    auto visit_wrapper = [](auto pcomm, auto pmap, const key_type &key,
-                            const VisitorArgs &...args) {
-      auto range = pmap->m_local_map.equal_range(key);
-      if (range.first == range.second) {  // check if not in range
-        pmap->m_local_map.insert(std::make_pair(key, pmap->m_default_value));
-        range = pmap->m_local_map.equal_range(key);
-        ASSERT_DEBUG(range.first != range.second);
-      }
-
-      ygm::detail::interrupt_mask mask(pmap->m_comm);
-
-      Visitor *vis = nullptr;
-      ygm::meta::apply_optional(
-          *vis, std::make_tuple(pmap),
-          std::forward_as_tuple(range.first, range.second, args...));
-    };
-
-    m_comm.async(dest, visit_wrapper, pthis, key,
-                 std::forward<const VisitorArgs>(args)...);
-  }
-
-  template <typename Visitor, typename... VisitorArgs>
-  void async_visit_if_exists(const key_type &key, Visitor visitor,
-                             const VisitorArgs &...args) {
-    int  dest          = owner(key);
-    auto visit_wrapper = [](auto pcomm, auto pmap, const key_type &key,
-                            const VisitorArgs &...args) {
-      Visitor *vis = nullptr;
-      pmap->local_visit(key, *vis, args...);
-    };
-
-    m_comm.async(dest, visit_wrapper, pthis, key,
-                 std::forward<const VisitorArgs>(args)...);
-  }
-
-  template <typename Visitor, typename... VisitorArgs>
-  void async_insert_if_missing_else_visit(const key_type    &key,
-                                          const mapped_type &value,
-                                          Visitor            visitor,
-                                          const VisitorArgs &...args) {
-    int  dest                      = owner(key);
-    auto insert_else_visit_wrapper = [](auto pmap, const key_type &key,
-                                        const mapped_type &value,
-                                        const VisitorArgs &...args) {
-      auto itr = pmap->m_local_map.find(key);
-      if (itr == pmap->m_local_map.end()) {
-        pmap->m_local_map.insert(std::make_pair(key, value));
-      } else {
-        Visitor *vis = nullptr;
-        pmap->local_visit(key, *vis, value, args...);
-      }
-    };
-
-    m_comm.async(dest, insert_else_visit_wrapper, pthis, key, value,
-                 std::forward<const VisitorArgs>(args)...);
-  }
-
-  template <typename ReductionOp>
-  void async_reduce(const key_type &key, const mapped_type &value,
-                    ReductionOp reducer) {
-    int  dest           = owner(key);
-    auto reduce_wrapper = [](auto pmap, const key_type &key,
-                             const mapped_type &value) {
-      auto itr = pmap->m_local_map.find(key);
-      if (itr == pmap->m_local_map.end()) {
-        pmap->m_local_map.insert(std::make_pair(key, value));
-      } else {
-        ReductionOp *reducer = nullptr;
-        itr->second          = (*reducer)(itr->second, value);
-      }
-    };
-
-    m_comm.async(dest, reduce_wrapper, pthis, key, value);
-  }
-
-  void async_erase(const key_type &key) {
-    int  dest          = owner(key);
-    auto erase_wrapper = [](auto pcomm, auto pmap, const key_type &key) {
-      pmap->local_erase(key);
-    };
-
-    m_comm.async(dest, erase_wrapper, pthis, key);
-  }
-
-  size_t local_count(const key_type &key) { return m_local_map.count(key); }
-
-  template <typename Function>
-  void for_all(Function fn) {
-    m_comm.barrier();
-    local_for_all(fn);
-  }
-
-  void clear() {
-    m_comm.barrier();
-    m_local_map.clear();
-  }
-
-  size_type size() {
-    m_comm.barrier();
-    return m_comm.all_reduce_sum(m_local_map.size());
-  }
-
-  size_t count(const key_type &key) {
-    m_comm.barrier();
-    return m_comm.all_reduce_sum(m_local_map.count(key));
-  }
-
-  // Doesn't swap pthis.
-  // should we check comm is equal? -- probably
-  void swap(self_type &s) {
-    m_comm.barrier();
-    std::swap(m_default_value, s.m_default_value);
-    m_local_map.swap(s.m_local_map);
-  }
-
-  template <typename STLKeyContainer, typename MapKeyValue>
-  void all_gather(const STLKeyContainer &keys, MapKeyValue &output) {
-    ygm::ygm_ptr<MapKeyValue> preturn(&output);
-
-    auto fetcher = [](auto pcomm, int from, const key_type &key, auto pmap,
-                      auto pcont) {
-      auto returner = [](auto pcomm, const key_type &key,
-                         const std::vector<mapped_type> &values, auto pcont) {
-        for (const auto &v : values) {
-          pcont->insert(std::make_pair(key, v));
-        }
-      };
-      auto values = pmap->local_get(key);
-      pcomm->async(from, returner, key, values, pcont);
-    };
-
-    m_comm.barrier();
-    for (const auto &key : keys) {
-      int o = owner(key);
-      m_comm.async(o, fetcher, m_comm.rank(), key, pthis, preturn);
-    }
-    m_comm.barrier();
-  }
-
-  typename ygm::ygm_ptr<self_type> get_ygm_ptr() const { return pthis; }
-
-  void serialize(const std::string &fname) {
-    m_comm.barrier();
-    std::string   rank_fname = fname + std::to_string(m_comm.rank());
-    std::ofstream os(rank_fname, std::ios::binary);
-    cereal::JSONOutputArchive oarchive(os);
-    oarchive(m_local_map, m_default_value, m_comm.size());
-  }
-
-  void deserialize(const std::string &fname) {
-    m_comm.barrier();
-
-    std::string   rank_fname = fname + std::to_string(m_comm.rank());
-    std::ifstream is(rank_fname, std::ios::binary);
-
-    cereal::JSONInputArchive iarchive(is);
-    int                      comm_size;
-    iarchive(m_local_map, m_default_value, comm_size);
-
-    if (comm_size != m_comm.size()) {
-      m_comm.cerr0(
-          "Attempting to deserialize map_impl using communicator of "
-          "different size than serialized with");
-    }
-  }
-
-  int owner(const key_type &key) const {
-    auto [owner, rank] = partitioner(key, m_comm.size(), 1024);
-    return owner;
-  }
-
-  bool is_mine(const key_type &key) const {
-    return owner(key) == m_comm.rank();
-  }
-
-  std::vector<mapped_type> local_get(const key_type &key) {
-    std::vector<mapped_type> to_return;
-
-    auto range = m_local_map.equal_range(key);
-    for (auto itr = range.first; itr != range.second; ++itr) {
-      to_return.push_back(itr->second);
-    }
-
-    return to_return;
-  }
-
-  template <typename Function, typename... VisitorArgs>
-  void local_visit(const key_type &key, Function &fn,
-                   const VisitorArgs &...args) {
-    ygm::detail::interrupt_mask mask(m_comm);
-
-    auto range = m_local_map.equal_range(key);
-    if constexpr (std::is_invocable<decltype(fn), const key_type &,
-                                    mapped_type &, VisitorArgs &...>() ||
-                  std::is_invocable<decltype(fn), ptr_type, const key_type &,
-                                    mapped_type &, VisitorArgs &...>()) {
-      for (auto itr = range.first; itr != range.second; ++itr) {
-        ygm::meta::apply_optional(
-            fn, std::make_tuple(pthis),
-            std::forward_as_tuple(itr->first, itr->second, args...));
-      }
-    } else {
-      static_assert(ygm::detail::always_false<>,
-                    "remote map lambda signature must be invocable with (const "
-                    "&key_type, mapped_type&, ...) or (ptr_type, const "
-                    "&key_type, mapped_type&, ...) signatures");
-    }
-  }
-
-  void local_erase(const key_type &key) { m_local_map.erase(key); }
-
-  void local_clear() { m_local_map.clear(); }
-
-  size_type local_size() const { return m_local_map.size(); }
-
-  size_t local_const(const key_type &k) const { return m_local_map.count(k); }
-
-  ygm::comm &comm() { return m_comm; }
-
-  template <typename Function>
-  void local_for_all(Function fn) {
-    if constexpr (std::is_invocable<decltype(fn), const key_type,
-                                    mapped_type &>()) {
-      for (std::pair<const key_type, mapped_type> &kv : m_local_map) {
-        fn(kv.first, kv.second);
-      }
-    } else {
-      static_assert(ygm::detail::always_false<>,
-                    "local map lambda signature must be invocable with (const "
-                    "&key_type, mapped_type&) signature");
-    }
-  }
-
-  template <typename CompareFunction>
-  std::vector<std::pair<key_type, mapped_type>> topk(size_t          k,
-                                                     CompareFunction cfn) {
-    using vec_type = std::vector<std::pair<key_type, mapped_type>>;
-
-    m_comm.barrier();
-
-    vec_type local_topk;
-    for (const auto &kv : m_local_map) {
-      local_topk.push_back(kv);
-      std::sort(local_topk.begin(), local_topk.end(), cfn);
-      if (local_topk.size() > k) {
-        local_topk.pop_back();
-      }
-    }
-
-    auto to_return = m_comm.all_reduce(
-        local_topk, [cfn, k](const vec_type &va, const vec_type &vb) {
-          vec_type out(va.begin(), va.end());
-          out.insert(out.end(), vb.begin(), vb.end());
-          std::sort(out.begin(), out.end(), cfn);
-          while (out.size() > k) {
-            out.pop_back();
-          }
-          return out;
-        });
-    return to_return;
-  }
-
-  const mapped_type &default_value() const { return m_default_value; }
-
- protected:
-  map_impl() = delete;
-
-  mapped_type                                          m_default_value;
-  std::multimap<key_type, mapped_type, Compare, Alloc> m_local_map;
-  ygm::comm                                           &m_comm;
-  ptr_type                                             pthis;
-};
-}  // namespace ygm::container::detail
diff --git a/include/ygm/container/detail/map_proxy.hpp b/include/ygm/container/detail/map_proxy.hpp
deleted file mode 100644
index 2dcab046..00000000
--- a/include/ygm/container/detail/map_proxy.hpp
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
-// Project Developers. See the top-level COPYRIGHT file for details.
-//
-// SPDX-License-Identifier: MIT
-
-#pragma once
-
-#include <functional>
-#include <tuple>
-#include <utility>
-
-namespace ygm::container::detail {
-
-namespace type_traits {
-template <template <typename...> class T, typename U>
-struct is_specialization_of : std::false_type {};
-
-template <template <typename...> class T, typename... Us>
-struct is_specialization_of<T, T<Us...>> : std::true_type {};
-
-template <typename T>
-struct is_vector
-    : is_specialization_of<std::vector, typename std::decay<T>::type> {};
-
-template <typename T>
-struct is_tuple
-    : is_specialization_of<std::tuple, typename std::decay<T>::type> {};
-
-template <class T, bool isTuple>
-struct tuple_wrapper_helper  // T is not a tuple
-{
-  using type = std::tuple<T>;
-};
-
-template <class T>
-struct tuple_wrapper_helper<T, true>  // T is a tuple
-{
-  using type = T;
-};
-
-template <class T>
-struct tuple_wrapper  // T is a tuple
-{
-  using type = tuple_wrapper_helper<T, is_tuple<T>::value>::type;
-};
-}  // namespace type_traits
-
-template <typename Container, typename MapFunction>
-class map_proxy
-    : public base_iteration<
-          map_proxy<Container, MapFunction>,
-          typename type_traits::tuple_wrapper<decltype(std::apply(
-              std::declval<MapFunction>(),
-              std::declval<typename Container::for_all_args>()))>::type> {
- private:
-  using map_function_ret =
-      decltype(std::apply(std::declval<MapFunction>(),
-                          std::declval<typename Container::for_all_args>()));
-
- public:
-  using for_all_args = type_traits::tuple_wrapper<map_function_ret>::type;
-
-  map_proxy(Container& rc, MapFunction filter)
-      : m_rcontainer(rc), m_map_fn(filter) {}
-
-  template <typename Function>
-  void for_all(Function fn) {
-    auto mlambda = [fn, this](auto&... xs) {
-      auto map_result = m_map_fn(std::forward<decltype(xs)>(xs)...);
-      if constexpr (type_traits::is_tuple<decltype(map_result)>::value) {
-        std::apply(fn, map_result);
-      } else {
-        fn(map_result);
-      }
-    };
-
-    m_rcontainer.for_all(mlambda);
-  }
-
-  template <typename Function>
-  void for_all(Function fn) const {
-    auto mlambda = [fn, this](const auto&... xs) {
-      auto map_result = m_map_fn(std::forward<decltype(xs)>(xs)...);
-      if constexpr (type_traits::is_tuple<decltype(map_result)>::value) {
-        std::apply(fn, std::move(map_result));
-      } else {
-        fn(std::move(map_result));
-      }
-    };
-
-    m_rcontainer.for_all(mlambda);
-  }
-
-  ygm::comm& comm() { return m_rcontainer.comm(); }
-
-  const ygm::comm& comm() const { return m_rcontainer.comm(); }
-
- private:
-  Container&  m_rcontainer;
-  MapFunction m_map_fn;
-};
-
-}  // namespace ygm::container::detail
\ No newline at end of file
diff --git a/include/ygm/container/detail/reducing_adapter.hpp b/include/ygm/container/detail/reducing_adapter.hpp
index 6ddce0de..3b578fa4 100644
--- a/include/ygm/container/detail/reducing_adapter.hpp
+++ b/include/ygm/container/detail/reducing_adapter.hpp
@@ -61,7 +61,7 @@ class reducing_adapter {
           m_cache[slot].value = m_reducer(m_cache[slot].value, value);
         } else {
           cache_flush(slot);
-          ASSERT_DEBUG(m_cache[slot].occupied == false);
+          YGM_ASSERT_DEBUG(m_cache[slot].occupied == false);
           m_cache[slot].key      = key;
           m_cache[slot].value    = value;
           m_cache[slot].occupied = true;
@@ -73,7 +73,8 @@ class reducing_adapter {
   void cache_flush(const size_t slot) {
     // Use NLNR for reductions
     int next_dest = m_container.comm().router().next_hop(
-        m_container.owner(m_cache[slot].key), ygm::detail::routing_type::NLNR);
+        m_container.partitioner.owner(m_cache[slot].key),
+        ygm::detail::routing_type::NLNR);
 
     m_container.comm().async(
         next_dest,
diff --git a/include/ygm/container/detail/set_impl.hpp b/include/ygm/container/detail/set_impl.hpp
deleted file mode 100644
index 1a974565..00000000
--- a/include/ygm/container/detail/set_impl.hpp
+++ /dev/null
@@ -1,226 +0,0 @@
-// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
-// Project Developers. See the top-level COPYRIGHT file for details.
-//
-// SPDX-License-Identifier: MIT
-
-#pragma once
-#include <cereal/archives/json.hpp>
-#include <fstream>
-#include <set>
-#include <ygm/comm.hpp>
-#include <ygm/container/container_traits.hpp>
-#include <ygm/container/detail/hash_partitioner.hpp>
-#include <ygm/detail/ygm_ptr.hpp>
-#include <ygm/detail/ygm_traits.hpp>
-
-namespace ygm::container::detail {
-template <
-    typename Key, typename Partitioner = detail::old_hash_partitioner<Key>,
-    typename Compare = std::less<Key>, class Alloc = std::allocator<const Key>>
-class set_impl {
- public:
-  using self_type      = set_impl<Key, Partitioner, Compare, Alloc>;
-  using key_type       = Key;
-  using size_type      = size_t;
-  using container_type = ygm::container::set_tag;
-
-  Partitioner partitioner;
-
-  set_impl(ygm::comm &comm) : m_comm(comm), pthis(this) { pthis.check(m_comm); }
-  set_impl(set_impl &&s) noexcept
-      : m_comm(s.m_comm), pthis(this), m_local_set(std::move(s.m_local_set)) {
-    pthis.check(m_comm);
-  }
-
-  ~set_impl() { m_comm.barrier(); }
-
-  void async_insert_multi(const key_type &key) {
-    auto inserter = [](auto mailbox, auto pset, const key_type &key) {
-      pset->m_local_set.insert(key);
-    };
-    int dest = owner(key);
-    m_comm.async(dest, inserter, pthis, key);
-  }
-
-  void async_insert_unique(const key_type &key) {
-    auto inserter = [](auto mailbox, auto pset, const key_type &key) {
-      if (pset->m_local_set.count(key) == 0) {
-        pset->m_local_set.insert(key);
-      }
-    };
-    int dest = owner(key);
-    m_comm.async(dest, inserter, pthis, key);
-  }
-
-  void async_erase(const key_type &key) {
-    int  dest          = owner(key);
-    auto erase_wrapper = [](auto pcomm, auto pset, const key_type &key) {
-      pset->m_local_set.erase(key);
-    };
-
-    m_comm.async(dest, erase_wrapper, pthis, key);
-  }
-
-  template <typename Visitor, typename... VisitorArgs>
-  void async_insert_exe_if_missing(const key_type &key, Visitor visitor,
-                                   const VisitorArgs &...args) {
-    auto insert_and_visit = [](auto mailbox, auto pset, const key_type &key,
-                               const VisitorArgs &...args) {
-      if (pset->m_local_set.count(key) == 0) {
-        pset->m_local_set.insert(key);
-        Visitor *vis = nullptr;
-        std::apply(*vis, std::forward_as_tuple(key, args...));
-      }
-    };
-    int dest = owner(key);
-    m_comm.async(dest, insert_and_visit, pthis, key,
-                 std::forward<const VisitorArgs>(args)...);
-  }
-
-  template <typename Visitor, typename... VisitorArgs>
-  void async_insert_exe_if_contains(const key_type &key, Visitor visitor,
-                                    const VisitorArgs &...args) {
-    auto insert_and_visit = [](auto mailbox, auto pset, const key_type &key,
-                               const VisitorArgs &...args) {
-      if (pset->m_local_set.count(key) == 0) {
-        pset->m_local_set.insert(key);
-      } else {
-        Visitor *vis = nullptr;
-        std::apply(*vis, std::forward_as_tuple(key, args...));
-      }
-    };
-    int dest = owner(key);
-    m_comm.async(dest, insert_and_visit, pthis, key,
-                 std::forward<const VisitorArgs>(args)...);
-  }
-
-  template <typename Visitor, typename... VisitorArgs>
-  void async_exe_if_missing(const key_type &key, Visitor visitor,
-                            const VisitorArgs &...args) {
-    auto checker = [](auto mailbox, auto pset, const key_type &key,
-                      const VisitorArgs &...args) {
-      if (pset->m_local_set.count(key) == 0) {
-        Visitor *vis = nullptr;
-        std::apply(*vis, std::forward_as_tuple(key, args...));
-      }
-    };
-    int dest = owner(key);
-    m_comm.async(dest, checker, pthis, key,
-                 std::forward<const VisitorArgs>(args)...);
-  }
-
-  template <typename Visitor, typename... VisitorArgs>
-  void async_exe_if_contains(const key_type &key, Visitor visitor,
-                             const VisitorArgs &...args) {
-    auto checker = [](auto mailbox, auto pset, const key_type &key,
-                      const VisitorArgs &...args) {
-      if (pset->m_local_set.count(key) == 1) {
-        Visitor *vis = nullptr;
-        std::apply(*vis, std::forward_as_tuple(key, args...));
-      }
-    };
-    int dest = owner(key);
-    m_comm.async(dest, checker, pthis, key,
-                 std::forward<const VisitorArgs>(args)...);
-  }
-
-  template <typename Function>
-  void for_all(Function fn) {
-    m_comm.barrier();
-    local_for_all(fn);
-  }
-
-  template <typename Function>
-  void consume_all(Function fn) {
-    m_comm.barrier();
-    local_consume_all(fn);
-  }
-
-  void clear() {
-    m_comm.barrier();
-    m_local_set.clear();
-  }
-
-  size_type size() {
-    m_comm.barrier();
-    return m_comm.all_reduce_sum(m_local_set.size());
-  }
-
-  size_t count(const key_type &key) {
-    m_comm.barrier();
-    return m_comm.all_reduce_sum(m_local_set.count(key));
-  }
-
-  // Doesn't swap pthis.
-  // should we check comm is equal? -- probably
-  void swap(self_type &s) {
-    m_comm.barrier();
-    m_local_set.swap(s.m_local_set);
-  }
-
-  typename ygm::ygm_ptr<self_type> get_ygm_ptr() const { return pthis; }
-
-  void serialize(const std::string &fname) {
-    m_comm.barrier();
-    std::string   rank_fname = fname + std::to_string(m_comm.rank());
-    std::ofstream os(rank_fname, std::ios::binary);
-    cereal::JSONOutputArchive oarchive(os);
-    oarchive(m_local_set, m_comm.size());
-  }
-
-  void deserialize(const std::string &fname) {
-    m_comm.barrier();
-
-    std::string   rank_fname = fname + std::to_string(m_comm.rank());
-    std::ifstream is(rank_fname, std::ios::binary);
-
-    cereal::JSONInputArchive iarchive(is);
-    int                      comm_size;
-    iarchive(m_local_set, comm_size);
-
-    if (comm_size != m_comm.size()) {
-      m_comm.cerr0(
-          "Attempting to deserialize set_impl using communicator of "
-          "different size than serialized with");
-    }
-  }
-
-  ygm::comm &comm() { return m_comm; }
-
-  template <typename Function>
-  void local_for_all(Function fn) {
-    if constexpr (std::is_invocable<decltype(fn), const key_type &>()) {
-      std::for_each(m_local_set.begin(), m_local_set.end(), fn);
-    } else {
-      static_assert(ygm::detail::always_false<>,
-                    "local set lambda signature must be invocable with (const "
-                    "key_type &) signature");
-    }
-  }
-
-  template <typename Function>
-  void local_consume_all(Function fn) {
-    if constexpr (std::is_invocable<decltype(fn), const key_type &>()) {
-      while (!m_local_set.empty()) {
-        auto tmp = *(m_local_set.begin());
-        m_local_set.erase(m_local_set.begin());
-        fn(tmp);
-      }
-    } else {
-      static_assert(ygm::detail::always_false<>,
-                    "local set lambda signature must be invocable with (const "
-                    "key_type &) signature");
-    }
-  }
-
-  int owner(const key_type &key) const {
-    auto [owner, rank] = partitioner(key, m_comm.size(), 1024);
-    return owner;
-  }
-  set_impl() = delete;
-
-  std::multiset<key_type, Compare, Alloc> m_local_set;
-  ygm::comm                              &m_comm;
-  typename ygm::ygm_ptr<self_type>        pthis;
-};
-}  // namespace ygm::container::detail
diff --git a/include/ygm/container/detail/transform_proxy.hpp b/include/ygm/container/detail/transform_proxy.hpp
new file mode 100644
index 00000000..67b4946e
--- /dev/null
+++ b/include/ygm/container/detail/transform_proxy.hpp
@@ -0,0 +1,147 @@
+// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
+// Project Developers. See the top-level COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <functional>
+#include <tuple>
+#include <utility>
+#include <ygm/container/detail/type_traits.hpp>
+
+namespace ygm::container::detail {
+
+// Non-owning view that applies a map function to the items of a container.
+// It stores a reference to the container and a copy of the function; for_all()
+// runs the container's for_all(), feeds each item to the map function, and
+// hands the result to the caller's function (a std::tuple result is unpacked
+// into separate arguments, anything else is passed as one argument).
+template <typename Container, typename MapFunction>
+class transform_proxy_value
+    : public base_iteration_value<
+          transform_proxy_value<Container, MapFunction>,
+          typename type_traits::tuple_wrapper<decltype(std::apply(
+              std::declval<MapFunction>(),
+              std::declval<typename Container::for_all_args>()))>::type> {
+ private:
+  using map_function_ret =
+      decltype(std::apply(std::declval<MapFunction>(),
+                          std::declval<typename Container::for_all_args>()));
+
+ public:
+  using for_all_args = type_traits::tuple_wrapper<map_function_ret>::type;
+
+  // rc is kept by reference and must outlive the proxy; map_fn is copied.
+  transform_proxy_value(Container& rc, MapFunction map_fn)
+      : m_rcontainer(rc), m_map_fn(map_fn) {}
+
+  // Calls fn on the mapped form of every item of the wrapped container. The
+  // map result is handed to fn as an lvalue.
+  template <typename Function>
+  void for_all(Function fn) {
+    auto mlambda = [fn, this](auto&... xs) {
+      auto map_result = m_map_fn(std::forward<decltype(xs)>(xs)...);
+      if constexpr (type_traits::is_tuple<decltype(map_result)>::value) {
+        std::apply(fn, map_result);
+      } else {
+        fn(map_result);
+      }
+    };
+
+    m_rcontainer.for_all(mlambda);
+  }
+
+  // Const overload: items reach the map function as const references and the
+  // map result is moved into fn.
+  template <typename Function>
+  void for_all(Function fn) const {
+    auto mlambda = [fn, this](const auto&... xs) {
+      auto map_result = m_map_fn(std::forward<decltype(xs)>(xs)...);
+      if constexpr (type_traits::is_tuple<decltype(map_result)>::value) {
+        std::apply(fn, std::move(map_result));
+      } else {
+        fn(std::move(map_result));
+      }
+    };
+
+    m_rcontainer.for_all(mlambda);
+  }
+
+  // Communicator of the wrapped container.
+  ygm::comm& comm() { return m_rcontainer.comm(); }
+
+  const ygm::comm& comm() const { return m_rcontainer.comm(); }
+
+ private:
+  Container&  m_rcontainer;
+  MapFunction m_map_fn;
+};
+
+// Non-owning view that applies a map function to the items of a container.
+// It stores a reference to the container and a copy of the function; for_all()
+// runs the container's for_all(), feeds each item to the map function, and
+// hands the result to the caller's function (a std::tuple result is unpacked
+// into separate arguments, anything else is passed as one argument).
+template <typename Container, typename MapFunction>
+class transform_proxy_key_value
+    : public base_iteration_value<
+          transform_proxy_key_value<Container, MapFunction>,
+          typename type_traits::tuple_wrapper<decltype(std::apply(
+              std::declval<MapFunction>(),
+              std::declval<typename Container::for_all_args>()))>::type> {
+ private:
+  using map_function_ret =
+      decltype(std::apply(std::declval<MapFunction>(),
+                          std::declval<typename Container::for_all_args>()));
+
+ public:
+  using for_all_args = type_traits::tuple_wrapper<map_function_ret>::type;
+
+  // rc is kept by reference and must outlive the proxy; map_fn is copied.
+  transform_proxy_key_value(Container& rc, MapFunction map_fn)
+      : m_rcontainer(rc), m_map_fn(map_fn) {}
+
+  // Calls fn on the mapped form of every item of the wrapped container. The
+  // map result is handed to fn as an lvalue.
+  template <typename Function>
+  void for_all(Function fn) {
+    auto mlambda = [fn, this](auto&... xs) {
+      auto map_result = m_map_fn(std::forward<decltype(xs)>(xs)...);
+      if constexpr (type_traits::is_tuple<decltype(map_result)>::value) {
+        std::apply(fn, map_result);
+      } else {
+        fn(map_result);
+      }
+    };
+
+    m_rcontainer.for_all(mlambda);
+  }
+
+  // Const overload: items reach the map function as const references and the
+  // map result is moved into fn.
+  template <typename Function>
+  void for_all(Function fn) const {
+    auto mlambda = [fn, this](const auto&... xs) {
+      auto map_result = m_map_fn(std::forward<decltype(xs)>(xs)...);
+      if constexpr (type_traits::is_tuple<decltype(map_result)>::value) {
+        std::apply(fn, std::move(map_result));
+      } else {
+        fn(std::move(map_result));
+      }
+    };
+
+    m_rcontainer.for_all(mlambda);
+  }
+
+  // Communicator of the wrapped container.
+  ygm::comm& comm() { return m_rcontainer.comm(); }
+
+  const ygm::comm& comm() const { return m_rcontainer.comm(); }
+
+ private:
+  Container&  m_rcontainer;
+  MapFunction m_map_fn;
+};
+
+}  // namespace ygm::container::detail
diff --git a/include/ygm/container/detail/type_traits.hpp b/include/ygm/container/detail/type_traits.hpp
new file mode 100644
index 00000000..85b37b5f
--- /dev/null
+++ b/include/ygm/container/detail/type_traits.hpp
@@ -0,0 +1,56 @@
+// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
+// Project Developers. See the top-level COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <functional>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace ygm::container::detail::type_traits {
+// Primary template: U is not a specialization of the class template T.
+template <template <typename...> class T, typename U>
+struct is_specialization_of : std::false_type {};
+
+// Partial specialization: matches when the tested type is exactly T<Us...>.
+template <template <typename...> class T, typename... Us>
+struct is_specialization_of<T, T<Us...>> : std::true_type {};
+
+// True when T, after std::decay, is a std::vector specialization.
+template <typename T>
+struct is_vector
+    : is_specialization_of<std::vector, typename std::decay<T>::type> {};
+
+// True when T, after std::decay, is a std::tuple specialization.
+template <typename T>
+struct is_tuple
+    : is_specialization_of<std::tuple, typename std::decay<T>::type> {};
+
+// True when T, after std::decay, is a std::pair specialization.
+template <typename T>
+struct is_pair
+    : is_specialization_of<std::pair, typename std::decay<T>::type> {};
+
+template <class T, bool isTuple>
+struct tuple_wrapper_helper  // T is not a tuple
+{
+  using type = std::tuple<T>;
+};
+
+template <class T>
+struct tuple_wrapper_helper<T, true>  // T is a tuple
+{
+  using type = T;
+};
+
+// Maps any T to a tuple type: T itself when is_tuple<T> holds, std::tuple<T>
+// otherwise.
+template <class T>
+struct tuple_wrapper {
+  using type = typename tuple_wrapper_helper<T, is_tuple<T>::value>::type;
+};
+}  // namespace ygm::container::detail::type_traits
\ No newline at end of file
diff --git a/include/ygm/container/disjoint_set.hpp b/include/ygm/container/disjoint_set.hpp
index 96ee2c79..0e666ccc 100644
--- a/include/ygm/container/disjoint_set.hpp
+++ b/include/ygm/container/disjoint_set.hpp
@@ -64,6 +64,9 @@ class disjoint_set {
     return m_impl.get_ygm_ptr();
   }
 
+  // Returns the communicator reported by the underlying implementation object.
+  ygm::comm &comm() { return m_impl.comm(); }
+
  private:
   impl_type m_impl;
 };
diff --git a/include/ygm/container/map.hpp b/include/ygm/container/map.hpp
index 4af07fe8..b3703086 100644
--- a/include/ygm/container/map.hpp
+++ b/include/ygm/container/map.hpp
@@ -13,6 +13,7 @@
 #include <ygm/container/detail/base_async_insert_or_assign.hpp>
 #include <ygm/container/detail/base_async_reduce.hpp>
 #include <ygm/container/detail/base_async_visit.hpp>
+#include <ygm/container/detail/base_batch_erase.hpp>
 #include <ygm/container/detail/base_count.hpp>
 #include <ygm/container/detail/base_iteration.hpp>
 #include <ygm/container/detail/base_misc.hpp>
@@ -33,8 +34,11 @@ class map
                                           std::tuple<Key, Value>>,
       public detail::base_async_erase_key_value<map<Key, Value>,
                                                 std::tuple<Key, Value>>,
+      public detail::base_batch_erase_key_value<map<Key, Value>,
+                                                std::tuple<Key, Value>>,
       public detail::base_async_visit<map<Key, Value>, std::tuple<Key, Value>>,
-      public detail::base_iteration<map<Key, Value>, std::tuple<Key, Value>> {
+      public detail::base_iteration_key_value<map<Key, Value>,
+                                              std::tuple<Key, Value>> {
   friend class detail::base_misc<map<Key, Value>, std::tuple<Key, Value>>;
 
  public:
@@ -48,18 +52,81 @@ class map
 
   map() = delete;
 
-  map(ygm::comm& comm) : m_comm(comm), pthis(this), partitioner(comm) {
+  // Constructs an empty map on comm; m_default_value is value-initialized.
+  map(ygm::comm& comm)
+      : m_comm(comm), pthis(this), partitioner(comm), m_default_value() {
     pthis.check(m_comm);
   }
 
+  // Constructs an empty map on comm and stores default_value as the value
+  // local_insert(key) passes along for keys inserted without a value.
+  map(ygm::comm& comm, const mapped_type& default_value)
+      : m_comm(comm),
+        pthis(this),
+        partitioner(comm),
+        m_default_value(default_value) {
+    pthis.check(m_comm);
+  }
+
+  // Constructs a map from an initializer list. The inserts are issued only
+  // where m_comm.rank0() holds.
+  // NOTE(review): unlike the container constructors below, no barrier follows
+  // the inserts -- confirm this is intended.
+  map(ygm::comm& comm, std::initializer_list<std::pair<Key, Value>> l)
+      : m_comm(comm), pthis(this), partitioner(comm), m_default_value() {
+    pthis.check(m_comm);
+    if (m_comm.rank0()) {
+      for (const std::pair<Key, Value>& i : l) {
+        this->async_insert(i);
+      }
+    }
+  }
+
+  // Constructs a map from an STL container whose elements convert to
+  // std::pair<Key, Value>. Every element of the caller's cont is inserted, and
+  // a barrier follows.
+  template <typename STLContainer>
+  map(ygm::comm& comm, const STLContainer& cont)
+    requires detail::STLContainer<STLContainer> &&
+                 std::convertible_to<typename STLContainer::value_type,
+                                     std::pair<Key, Value>>
+      : m_comm(comm), pthis(this), partitioner(comm), m_default_value() {
+    pthis.check(m_comm);
+
+    for (const std::pair<Key, Value>& i : cont) {
+      this->async_insert(i);
+    }
+    m_comm.barrier();
+  }
+
+  // Constructs a map from a container constrained by detail::HasForAll and
+  // detail::SingleItemTuple; each std::pair<Key, Value> that yc.for_all visits
+  // is inserted, and a barrier follows.
+  template <typename YGMContainer>
+  map(ygm::comm& comm, const YGMContainer& yc)
+    requires detail::HasForAll<YGMContainer> &&
+                 detail::SingleItemTuple<typename YGMContainer::for_all_args>
+      : m_comm(comm), pthis(this), partitioner(comm), m_default_value() {
+    pthis.check(m_comm);
+
+    yc.for_all([this](const std::pair<Key, Value>& value) {
+      this->async_insert(value);
+    });
+
+    m_comm.barrier();
+  }
+
   ~map() { m_comm.barrier(); }
 
   using detail::base_async_erase_key<map<Key, Value>,
                                      for_all_args>::async_erase;
   using detail::base_async_erase_key_value<map<Key, Value>,
                                            for_all_args>::async_erase;
+  using detail::base_batch_erase_key_value<map<Key, Value>,
+                                           for_all_args>::erase;
 
-  void local_insert(const key_type& key) { m_local_map[key]; }
+  // Forwards to local_insert(key, value) with m_default_value as the value.
+  void local_insert(const key_type& key) { local_insert(key, m_default_value); }
 
   void local_erase(const key_type& key) { m_local_map.erase(key); }
 
@@ -150,7 +203,7 @@ class map
   }
 
   template <typename STLKeyContainer>
-  std::map<key_type, mapped_type> key_gather(const STLKeyContainer& keys) {
+  std::map<key_type, mapped_type> gather_keys(const STLKeyContainer& keys) {
     std::map<key_type, mapped_type>         to_return;
     static std::map<key_type, mapped_type>& sto_return = to_return;
 
@@ -195,21 +248,23 @@ class map
     } else {
       static_assert(ygm::detail::always_false<>,
                     "local map lambda signature must be invocable with (const "
-                    "&key_type, mapped_type&) signature");
+                    "key_type&, mapped_type&) signature");
     }
   }
 
+  // Const overload: visits each entry of m_local_map, passing the key and a
+  // const reference to the mapped value to fn.
   template <typename Function>
   void local_for_all(Function fn) const {
     if constexpr (std::is_invocable<decltype(fn), const key_type,
-                                    mapped_type&>()) {
+                                    const mapped_type&>()) {
       for (const std::pair<const key_type, mapped_type>& kv : m_local_map) {
         fn(kv.first, kv.second);
       }
     } else {
       static_assert(ygm::detail::always_false<>,
                     "local map lambda signature must be invocable with (const "
-                    "&key_type, mapped_type&) signature");
+                    "key_type&, const mapped_type&) signature");
     }
   }
 
@@ -287,6 +340,7 @@ class map
 
   ygm::comm&                                m_comm;
   std::unordered_map<key_type, mapped_type> m_local_map;
+  mapped_type                               m_default_value;
   typename ygm::ygm_ptr<self_type>          pthis;
 };
 
@@ -300,10 +354,12 @@ class multimap
                                           std::tuple<Key, Value>>,
       public detail::base_async_erase_key_value<multimap<Key, Value>,
                                                 std::tuple<Key, Value>>,
+      public detail::base_batch_erase_key_value<multimap<Key, Value>,
+                                                std::tuple<Key, Value>>,
       public detail::base_async_visit<multimap<Key, Value>,
                                       std::tuple<Key, Value>>,
-      public detail::base_iteration<multimap<Key, Value>,
-                                    std::tuple<Key, Value>> {
+      public detail::base_iteration_key_value<multimap<Key, Value>,
+                                              std::tuple<Key, Value>> {
   friend class detail::base_misc<multimap<Key, Value>, std::tuple<Key, Value>>;
 
  public:
@@ -322,24 +378,91 @@ class multimap
 
   multimap() = delete;
 
-  multimap(ygm::comm& comm) : m_comm(comm), pthis(this), partitioner(comm) {
+  // Constructs an empty multimap on comm; m_default_value is value-initialized.
+  multimap(ygm::comm& comm)
+      : m_comm(comm), pthis(this), partitioner(comm), m_default_value() {
     pthis.check(m_comm);
   }
 
+  // Constructs an empty multimap on comm and stores default_value as the value
+  // local_insert(key) gives to newly created entries.
+  multimap(ygm::comm& comm, const mapped_type& default_value)
+      : m_comm(comm),
+        pthis(this),
+        partitioner(comm),
+        m_default_value(default_value) {
+    pthis.check(m_comm);
+  }
+
+  // Constructs a multimap from an initializer list. The inserts are issued
+  // only where m_comm.rank0() holds.
+  // NOTE(review): unlike the container constructors below, no barrier follows
+  // the inserts -- confirm this is intended.
+  multimap(ygm::comm& comm, std::initializer_list<std::pair<Key, Value>> l)
+      : m_comm(comm), pthis(this), partitioner(comm), m_default_value() {
+    pthis.check(m_comm);
+    if (m_comm.rank0()) {
+      for (const std::pair<Key, Value>& i : l) {
+        this->async_insert(i);
+      }
+    }
+  }
+
+  // Constructs a multimap from an STL container whose elements convert to
+  // std::pair<Key, Value>. Every element of the caller's cont is inserted, and
+  // a barrier follows.
+  template <typename STLContainer>
+  multimap(ygm::comm& comm, const STLContainer& cont)
+    requires detail::STLContainer<STLContainer> &&
+                 std::convertible_to<typename STLContainer::value_type,
+                                     std::pair<Key, Value>>
+      : m_comm(comm), pthis(this), partitioner(comm), m_default_value() {
+    pthis.check(m_comm);
+
+    for (const std::pair<Key, Value>& i : cont) {
+      this->async_insert(i);
+    }
+    m_comm.barrier();
+  }
+
+  // Constructs a multimap from a container constrained by detail::HasForAll
+  // and detail::SingleItemTuple; each std::pair<Key, Value> that yc.for_all
+  // visits is inserted, and a barrier follows.
+  template <typename YGMContainer>
+  multimap(ygm::comm& comm, const YGMContainer& yc)
+    requires detail::HasForAll<YGMContainer> &&
+                 detail::SingleItemTuple<typename YGMContainer::for_all_args>
+      : m_comm(comm), pthis(this), partitioner(comm), m_default_value() {
+    pthis.check(m_comm);
+
+    yc.for_all([this](const std::pair<Key, Value>& value) {
+      this->async_insert(value);
+    });
+
+    m_comm.barrier();
+  }
+
   ~multimap() { m_comm.barrier(); }
 
   void local_insert(const key_type& key) {
     if (m_local_map.count(key) == 0) {
-      m_local_map.insert({key, mapped_type()});
+      m_local_map.insert({key, m_default_value});
     }
   }
 
   void local_erase(const key_type& key) { m_local_map.erase(key); }
 
-  void local_erase(const key_type& key, const key_type& value) {
-    auto itr = m_local_map.find(key);
-    if (itr != m_local_map.end() && itr->second == value) {
-      m_local_map.erase(itr);
+  // Erases every local entry stored under key whose mapped value equals value.
+  // unordered_multimap::erase(iterator) returns the next iterator and leaves
+  // the other iterators valid, so the range can be pruned in one pass.
+  void local_erase(const key_type& key, const mapped_type& value) {
+    auto [itr, end] = m_local_map.equal_range(key);
+    while (itr != end) {
+      if (itr->second == value) {
+        itr = m_local_map.erase(itr);
+      } else {
+        ++itr;
+      }
     }
   }
 
@@ -403,7 +520,7 @@ class multimap
   }
 
   // template <typename STLKeyContainer>
-  // std::map<key_type, mapped_type> key_gather(const STLKeyContainer& keys) {
+  // std::map<key_type, mapped_type> gather_keys(const STLKeyContainer& keys) {
   //   std::map<key_type, mapped_type>         to_return;
   //   static std::map<key_type, mapped_type>& sto_return = to_return;
 
@@ -452,6 +569,26 @@ class multimap
     }
   }
 
+  // Read-only traversal of the locally stored entries.
+  //
+  // Calls fn(key, value) once per entry of m_local_map. Because this overload
+  // is const, the value is passed as const mapped_type&; a callable that
+  // needs a mutable mapped_type& is rejected by the static_assert below
+  // instead of failing later at the call site.
+  template <typename Function>
+  void local_for_all(Function fn) const {
+    if constexpr (std::is_invocable<decltype(fn), const key_type&,
+                                    const mapped_type&>()) {
+      for (const std::pair<const key_type, mapped_type>& kv : m_local_map) {
+        fn(kv.first, kv.second);
+      }
+    } else {
+      static_assert(ygm::detail::always_false<>,
+                    "local map lambda signature must be invocable with (const "
+                    "key_type&, const mapped_type&) signature");
+    }
+  }
+
   // void async_insert(const std::pair<key_type, mapped_type>& kv) {
   //   async_insert(kv.first, kv.second);
   // }
@@ -513,6 +650,7 @@ class multimap
 
   ygm::comm&                                     m_comm;
   std::unordered_multimap<key_type, mapped_type> m_local_map;
+  mapped_type                                    m_default_value;
   typename ygm::ygm_ptr<self_type>               pthis;
 };
 
diff --git a/include/ygm/container/old_set.hpp b/include/ygm/container/old_set.hpp
deleted file mode 100644
index 609e9db8..00000000
--- a/include/ygm/container/old_set.hpp
+++ /dev/null
@@ -1,164 +0,0 @@
-// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
-// Project Developers. See the top-level COPYRIGHT file for details.
-//
-// SPDX-License-Identifier: MIT
-
-#pragma once
-
-#include <ygm/container/container_traits.hpp>
-#include <ygm/container/detail/set_impl.hpp>
-
-namespace ygm::container {
-
-template <typename Key, typename Partitioner = detail::hash_partitioner<Key>,
-          typename Compare = std::less<Key>,
-          class Alloc      = std::allocator<const Key>>
-class multiset {
- public:
-  using self_type         = multiset<Key, Partitioner, Compare, Alloc>;
-  using key_type          = Key;
-  using size_type         = size_t;
-  using ygm_for_all_types = std::tuple<Key>;
-  using impl_type = detail::set_impl<key_type, Partitioner, Compare, Alloc>;
-
-  Partitioner partitioner;
-
-  multiset() = delete;
-
-  multiset(ygm::comm& comm) : m_impl(comm) {}
-
-  void async_insert(const key_type& key) { m_impl.async_insert_multi(key); }
-
-  void async_erase(const key_type& key) { m_impl.async_erase(key); }
-
-  template <typename Function>
-  void for_all(Function fn) {
-    m_impl.for_all(fn);
-  }
-
-  template <typename Function>
-  void consume_all(Function fn) {
-    m_impl.consume_all(fn);
-  }
-
-  void clear() { m_impl.clear(); }
-
-  size_type size() { return m_impl.size(); }
-
-  bool empty() { return m_impl.size() == 0; }
-
-  size_t count(const key_type& key) { return m_impl.count(key); }
-
-  void swap(self_type& s) { return m_impl.swap(s.m_impl); }
-
-  void serialize(const std::string& fname) { m_impl.serialize(fname); }
-  void deserialize(const std::string& fname) { m_impl.deserialize(fname); }
-
-  typename ygm::ygm_ptr<impl_type> get_ygm_ptr() const {
-    return m_impl.get_ygm_ptr();
-  }
-
-  template <typename Function>
-  void local_for_all(Function fn) {
-    m_impl.local_for_all(fn);
-  }
-
-  int owner(const key_type& key) const { return m_impl.owner(key); }
-
-  ygm::comm& comm() { return m_impl.comm(); }
-
- private:
-  impl_type m_impl;
-};
-
-template <typename Key, typename Partitioner = detail::hash_partitioner<Key>,
-          typename Compare = std::less<Key>,
-          class Alloc      = std::allocator<const Key>>
-class set {
- public:
-  using self_type          = set<Key, Partitioner, Compare, Alloc>;
-  using key_type           = Key;
-  using size_type          = size_t;
-  using ygm_container_type = ygm::container::set_tag;
-  using ygm_for_all_types  = std::tuple<Key>;
-  using impl_type = detail::set_impl<key_type, Partitioner, Compare, Alloc>;
-
-  Partitioner partitioner;
-
-  set() = delete;
-
-  set(ygm::comm& comm) : m_impl(comm) {}
-
-  void async_insert(const key_type& key) { m_impl.async_insert_unique(key); }
-
-  void async_erase(const key_type& key) { m_impl.async_erase(key); }
-
-  template <typename Visitor, typename... VisitorArgs>
-  void async_insert_exe_if_missing(const key_type& key, Visitor visitor,
-                                   const VisitorArgs&... args) {
-    m_impl.async_insert_exe_if_missing(
-        key, visitor, std::forward<const VisitorArgs>(args)...);
-  }
-
-  template <typename Visitor, typename... VisitorArgs>
-  void async_insert_exe_if_contains(const key_type& key, Visitor visitor,
-                                    const VisitorArgs&... args) {
-    m_impl.async_insert_exe_if_contains(
-        key, visitor, std::forward<const VisitorArgs>(args)...);
-  }
-
-  template <typename Visitor, typename... VisitorArgs>
-  void async_exe_if_missing(const key_type& key, Visitor visitor,
-                            const VisitorArgs&... args) {
-    m_impl.async_exe_if_missing(key, visitor,
-                                std::forward<const VisitorArgs>(args)...);
-  }
-
-  template <typename Visitor, typename... VisitorArgs>
-  void async_exe_if_contains(const key_type& key, Visitor visitor,
-                             const VisitorArgs&... args) {
-    m_impl.async_exe_if_contains(key, visitor,
-                                 std::forward<const VisitorArgs>(args)...);
-  }
-
-  template <typename Function>
-  void for_all(Function fn) {
-    m_impl.for_all(fn);
-  }
-
-  template <typename Function>
-  void consume_all(Function fn) {
-    m_impl.consume_all(fn);
-  }
-
-  void clear() { m_impl.clear(); }
-
-  size_type size() { return m_impl.size(); }
-
-  bool empty() { return m_impl.size() == 0; }
-
-  size_t count(const key_type& key) { return m_impl.count(key); }
-
-  void swap(self_type& s) { return m_impl.swap(s.m_impl); }
-
-  void serialize(const std::string& fname) { m_impl.serialize(fname); }
-  void deserialize(const std::string& fname) { m_impl.deserialize(fname); }
-
-  typename ygm::ygm_ptr<impl_type> get_ygm_ptr() const {
-    return m_impl.get_ygm_ptr();
-  }
-
-  template <typename Function>
-  void local_for_all(Function fn) {
-    m_impl.local_for_all(fn);
-  }
-
-  int owner(const key_type& key) const { return m_impl.owner(key); }
-
-  ygm::comm& comm() { return m_impl.comm(); }
-
- private:
-  impl_type m_impl;
-};
-
-}  // namespace ygm::container
diff --git a/include/ygm/container/set.hpp b/include/ygm/container/set.hpp
index 3267a34e..3445643e 100644
--- a/include/ygm/container/set.hpp
+++ b/include/ygm/container/set.hpp
@@ -11,11 +11,11 @@
 #include <ygm/container/detail/base_async_erase.hpp>
 #include <ygm/container/detail/base_async_insert.hpp>
 #include <ygm/container/detail/base_async_insert_contains.hpp>
+#include <ygm/container/detail/base_batch_erase.hpp>
 #include <ygm/container/detail/base_count.hpp>
 #include <ygm/container/detail/base_iteration.hpp>
 #include <ygm/container/detail/base_misc.hpp>
 #include <ygm/container/detail/hash_partitioner.hpp>
-// #include <ygm/container/detail/set_impl.hpp>
 
 namespace ygm::container {
 
@@ -24,12 +24,13 @@ class multiset
     : public detail::base_async_insert_value<multiset<Value>,
                                              std::tuple<Value>>,
       public detail::base_async_erase_key<multiset<Value>, std::tuple<Value>>,
+      public detail::base_batch_erase_key<multiset<Value>, std::tuple<Value>>,
       public detail::base_async_contains<multiset<Value>, std::tuple<Value>>,
       public detail::base_async_insert_contains<multiset<Value>,
                                                 std::tuple<Value>>,
       public detail::base_count<multiset<Value>, std::tuple<Value>>,
       public detail::base_misc<multiset<Value>, std::tuple<Value>>,
-      public detail::base_iteration<multiset<Value>, std::tuple<Value>> {
+      public detail::base_iteration_value<multiset<Value>, std::tuple<Value>> {
   friend class detail::base_misc<multiset<Value>, std::tuple<Value>>;
 
  public:
@@ -59,6 +60,45 @@ class multiset
     pthis.check(m_comm);
   }
 
+  // Rank 0 inserts the listed values; every rank then waits at the barrier.
+  multiset(ygm::comm &comm, std::initializer_list<Value> l)
+      : m_comm(comm), pthis(this), partitioner(comm) {
+    pthis.check(m_comm);
+    if (m_comm.rank0()) {
+      for (const Value &i : l) {
+        this->async_insert(i);  // inherited name: qualify like the siblings
+      }
+    }
+    m_comm.barrier();
+  }
+
+  // Inserts every element of the STL container `cont` (bound as Value), then
+  // waits at a barrier.
+  template <typename STLContainer>
+  multiset(ygm::comm &comm, const STLContainer &cont)
+    requires detail::STLContainer<STLContainer> &&
+             std::convertible_to<typename STLContainer::value_type, Value>
+      : m_comm(comm), pthis(this), partitioner(comm) {
+    pthis.check(m_comm);
+    for (const Value &element : cont) {
+      this->async_insert(element);
+    }
+    m_comm.barrier();
+  }
+
+  // Inserts each value that yc.for_all passes to the callback, then barriers.
+  template <typename YGMContainer>
+  multiset(ygm::comm &comm, const YGMContainer &yc)
+    requires detail::HasForAll<YGMContainer> &&
+             detail::SingleItemTuple<typename YGMContainer::for_all_args>
+      : m_comm(comm), pthis(this), partitioner(comm) {
+    pthis.check(m_comm);
+    yc.for_all([this](const Value &item) {
+      this->async_insert(item);
+    });
+    m_comm.barrier();
+  }
+
   ~multiset() { m_comm.barrier(); }
 
   multiset() = delete;
@@ -112,11 +152,12 @@ template <typename Value>
 class set
     : public detail::base_async_insert_value<set<Value>, std::tuple<Value>>,
       public detail::base_async_erase_key<set<Value>, std::tuple<Value>>,
+      public detail::base_batch_erase_key<set<Value>, std::tuple<Value>>,
       public detail::base_async_contains<set<Value>, std::tuple<Value>>,
       public detail::base_async_insert_contains<set<Value>, std::tuple<Value>>,
       public detail::base_count<set<Value>, std::tuple<Value>>,
       public detail::base_misc<set<Value>, std::tuple<Value>>,
-      public detail::base_iteration<set<Value>, std::tuple<Value>> {
+      public detail::base_iteration_value<set<Value>, std::tuple<Value>> {
   friend class detail::base_misc<set<Value>, std::tuple<Value>>;
 
  public:
@@ -146,6 +187,43 @@ class set
     pthis.check(m_comm);
   }
 
+  // Rank 0 inserts the values of the initializer list; every rank then
+  // waits at the barrier before construction finishes.
+  set(ygm::comm &comm, std::initializer_list<Value> l)
+      : m_comm(comm), pthis(this), partitioner(comm) {
+    pthis.check(m_comm);
+    if (m_comm.rank0()) {
+      for (const Value &item : l) this->async_insert(item);
+    }
+    m_comm.barrier();
+  }
+
+  // Inserts every element of the STL container `cont`, then barriers.
+  template <typename STLContainer>
+  set(ygm::comm &comm, const STLContainer &cont)
+    requires detail::STLContainer<STLContainer> &&
+             std::convertible_to<typename STLContainer::value_type, Value>
+      : m_comm(comm), pthis(this), partitioner(comm) {
+    pthis.check(m_comm);
+    for (const Value &element : cont) {
+      this->async_insert(element);
+    }
+    m_comm.barrier();
+  }
+
+  // Inserts each value that yc.for_all passes to the callback, then barriers.
+  template <typename YGMContainer>
+  set(ygm::comm &comm, const YGMContainer &yc)
+    requires detail::HasForAll<YGMContainer> &&
+             detail::SingleItemTuple<typename YGMContainer::for_all_args>
+      : m_comm(comm), pthis(this), partitioner(comm) {
+    pthis.check(m_comm);
+    yc.for_all([this](const Value &item) {
+      this->async_insert(item);
+    });
+    m_comm.barrier();
+  }
+
   ~set() { m_comm.barrier(); }
 
   set() = delete;
@@ -157,6 +235,8 @@ class set
     return *this;
   }
 
+  using detail::base_batch_erase_key<set<Value>, for_all_args>::erase;
+
   void local_insert(const value_type &val) { m_local_set.insert(val); }
 
   void local_erase(const value_type &val) { m_local_set.erase(val); }
diff --git a/include/ygm/container/tagged_bag.hpp b/include/ygm/container/tagged_bag.hpp
index 1e2c8bc5..470a8a74 100644
--- a/include/ygm/container/tagged_bag.hpp
+++ b/include/ygm/container/tagged_bag.hpp
@@ -99,12 +99,12 @@ class tagged_bag {
   }
 
   // template <typename STLKeyContainer>
-  // std::map<tag_type, value_type> key_gather(const STLKeyContainer &tags) {
+  // std::map<tag_type, value_type> gather_keys(const STLKeyContainer &tags) {
   //   return m_tagged_bag.all_gather(tags);
   // }
 
-  std::map<tag_type, value_type> key_gather(const std::vector<tag_type> &tags) {
-    return m_tagged_bag.key_gather(tags);
+  std::map<tag_type, value_type> gather_keys(const std::vector<tag_type> &tags) {
+    return m_tagged_bag.gather_keys(tags);
   }
   template <typename Function>
   void local_for_all(Function fn) {
diff --git a/include/ygm/detail/assert.hpp b/include/ygm/detail/assert.hpp
index 7e286858..7420d9a7 100644
--- a/include/ygm/detail/assert.hpp
+++ b/include/ygm/detail/assert.hpp
@@ -18,7 +18,7 @@ inline void release_assert_fail(const char *assertion, const char *file,
   throw std::runtime_error(ss.str());
 }
 
-#define ASSERT_MPI(a)                                     \
+#define YGM_ASSERT_MPI(a)                                     \
   {                                                       \
     if (a != MPI_SUCCESS) {                               \
       char *error_string = NULL;                          \
@@ -32,9 +32,9 @@ inline void release_assert_fail(const char *assertion, const char *file,
     }                                                     \
   }
 
-#define ASSERT_DEBUG(expr) assert(expr)
+#define YGM_ASSERT_DEBUG(expr) assert(expr)
 
-#define ASSERT_RELEASE(expr) \
+#define YGM_ASSERT_RELEASE(expr) \
   (static_cast<bool>(expr)   \
        ? void(0)             \
        : release_assert_fail(#expr, __FILE__, __LINE__, ""))
diff --git a/include/ygm/detail/comm.ipp b/include/ygm/detail/comm.ipp
index 9da7401a..3ddd5643 100644
--- a/include/ygm/detail/comm.ipp
+++ b/include/ygm/detail/comm.ipp
@@ -4,8 +4,11 @@
 // SPDX-License-Identifier: MIT
 
 #pragma once
+#include <ygm/detail/lambda_compliance.hpp>
 #include <ygm/detail/meta/functional.hpp>
 #include <ygm/detail/ygm_cereal_archive.hpp>
+#include <ygm/version.hpp>
+
 namespace ygm {
 
 struct comm::mpi_irecv_request {
@@ -35,7 +41,7 @@ inline comm::comm(MPI_Comm mcomm)
     : m_layout(mcomm), m_router(m_layout, config.routing) {
   pimpl_if.reset();
   int flag(0);
-  ASSERT_MPI(MPI_Initialized(&flag));
+  YGM_ASSERT_MPI(MPI_Initialized(&flag));
   if (!flag) {
     throw std::runtime_error("YGM::COMM ERROR: MPI not initialized");
   }
@@ -43,9 +49,9 @@ inline comm::comm(MPI_Comm mcomm)
 }
 
 inline void comm::comm_setup(MPI_Comm c) {
-  ASSERT_MPI(MPI_Comm_dup(c, &m_comm_async));
-  ASSERT_MPI(MPI_Comm_dup(c, &m_comm_barrier));
-  ASSERT_MPI(MPI_Comm_dup(c, &m_comm_other));
+  YGM_ASSERT_MPI(MPI_Comm_dup(c, &m_comm_async));
+  YGM_ASSERT_MPI(MPI_Comm_dup(c, &m_comm_barrier));
+  YGM_ASSERT_MPI(MPI_Comm_dup(c, &m_comm_other));
 
   m_vec_send_buffers.resize(m_layout.size());
 
@@ -77,6 +83,19 @@ inline void comm::welcome(std::ostream &os) {
        << "RANKS_PER_NODE = " << m_layout.local_size() << "\n"
        << "NUM_NODES      = " << m_layout.node_size() << "\n";
 
+  // Find MPI implementation details (checked like the other MPI calls here)
+  char version[MPI_MAX_LIBRARY_VERSION_STRING];
+  int  version_len{0};
+  YGM_ASSERT_MPI(MPI_Get_library_version(version, &version_len));
+
+  // Trim MPI details to implementation and version: keep the text before
+  // the first ',' or newline (substr takes everything if neither is found)
+  std::string version_string(version, version_len);
+  std::string delimiters{',', '\n'};
+  auto        end = version_string.find_first_of(delimiters);
+  sstr << "MPI_LIBRARY    = " << version_string.substr(0, end) << "\n";
+  sstr << "YGM_VERSION    = " << ygm_version << "\n";
+
   config.print(sstr);
 
   if (rank() == 0) {
@@ -111,31 +130,29 @@ inline void comm::stats_print(const std::string &name, std::ostream &os) {
 inline comm::~comm() {
   barrier();
 
-  ASSERT_RELEASE(MPI_Barrier(m_comm_async) == MPI_SUCCESS);
+  YGM_ASSERT_RELEASE(MPI_Barrier(m_comm_async) == MPI_SUCCESS);
 
-  ASSERT_RELEASE(m_send_queue.empty());
-  ASSERT_RELEASE(m_send_dest_queue.empty());
-  ASSERT_RELEASE(m_send_buffer_bytes == 0);
-  ASSERT_RELEASE(m_pending_isend_bytes == 0);
+  YGM_ASSERT_RELEASE(m_send_queue.empty());
+  YGM_ASSERT_RELEASE(m_send_dest_queue.empty());
+  YGM_ASSERT_RELEASE(m_send_buffer_bytes == 0);
+  YGM_ASSERT_RELEASE(m_pending_isend_bytes == 0);
 
   for (size_t i = 0; i < m_recv_queue.size(); ++i) {
-    ASSERT_RELEASE(MPI_Cancel(&(m_recv_queue[i].request)) == MPI_SUCCESS);
+    YGM_ASSERT_RELEASE(MPI_Cancel(&(m_recv_queue[i].request)) == MPI_SUCCESS);
   }
-  ASSERT_RELEASE(MPI_Barrier(m_comm_async) == MPI_SUCCESS);
-  ASSERT_RELEASE(MPI_Comm_free(&m_comm_async) == MPI_SUCCESS);
-  ASSERT_RELEASE(MPI_Comm_free(&m_comm_barrier) == MPI_SUCCESS);
-  ASSERT_RELEASE(MPI_Comm_free(&m_comm_other) == MPI_SUCCESS);
+  YGM_ASSERT_RELEASE(MPI_Barrier(m_comm_async) == MPI_SUCCESS);
+  YGM_ASSERT_RELEASE(MPI_Comm_free(&m_comm_async) == MPI_SUCCESS);
+  YGM_ASSERT_RELEASE(MPI_Comm_free(&m_comm_barrier) == MPI_SUCCESS);
+  YGM_ASSERT_RELEASE(MPI_Comm_free(&m_comm_other) == MPI_SUCCESS);
 
   pimpl_if.reset();
 }
 
 template <typename AsyncFunction, typename... SendArgs>
 inline void comm::async(int dest, AsyncFunction fn, const SendArgs &...args) {
-  static_assert(std::is_trivially_copyable<AsyncFunction>::value &&
-                    std::is_standard_layout<AsyncFunction>::value,
-                "comm::async() AsyncFunction must be is_trivially_copyable & "
-                "is_standard_layout.");
-  ASSERT_RELEASE(dest < m_layout.size());
+  YGM_CHECK_ASYNC_LAMBDA_COMPLIANCE(AsyncFunction, "ygm::comm::async()");
+
+  YGM_ASSERT_RELEASE(dest < m_layout.size());
   stats.async(dest);
 
   check_if_production_halt_required();
@@ -184,11 +201,8 @@ inline void comm::async(int dest, AsyncFunction fn, const SendArgs &...args) {
 
 template <typename AsyncFunction, typename... SendArgs>
 inline void comm::async_bcast(AsyncFunction fn, const SendArgs &...args) {
-  static_assert(
-      std::is_trivially_copyable<AsyncFunction>::value &&
-          std::is_standard_layout<AsyncFunction>::value,
-      "comm::async_bcast() AsyncFunction must be is_trivially_copyable & "
-      "is_standard_layout.");
+  YGM_CHECK_ASYNC_LAMBDA_COMPLIANCE(AsyncFunction, "ygm::comm::async_bcast()");
+
   check_if_production_halt_required();
 
   pack_lambda_broadcast(fn, std::forward<const SendArgs>(args)...);
@@ -203,11 +217,8 @@ inline void comm::async_bcast(AsyncFunction fn, const SendArgs &...args) {
 template <typename AsyncFunction, typename... SendArgs>
 inline void comm::async_mcast(const std::vector<int> &dests, AsyncFunction fn,
                               const SendArgs &...args) {
-  static_assert(
-      std::is_trivially_copyable<AsyncFunction>::value &&
-          std::is_standard_layout<AsyncFunction>::value,
-      "comm::async_mcast() AsyncFunction must be is_trivially_copyable & "
-      "is_standard_layout.");
+  YGM_CHECK_ASYNC_LAMBDA_COMPLIANCE(AsyncFunction, "ygm::comm::async_mcast()");
+
   for (auto dest : dests) {
     async(dest, fn, std::forward<const SendArgs>(args)...);
   }
@@ -241,8 +252,10 @@ inline void comm::barrier() {
       flush_all_local_and_process_incoming();
     }
   }
-  ASSERT_RELEASE(m_pre_barrier_callbacks.empty());
-  ASSERT_RELEASE(m_send_dest_queue.empty());
+  YGM_ASSERT_RELEASE(m_pre_barrier_callbacks.empty());
+  YGM_ASSERT_RELEASE(m_send_dest_queue.empty());
+
+  cf_barrier();
 }
 
 /**
@@ -251,7 +264,7 @@ inline void comm::barrier() {
  * called it. See:  MPI_Barrier()
  */
 inline void comm::cf_barrier() const {
-  ASSERT_MPI(MPI_Barrier(m_comm_barrier));
+  YGM_ASSERT_MPI(MPI_Barrier(m_comm_barrier));
 }
 
 template <typename T>
@@ -269,24 +282,24 @@ inline void comm::register_pre_barrier_callback(
 template <typename T>
 inline T comm::all_reduce_sum(const T &t) const {
   T to_return;
-  ASSERT_MPI(MPI_Allreduce(&t, &to_return, 1, detail::mpi_typeof(T()), MPI_SUM,
-                           m_comm_other));
+  YGM_ASSERT_MPI(MPI_Allreduce(&t, &to_return, 1, detail::mpi_typeof(T()),
+                               MPI_SUM, m_comm_other));
   return to_return;
 }
 
 template <typename T>
 inline T comm::all_reduce_min(const T &t) const {
   T to_return;
-  ASSERT_MPI(MPI_Allreduce(&t, &to_return, 1, detail::mpi_typeof(T()), MPI_MIN,
-                           m_comm_other));
+  YGM_ASSERT_MPI(MPI_Allreduce(&t, &to_return, 1, detail::mpi_typeof(T()),
+                               MPI_MIN, m_comm_other));
   return to_return;
 }
 
 template <typename T>
 inline T comm::all_reduce_max(const T &t) const {
   T to_return;
-  ASSERT_MPI(MPI_Allreduce(&t, &to_return, 1, detail::mpi_typeof(T()), MPI_MAX,
-                           m_comm_other));
+  YGM_ASSERT_MPI(MPI_Allreduce(&t, &to_return, 1, detail::mpi_typeof(T()),
+                               MPI_MAX, m_comm_other));
   return to_return;
 }
 
@@ -333,21 +346,22 @@ inline void comm::mpi_send(const T &data, int dest, int tag,
   cereal::YGMOutputArchive oarchive(packed);
   oarchive(data);
   size_t packed_size = packed.size();
-  ASSERT_RELEASE(packed_size < 1024 * 1024 * 1024);
-  ASSERT_MPI(MPI_Send(&packed_size, 1, detail::mpi_typeof(packed_size), dest,
-                      tag, comm));
-  ASSERT_MPI(MPI_Send(packed.data(), packed_size, MPI_BYTE, dest, tag, comm));
+  YGM_ASSERT_RELEASE(packed_size < 1024 * 1024 * 1024);
+  YGM_ASSERT_MPI(MPI_Send(&packed_size, 1, detail::mpi_typeof(packed_size),
+                          dest, tag, comm));
+  YGM_ASSERT_MPI(
+      MPI_Send(packed.data(), packed_size, MPI_BYTE, dest, tag, comm));
 }
 
 template <typename T>
 inline T comm::mpi_recv(int source, int tag, MPI_Comm comm) const {
   std::vector<std::byte> packed;
   size_t                 packed_size{0};
-  ASSERT_MPI(MPI_Recv(&packed_size, 1, detail::mpi_typeof(packed_size), source,
-                      tag, comm, MPI_STATUS_IGNORE));
+  YGM_ASSERT_MPI(MPI_Recv(&packed_size, 1, detail::mpi_typeof(packed_size),
+                          source, tag, comm, MPI_STATUS_IGNORE));
   packed.resize(packed_size);
-  ASSERT_MPI(MPI_Recv(packed.data(), packed_size, MPI_BYTE, source, tag, comm,
-                      MPI_STATUS_IGNORE));
+  YGM_ASSERT_MPI(MPI_Recv(packed.data(), packed_size, MPI_BYTE, source, tag,
+                          comm, MPI_STATUS_IGNORE));
 
   T                       to_return;
   cereal::YGMInputArchive iarchive(packed.data(), packed.size());
@@ -363,13 +377,13 @@ inline T comm::mpi_bcast(const T &to_bcast, int root, MPI_Comm comm) const {
     oarchive(to_bcast);
   }
   size_t packed_size = packed.size();
-  ASSERT_RELEASE(packed_size < 1024 * 1024 * 1024);
-  ASSERT_MPI(
+  YGM_ASSERT_RELEASE(packed_size < 1024 * 1024 * 1024);
+  YGM_ASSERT_MPI(
       MPI_Bcast(&packed_size, 1, detail::mpi_typeof(packed_size), root, comm));
   if (rank() != root) {
     packed.resize(packed_size);
   }
-  ASSERT_MPI(MPI_Bcast(packed.data(), packed_size, MPI_BYTE, root, comm));
+  YGM_ASSERT_MPI(MPI_Bcast(packed.data(), packed_size, MPI_BYTE, root, comm));
 
   cereal::YGMInputArchive iarchive(packed.data(), packed.size());
   T                       to_return;
@@ -462,12 +476,12 @@ inline std::pair<uint64_t, uint64_t> comm::barrier_reduce_counts() {
   uint64_t local_counts[2]  = {m_recv_count, m_send_count};
   uint64_t global_counts[2] = {0, 0};
 
-  ASSERT_RELEASE(m_pending_isend_bytes == 0);
-  ASSERT_RELEASE(m_send_buffer_bytes == 0);
+  YGM_ASSERT_RELEASE(m_pending_isend_bytes == 0);
+  YGM_ASSERT_RELEASE(m_send_buffer_bytes == 0);
 
   MPI_Request req = MPI_REQUEST_NULL;
-  ASSERT_MPI(MPI_Iallreduce(local_counts, global_counts, 2, MPI_UINT64_T,
-                            MPI_SUM, m_comm_barrier, &req));
+  YGM_ASSERT_MPI(MPI_Iallreduce(local_counts, global_counts, 2, MPI_UINT64_T,
+                                MPI_SUM, m_comm_barrier, &req));
   stats.iallreduce();
   bool iallreduce_complete(false);
   while (!iallreduce_complete) {
@@ -482,7 +496,7 @@ inline std::pair<uint64_t, uint64_t> comm::barrier_reduce_counts() {
     {
       auto timer = stats.waitsome_iallreduce();
       while (outcount == 0) {
-        ASSERT_MPI(
+        YGM_ASSERT_MPI(
             MPI_Testsome(2, twin_req, &outcount, twin_indices, twin_status));
       }
     }
@@ -496,7 +510,7 @@ inline std::pair<uint64_t, uint64_t> comm::barrier_reduce_counts() {
         mpi_irecv_request req_buffer = m_recv_queue.front();
         m_recv_queue.pop_front();
         int buffer_size{0};
-        ASSERT_MPI(MPI_Get_count(&twin_status[i], MPI_BYTE, &buffer_size));
+        YGM_ASSERT_MPI(MPI_Get_count(&twin_status[i], MPI_BYTE, &buffer_size));
         stats.irecv(twin_status[i].MPI_SOURCE, buffer_size);
         handle_next_receive(req_buffer.buffer, buffer_size);
         flush_all_local_and_process_incoming();
@@ -523,13 +537,13 @@ inline void comm::flush_send_buffer(int dest) {
     }
     request.buffer->swap(m_vec_send_buffers[dest]);
     if (config.freq_issend > 0 && counter++ % config.freq_issend == 0) {
-      ASSERT_MPI(MPI_Issend(request.buffer->data(), request.buffer->size(),
-                            MPI_BYTE, dest, 0, m_comm_async,
-                            &(request.request)));
+      YGM_ASSERT_MPI(MPI_Issend(request.buffer->data(), request.buffer->size(),
+                                MPI_BYTE, dest, 0, m_comm_async,
+                                &(request.request)));
     } else {
-      ASSERT_MPI(MPI_Isend(request.buffer->data(), request.buffer->size(),
-                           MPI_BYTE, dest, 0, m_comm_async,
-                           &(request.request)));
+      YGM_ASSERT_MPI(MPI_Isend(request.buffer->data(), request.buffer->size(),
+                               MPI_BYTE, dest, 0, m_comm_async,
+                               &(request.request)));
     }
     stats.isend(dest, request.buffer->size());
     m_pending_isend_bytes += request.buffer->size();
@@ -618,7 +632,7 @@ inline void comm::flush_all_local_and_process_incoming() {
  */
 inline void comm::flush_to_capacity() {
   while (m_send_buffer_bytes > config.buffer_size) {
-    ASSERT_DEBUG(!m_send_dest_queue.empty());
+    YGM_ASSERT_DEBUG(!m_send_dest_queue.empty());
     int dest = m_send_dest_queue.front();
     m_send_dest_queue.pop_front();
     flush_send_buffer(dest);
@@ -908,7 +922,7 @@ inline void comm::handle_next_receive(std::shared_ptr<ygm::detail::byte_vector>
  * @return True if receive queue was non-empty, else false
  */
 inline bool comm::process_receive_queue() {
-  ASSERT_RELEASE(!m_in_process_receive_queue);
+  YGM_ASSERT_RELEASE(!m_in_process_receive_queue);
   m_in_process_receive_queue = true;
   bool received_to_return    = false;
 
@@ -930,7 +944,7 @@ inline bool comm::process_receive_queue() {
     {
       auto timer = stats.waitsome_isend_irecv();
       while (outcount == 0) {
-        ASSERT_MPI(
+        YGM_ASSERT_MPI(
             MPI_Testsome(2, twin_req, &outcount, twin_indices, twin_status));
       }
     }
@@ -945,7 +959,7 @@ inline bool comm::process_receive_queue() {
         mpi_irecv_request req_buffer = m_recv_queue.front();
         m_recv_queue.pop_front();
         int buffer_size{0};
-        ASSERT_MPI(MPI_Get_count(&twin_status[i], MPI_BYTE, &buffer_size));
+        YGM_ASSERT_MPI(MPI_Get_count(&twin_status[i], MPI_BYTE, &buffer_size));
         stats.irecv(twin_status[i].MPI_SOURCE, buffer_size);
         handle_next_receive(req_buffer.buffer, buffer_size);
       }
@@ -953,7 +967,7 @@ inline bool comm::process_receive_queue() {
   } else {
     if (!m_send_queue.empty()) {
       int flag(0);
-      ASSERT_MPI(
+      YGM_ASSERT_MPI(
           MPI_Test(&(m_send_queue.front().request), &flag, MPI_STATUS_IGNORE));
       stats.isend_test();
       if (flag) {
@@ -965,7 +979,7 @@ inline bool comm::process_receive_queue() {
     }
   }
 
-  received_to_return != local_process_incoming();
+  received_to_return |= local_process_incoming();
 
   m_in_process_receive_queue = false;
   return received_to_return;
@@ -977,14 +991,14 @@ inline bool comm::local_process_incoming() {
   while (true) {
     int        flag(0);
     MPI_Status status;
-    ASSERT_MPI(MPI_Test(&(m_recv_queue.front().request), &flag, &status));
+    YGM_ASSERT_MPI(MPI_Test(&(m_recv_queue.front().request), &flag, &status));
     stats.irecv_test();
     if (flag) {
       received_to_return           = true;
       mpi_irecv_request req_buffer = m_recv_queue.front();
       m_recv_queue.pop_front();
       int buffer_size{0};
-      ASSERT_MPI(MPI_Get_count(&status, MPI_BYTE, &buffer_size));
+      YGM_ASSERT_MPI(MPI_Get_count(&status, MPI_BYTE, &buffer_size));
       stats.irecv(status.MPI_SOURCE, buffer_size);
       handle_next_receive(req_buffer.buffer, buffer_size);
     } else {
diff --git a/include/ygm/detail/lambda_compliance.hpp b/include/ygm/detail/lambda_compliance.hpp
new file mode 100644
index 00000000..df130769
--- /dev/null
+++ b/include/ygm/detail/lambda_compliance.hpp
@@ -0,0 +1,23 @@
+// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
+// Project Developers. See the top-level COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+// Emits a static_assert that the function-object type `func` is both
+// trivially copyable and standard layout.
+//
+//   func      type to check (e.g. an AsyncFunction template parameter)
+//   location  string literal naming the calling API; it is concatenated in
+//             front of the diagnostic text, so it must be a literal
+#define YGM_CHECK_ASYNC_LAMBDA_COMPLIANCE(func, location) \
+  static_assert(                                          \
+      std::is_trivially_copyable<func>::value &&          \
+          std::is_standard_layout<func>::value,           \
+      location                                            \
+      " function object must be is_trivially_copyable & is_standard_layout.")
diff --git a/include/ygm/detail/lambda_map.hpp b/include/ygm/detail/lambda_map.hpp
index 9b9bd978..b5315ead 100644
--- a/include/ygm/detail/lambda_map.hpp
+++ b/include/ygm/detail/lambda_map.hpp
@@ -34,7 +34,7 @@ class lambda_map {
  private:
   template <typename LambdaType>
   static FuncId record() {
-    ASSERT_RELEASE(s_map.size() < std::numeric_limits<FuncId>::max());
+    YGM_ASSERT_RELEASE(s_map.size() < std::numeric_limits<FuncId>::max());
     FuncId      to_return = s_map.size();
     LambdaType *lp;  // scary, but by definition can't capture
     s_map.push_back(*lp);
diff --git a/include/ygm/detail/layout.hpp b/include/ygm/detail/layout.hpp
index 20ffcda4..c5037991 100644
--- a/include/ygm/detail/layout.hpp
+++ b/include/ygm/detail/layout.hpp
@@ -32,31 +32,31 @@ class layout {
  public:
   layout(MPI_Comm comm) {
     // global ranks
-    ASSERT_MPI(MPI_Comm_size(comm, &m_comm_size));
-    ASSERT_MPI(MPI_Comm_rank(comm, &m_comm_rank));
+    YGM_ASSERT_MPI(MPI_Comm_size(comm, &m_comm_size));
+    YGM_ASSERT_MPI(MPI_Comm_rank(comm, &m_comm_rank));
 
     // local ranks
     MPI_Comm comm_local;
-    ASSERT_MPI(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, m_comm_rank,
+    YGM_ASSERT_MPI(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, m_comm_rank,
                                    MPI_INFO_NULL, &comm_local));
-    ASSERT_MPI(MPI_Comm_size(comm_local, &m_local_size));
-    ASSERT_MPI(MPI_Comm_rank(comm_local, &m_local_id));
+    YGM_ASSERT_MPI(MPI_Comm_size(comm_local, &m_local_size));
+    YGM_ASSERT_MPI(MPI_Comm_rank(comm_local, &m_local_id));
 
     _mpi_allgather(m_comm_rank, m_local_ranks, m_local_size, comm_local);
 
     // node ranks
     MPI_Comm comm_node;
-    ASSERT_MPI(MPI_Comm_split(comm, m_local_id, m_comm_rank, &comm_node));
-    ASSERT_MPI(MPI_Comm_size(comm_node, &m_node_size));
-    ASSERT_MPI(MPI_Comm_rank(comm_node, &m_node_id));
+    YGM_ASSERT_MPI(MPI_Comm_split(comm, m_local_id, m_comm_rank, &comm_node));
+    YGM_ASSERT_MPI(MPI_Comm_size(comm_node, &m_node_size));
+    YGM_ASSERT_MPI(MPI_Comm_rank(comm_node, &m_node_id));
 
     _mpi_allgather(m_comm_rank, m_strided_ranks, m_node_size, comm_node);
 
     _mpi_allgather(m_local_id, m_rank_to_local, m_comm_size, comm);
     _mpi_allgather(m_node_id, m_rank_to_node, m_comm_size, comm);
 
-    ASSERT_RELEASE(MPI_Comm_free(&comm_local) == MPI_SUCCESS);
-    ASSERT_RELEASE(MPI_Comm_free(&comm_node) == MPI_SUCCESS);
+    YGM_ASSERT_RELEASE(MPI_Comm_free(&comm_local) == MPI_SUCCESS);
+    YGM_ASSERT_RELEASE(MPI_Comm_free(&comm_node) == MPI_SUCCESS);
   }
 
   layout(const layout &rhs)
@@ -158,7 +158,7 @@ class layout {
   template <typename T>
   void _mpi_allgather(T &_t, std::vector<T> &out_vec, int size, MPI_Comm comm) {
     out_vec.resize(size);
-    ASSERT_MPI(MPI_Allgather(&_t, sizeof(_t), MPI_BYTE, &(out_vec[0]),
+    YGM_ASSERT_MPI(MPI_Allgather(&_t, sizeof(_t), MPI_BYTE, &(out_vec[0]),
                              sizeof(_t), MPI_BYTE, comm));
   }
 
diff --git a/include/ygm/detail/mpi.hpp b/include/ygm/detail/mpi.hpp
index e026aad7..b0c5bb9f 100644
--- a/include/ygm/detail/mpi.hpp
+++ b/include/ygm/detail/mpi.hpp
@@ -13,10 +13,10 @@ namespace ygm::detail {
 class mpi_init_finalize {
  public:
   mpi_init_finalize(int *argc, char ***argv) {
-    ASSERT_MPI(MPI_Init(argc, argv));
+    YGM_ASSERT_MPI(MPI_Init(argc, argv));
   }
   ~mpi_init_finalize() {
-    ASSERT_RELEASE(MPI_Barrier(MPI_COMM_WORLD) == MPI_SUCCESS);
+    YGM_ASSERT_RELEASE(MPI_Barrier(MPI_COMM_WORLD) == MPI_SUCCESS);
     if (MPI_Finalize() != MPI_SUCCESS) {
       std::cerr << "ERROR:  MPI_Finilize() != MPI_SUCCESS" << std::endl;
       exit(-1);
diff --git a/include/ygm/detail/ygm_cereal_archive.hpp b/include/ygm/detail/ygm_cereal_archive.hpp
index 58fa4889..6575e01b 100644
--- a/include/ygm/detail/ygm_cereal_archive.hpp
+++ b/include/ygm/detail/ygm_cereal_archive.hpp
@@ -87,7 +87,7 @@ class YGMInputArchive
 
   //! Reads size bytes of data from the input stream
   void loadBinary(void *const data, std::streamsize size) {
-    ASSERT_DEBUG(m_position + size <= m_capacity);
+    YGM_ASSERT_DEBUG(m_position + size <= m_capacity);
     std::memcpy(data, m_pdata + m_position, size);
     m_position += size;
 
@@ -98,7 +98,7 @@ class YGMInputArchive
   }
 
   bool empty() const {
-    ASSERT_DEBUG(!(m_position > m_capacity));
+    YGM_ASSERT_DEBUG(!(m_position > m_capacity));
     return m_position == m_capacity;
   }
 
diff --git a/include/ygm/detail/ygm_ptr.hpp b/include/ygm/detail/ygm_ptr.hpp
index 79071837..c9e946a4 100644
--- a/include/ygm/detail/ygm_ptr.hpp
+++ b/include/ygm/detail/ygm_ptr.hpp
@@ -18,10 +18,9 @@ class ygm_ptr {
   ygm_ptr(){};
 
   T       *operator->() { return sptrs[idx]; }
-  const T *operator->() const { return sptrs[idx]; }
+  T *const operator->() const { return sptrs[idx]; }
 
-  T       &operator*() { return *sptrs[idx]; }
-  const T &operator*() const { return *sptrs[idx]; }
+  T &operator*() const { return *sptrs[idx]; }
 
   /**
    * @brief Construct a new ygm ptr object
@@ -45,7 +44,7 @@ class ygm_ptr {
 
   template <typename Comm>
   void check(Comm &c) const {
-    ASSERT_RELEASE(idx == c.all_reduce_min(idx));
+    YGM_ASSERT_RELEASE(idx == c.all_reduce_min(idx));
   }
 
   template <class Archive>
diff --git a/include/ygm/detail/ygm_traits.hpp b/include/ygm/detail/ygm_traits.hpp
index c1ac6c12..e7060a5a 100644
--- a/include/ygm/detail/ygm_traits.hpp
+++ b/include/ygm/detail/ygm_traits.hpp
@@ -4,6 +4,7 @@
 // SPDX-License-Identifier: MIT
 
 #pragma once
+#include <algorithm>
 #include <type_traits>
 
 namespace ygm::detail {
diff --git a/include/ygm/for_all_adapter.hpp b/include/ygm/for_all_adapter.hpp
deleted file mode 100644
index 0b87c063..00000000
--- a/include/ygm/for_all_adapter.hpp
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
-// Project Developers. See the top-level COPYRIGHT file for details.
-//
-// SPDX-License-Identifier: MIT
-
-#pragma once
-
-namespace ygm {
-
-/**
- * @brief Consuming for_all adapter.
- *
- * @tparam Container
- */
-template <typename Container>
-class for_all_consume_adapter {
- public:
-  for_all_consume_adapter(Container& c) : m_rc(c) {}
-
-  template <typename Function>
-  void for_all(Function fn) {
-    m_rc.consume_all(fn);
-  }
-
- private:
-  Container& m_rc;
-};
-
-/**
- * @brief Adapter that iteratively calls consume_all until container is globally
- * empty.
- *
- * @tparam Container
- */
-template <typename Container>
-class consume_all_iterative_adapter {
- public:
-  consume_all_iterative_adapter(Container& c) : m_rc(c) {}
-
-  template <typename Function>
-  void consume_all(Function fn) {
-    while (not m_rc.empty()) {
-      m_rc.consume_all(fn);
-    }
-  }
-
- private:
-  Container& m_rc;
-};
-}  // namespace ygm
\ No newline at end of file
diff --git a/include/ygm/io/csv_parser.hpp b/include/ygm/io/csv_parser.hpp
index 8b003dd3..c8d47fbc 100644
--- a/include/ygm/io/csv_parser.hpp
+++ b/include/ygm/io/csv_parser.hpp
@@ -6,20 +6,23 @@
 #pragma once
 
 #include <fstream>
+#include <map>
 #include <string>
 #include <vector>
+#include <ygm/container/detail/base_iteration.hpp>
 #include <ygm/io/detail/csv.hpp>
 #include <ygm/io/line_parser.hpp>
-#include <ygm/container/detail/base_iteration.hpp>
 
 namespace ygm::io {
 
-class csv_parser : public ygm::container::detail::base_iteration<csv_parser, std::tuple<std::vector<detail::csv_field>>> {
+class csv_parser : public ygm::container::detail::base_iteration_value<
+                       csv_parser, std::tuple<std::vector<detail::csv_field>>> {
  public:
   using for_all_args = std::tuple<std::vector<detail::csv_field>>;
 
   template <typename... Args>
-  csv_parser(Args&&... args) : m_lp(std::forward<Args>(args)...) {}
+  csv_parser(Args&&... args)
+      : m_lp(std::forward<Args>(args)...), m_has_headers(false) {}
 
   /**
    * @brief Executes a user function for every CSV record in a set of files.
@@ -30,17 +33,49 @@ class csv_parser : public ygm::container::detail::base_iteration<csv_parser, std
   template <typename Function>
   void for_all(Function fn) {
     using namespace ygm::io::detail;
-    m_lp.for_all([fn](const std::string& line) {
-      auto vfields = parse_csv_line(line);
+
+    // Parse each line against the header map (empty unless read_headers() was
+    // called) and forward every non-empty record to the user function.
+    auto handle_line_lambda = [fn, this](const std::string& line) {
+      auto vfields = parse_csv_line(line, m_header_map);
       // auto stypes    = convert_type_string(vfields);
       // todo, detect if types are inconsistent between records
       if (vfields.size() > 0) {
         fn(vfields);
       }
-    });
+    };
+
+    m_lp.for_all(handle_line_lambda);
+  }
+
+  /**
+   * @brief Read the header of a CSV file
+   */
+  void read_headers() {
+    using namespace ygm::io::detail;
+    auto header_line = m_lp.read_first_line();
+    m_lp.set_skip_first_line(true);
+    m_header_map  = parse_csv_headers(header_line);
+    m_has_headers = true;
   }
 
+  /**
+   * @brief Checks for existence of a column label within headers
+   *
+   * @param label Header label to search for within headers
+   */
+  bool has_header(const std::string& label) {
+    return m_has_headers && (m_header_map.find(label) != m_header_map.end());
+  }
+
+  ygm::comm& comm() { return m_lp.comm(); }
+
+  const ygm::comm& comm() const { return m_lp.comm(); }
+
  private:
   line_parser m_lp;
-};
-}  // namespace ygm::io
\ No newline at end of file
+
+  std::map<std::string, int> m_header_map;
+  bool                       m_has_headers;
+};
+}  // namespace ygm::io
diff --git a/include/ygm/io/detail/csv.hpp b/include/ygm/io/detail/csv.hpp
index 2e54bbac..3b3fe4fc 100644
--- a/include/ygm/io/detail/csv.hpp
+++ b/include/ygm/io/detail/csv.hpp
@@ -6,6 +6,7 @@
 #pragma once
 
 #include <iomanip>
+#include <map>
 #include <string>
 #include <variant>
 #include <vector>
@@ -68,12 +69,57 @@ class csv_field {
   std::string          m_f;
 };
 
+// Fields of one parsed CSV record, addressable by position or by header label.
+// Only a reference to the header map is kept, so the map must outlive the line.
+class csv_line {
+ public:
+  using vector_type            = std::vector<csv_field>;
+  using size_type              = vector_type::size_type;
+  using reference              = vector_type::reference;
+  using const_reference        = vector_type::const_reference;
+  using iterator               = vector_type::iterator;
+  using const_iterator         = vector_type::const_iterator;
+  using reverse_iterator       = vector_type::reverse_iterator;
+  using const_reverse_iterator = vector_type::const_reverse_iterator;
+
+  csv_line(const std::map<std::string, int> &header_map)
+      : m_header_map_ref(header_map) {}
+  void push_back(const csv_field &f) { m_csv_fields.push_back(f); }
+  size_type size() const { return m_csv_fields.size(); }
+
+  reference       operator[](size_type n) { return m_csv_fields[n]; }
+  const_reference operator[](size_type n) const { return m_csv_fields[n]; }
+
+  // By header label; throws std::out_of_range if label or column is absent.
+  const_reference operator[](const std::string &key) const {
+    return m_csv_fields.at(m_header_map_ref.at(key));
+  }
+
+  iterator               begin() { return m_csv_fields.begin(); }
+  iterator               end() { return m_csv_fields.end(); }
+  const_iterator         begin() const { return m_csv_fields.begin(); }
+  const_iterator         end() const { return m_csv_fields.end(); }
+  reverse_iterator       rbegin() { return m_csv_fields.rbegin(); }
+  reverse_iterator       rend() { return m_csv_fields.rend(); }
+  const_reverse_iterator rbegin() const { return m_csv_fields.rbegin(); }
+  const_reverse_iterator rend() const { return m_csv_fields.rend(); }
+  const_iterator         cbegin() const { return m_csv_fields.cbegin(); }
+  const_iterator         cend() const { return m_csv_fields.cend(); }
+  const_reverse_iterator crbegin() const { return m_csv_fields.crbegin(); }
+  const_reverse_iterator crend() const { return m_csv_fields.crend(); }
+
+ private:
+  std::vector<csv_field>            m_csv_fields;
+  const std::map<std::string, int> &m_header_map_ref;
+};
+
 std::ostream &operator<<(std::ostream &os, const csv_field &f) {
   return os << f.as_string();
 }
 
-std::vector<csv_field> parse_csv_line(const std::string line) {
-  std::vector<csv_field> line_fields;
+csv_line parse_csv_line(const std::string                 line,
+                        const std::map<std::string, int> &header_map_ref) {
+  csv_line line_fields(header_map_ref);
   if (line.empty() || line[0] == '#') {
     return line_fields;
   }
@@ -96,6 +142,30 @@ std::vector<csv_field> parse_csv_line(const std::string line) {
   return line_fields;
 }
 
+// Maps each label of a CSV header line to its zero-based column index. Unquoted
+// labels lose trailing whitespace (e.g. the '\r' left by a CRLF line ending).
+std::map<std::string, int> parse_csv_headers(const std::string header_line) {
+  std::map<std::string, int> header_map;
+  std::stringstream          ssline(header_line);
+  int                        column_num{0};
+
+  while (ssline >> std::ws) {
+    std::string header_field;
+    if (ssline.peek() == '"') {
+      ssline >> std::quoted(header_field);
+      if (ssline) {
+        header_map[header_field] = column_num++;
+      }
+      ssline.ignore(256, ',');
+    } else if (std::getline(ssline, header_field, ',')) {
+      header_field.erase(header_field.find_last_not_of(" \t\r\n") + 1);
+      header_map[header_field] = column_num++;
+    }
+  }
+
+  return header_map;
+}
+
 std::string convert_type_string(const std::vector<csv_field> &line_fields) {
   std::stringstream ss;
   for (const auto &f : line_fields) {
diff --git a/include/ygm/io/line_parser.hpp b/include/ygm/io/line_parser.hpp
index d25178e4..fb25a7a2 100644
--- a/include/ygm/io/line_parser.hpp
+++ b/include/ygm/io/line_parser.hpp
@@ -19,9 +19,16 @@ namespace fs = std::filesystem;
  * @brief Distributed text file parsing.
  *
  */
-class line_parser: public ygm::container::detail::base_iteration<line_parser, std::tuple<std::string>> {
+class line_parser : public ygm::container::detail::base_iteration_value<
+                        line_parser, std::tuple<std::string>> {
  public:
   using for_all_args = std::tuple<std::string>;
+
+ private:
+  // enum for tracking storage accessibility
+  enum class accessibility_tag { distributed, local };
+
+ public:
   /**
    * @brief Construct a new line parser object
    *
@@ -32,15 +39,16 @@ class line_parser: public ygm::container::detail::base_iteration<line_parser, st
    */
   line_parser(ygm::comm& comm, const std::vector<std::string>& stringpaths,
               bool node_local_filesystem = false, bool recursive = false)
-      : m_comm(comm), m_node_local_filesystem(node_local_filesystem) {
-    if (node_local_filesystem) {
-      ASSERT_RELEASE(false);
-      check_paths(stringpaths, recursive);
-    } else {
-      if (m_comm.rank0()) {
-        check_paths(stringpaths, recursive);
-      }
-    }
+      : m_comm(comm), m_skip_first_line(false) {
+    check_paths(stringpaths, recursive);
+    // if (node_local_filesystem) {
+    // YGM_ASSERT_RELEASE(false);
+    // check_paths(stringpaths, recursive);
+    //} else {
+    // if (m_comm.rank0()) {
+    // check_paths(stringpaths, recursive);
+    //}
+    //}
   }
 
   /**
@@ -51,95 +59,172 @@ class line_parser: public ygm::container::detail::base_iteration<line_parser, st
    */
   template <typename Function>
   void for_all(Function fn) {
-    if (m_node_local_filesystem) {
-      ASSERT_RELEASE(false);
-      if (m_paths.empty()) return;
-    } else {
-      static std::vector<std::tuple<fs::path, size_t, size_t>> my_file_paths;
-
-      //
-      //  Splits files over ranks by file size.   8MB is smallest granularity.
-      //  This approach could be improved by having rank_layout information.
-      m_comm.barrier();
-      if (m_comm.rank0()) {
-        std::vector<std::tuple<fs::path, size_t, size_t>> remaining_files(
-            m_paths.size());
-        size_t total_size{0};
-        for (size_t i = 0; i < m_paths.size(); ++i) {
-          size_t fsize = fs::file_size(m_paths[i]);
+    static std::vector<std::tuple<fs::path, size_t, size_t>> my_file_paths;
+
+    //
+    //  Splits files over ranks by file size.   8MB is smallest granularity.
+    //  This approach could be improved by having rank_layout information.
+    // Starts with distributed files from rank 0
+    m_comm.barrier();
+    if (m_comm.rank0()) {
+      std::vector<std::tuple<fs::path, size_t, size_t>> remaining_files;
+      size_t                                            total_size{0};
+      for (size_t i = 0; i < m_paths.size(); ++i) {
+        if (m_paths[i].second == accessibility_tag::distributed) {
+          size_t fsize = fs::file_size(m_paths[i].first);
           total_size += fsize;
-          remaining_files[i] = std::make_tuple(m_paths[i], size_t(0), fsize);
+          remaining_files.push_back(
+              std::make_tuple(m_paths[i].first, size_t(0), fsize));
         }
+      }
 
-        if (total_size > 0) {
-          size_t bytes_per_rank = std::max((total_size / m_comm.size()) + 1,
-                                           size_t(8 * 1024 * 1024));
-          for (int rank = 0; rank < m_comm.size(); ++rank) {
-            size_t remaining_budget = bytes_per_rank;
-            while (remaining_budget > 0 && !remaining_files.empty()) {
-              size_t file_remaining = std::get<2>(remaining_files.back()) -
-                                      std::get<1>(remaining_files.back());
-              size_t& cur_position = std::get<1>(remaining_files.back());
-              if (file_remaining > remaining_budget) {
-                m_comm.async(
-                    rank,
-                    [](const std::string& fname, size_t bytes_begin,
-                       size_t bytes_end) {
-                      my_file_paths.push_back(
-                          {fs::path(fname), bytes_begin, bytes_end});
-                    },
-                    (std::string)std::get<0>(remaining_files.back()),
-                    cur_position, cur_position + remaining_budget);
-                cur_position += remaining_budget;
-                remaining_budget = 0;
-              } else if (file_remaining <= remaining_budget) {
-                m_comm.async(
-                    rank,
-                    [](const std::string& fname, size_t bytes_begin,
-                       size_t bytes_end) {
-                      my_file_paths.push_back(
-                          {fs::path(fname), bytes_begin, bytes_end});
-                    },
-                    (std::string)std::get<0>(remaining_files.back()),
-                    cur_position, std::get<2>(remaining_files.back()));
-                remaining_budget -= file_remaining;
-                remaining_files.pop_back();
-              }
+      if (total_size > 0) {
+        size_t bytes_per_rank =
+            std::max((total_size / m_comm.size()) + 1, size_t(8 * 1024 * 1024));
+        for (int rank = 0; rank < m_comm.size(); ++rank) {
+          size_t remaining_budget = bytes_per_rank;
+          while (remaining_budget > 0 && !remaining_files.empty()) {
+            size_t file_remaining = std::get<2>(remaining_files.back()) -
+                                    std::get<1>(remaining_files.back());
+            size_t& cur_position = std::get<1>(remaining_files.back());
+            if (file_remaining > remaining_budget) {
+              m_comm.async(
+                  rank,
+                  [](const std::string& fname, size_t bytes_begin,
+                     size_t bytes_end) {
+                    my_file_paths.push_back(
+                        {fs::path(fname), bytes_begin, bytes_end});
+                  },
+                  (std::string)std::get<0>(remaining_files.back()),
+                  cur_position, cur_position + remaining_budget);
+              cur_position += remaining_budget;
+              remaining_budget = 0;
+            } else if (file_remaining <= remaining_budget) {
+              m_comm.async(
+                  rank,
+                  [](const std::string& fname, size_t bytes_begin,
+                     size_t bytes_end) {
+                    my_file_paths.push_back(
+                        {fs::path(fname), bytes_begin, bytes_end});
+                  },
+                  (std::string)std::get<0>(remaining_files.back()),
+                  cur_position, std::get<2>(remaining_files.back()));
+              remaining_budget -= file_remaining;
+              remaining_files.pop_back();
             }
           }
         }
       }
-      m_comm.barrier();
-
-      //
-      // Each rank process locally assigned files.
-      for (const auto& fname : my_file_paths) {
-        // m_comm.cout("Opening: ", std::get<0>(fname), " ", std::get<1>(fname),
-        //             " ", std::get<2>(fname));
-        std::ifstream ifs(std::get<0>(fname));
-        // Note: Current process is responsible for reading up to *AND
-        // INCLUDING* bytes_end
-        size_t bytes_begin = std::get<1>(fname);
-        size_t bytes_end   = std::get<2>(fname);
-        ASSERT_RELEASE(ifs.good());
-        ifs.imbue(std::locale::classic());
-        std::string line;
-        // Throw away line containing bytes_begin as it was read by the previous
-        // process (unless it corresponds to the beginning of a file)
-        if (bytes_begin > 0) {
-          ifs.seekg(bytes_begin);
-          std::getline(ifs, line);
+    }
+
+    // First rank on every node checks its local files
+    if (m_comm.layout().local_id() == 0) {
+      std::vector<std::tuple<fs::path, size_t, size_t>> remaining_files;
+      size_t                                            total_size{0};
+      for (size_t i = 0; i < m_paths.size(); ++i) {
+        if (m_paths[i].second == accessibility_tag::local) {
+          size_t fsize = fs::file_size(m_paths[i].first);
+          total_size += fsize;
+          remaining_files.push_back(
+              std::make_tuple(m_paths[i].first, size_t(0), fsize));
+        }
+      }
+
+      if (total_size > 0) {
+        size_t bytes_per_rank =
+            std::max((total_size / m_comm.layout().local_size()) + 1,
+                     size_t(8 * 1024 * 1024));
+        for (int rank : m_comm.layout().local_ranks()) {
+          size_t remaining_budget = bytes_per_rank;
+          while (remaining_budget > 0 && !remaining_files.empty()) {
+            size_t file_remaining = std::get<2>(remaining_files.back()) -
+                                    std::get<1>(remaining_files.back());
+            size_t& cur_position = std::get<1>(remaining_files.back());
+            if (file_remaining > remaining_budget) {
+              m_comm.async(
+                  rank,
+                  [](const std::string& fname, size_t bytes_begin,
+                     size_t bytes_end) {
+                    my_file_paths.push_back(
+                        {fs::path(fname), bytes_begin, bytes_end});
+                  },
+                  (std::string)std::get<0>(remaining_files.back()),
+                  cur_position, cur_position + remaining_budget);
+              cur_position += remaining_budget;
+              remaining_budget = 0;
+            } else if (file_remaining <= remaining_budget) {
+              m_comm.async(
+                  rank,
+                  [](const std::string& fname, size_t bytes_begin,
+                     size_t bytes_end) {
+                    my_file_paths.push_back(
+                        {fs::path(fname), bytes_begin, bytes_end});
+                  },
+                  (std::string)std::get<0>(remaining_files.back()),
+                  cur_position, std::get<2>(remaining_files.back()));
+              remaining_budget -= file_remaining;
+              remaining_files.pop_back();
+            }
+          }
         }
-        // Keep reading until line containing bytes_end is read
-        while (ifs.tellg() <= bytes_end && std::getline(ifs, line)) {
+      }
+    }
+    m_comm.barrier();
+
+    //
+    // Each rank process locally assigned files.
+    for (const auto& fname : my_file_paths) {
+      // m_comm.cout("Opening: ", std::get<0>(fname), " ", std::get<1>(fname),
+      //             " ", std::get<2>(fname));
+      std::ifstream ifs(std::get<0>(fname));
+      // Note: Current process is responsible for reading up to *AND
+      // INCLUDING* bytes_end
+      size_t bytes_begin = std::get<1>(fname);
+      size_t bytes_end   = std::get<2>(fname);
+      YGM_ASSERT_RELEASE(ifs.good());
+      ifs.imbue(std::locale::classic());
+      std::string line;
+      bool        first_line = false;
+      // Throw away line containing bytes_begin as it was read by the previous
+      // process (unless it corresponds to the beginning of a file)
+      if (bytes_begin > 0) {
+        ifs.seekg(bytes_begin);
+        std::getline(ifs, line);
+      } else {
+        first_line = true;
+      }
+      // Keep reading until line containing bytes_end is read
+      while (ifs.tellg() <= bytes_end && std::getline(ifs, line)) {
+        // Skip first line if necessary
+        if (not first_line || not m_skip_first_line) {
           fn(line);
-          // if(ifs.tellg() > bytes_end) break;
+        } else {
         }
+        // if(ifs.tellg() > bytes_end) break;
+        first_line = false;
       }
-      my_file_paths.clear();
     }
+    my_file_paths.clear();
+  }
+
+  // Rank 0 reads the first line of its first stored path; the line is then
+  // passed through mpi_bcast (root 0). Stays empty if rank 0 has no paths.
+  std::string read_first_line() {
+    std::string line;
+    if (m_comm.rank0() && !m_paths.empty()) {
+      std::ifstream ifs(m_paths[0].first);
+      std::getline(ifs, line);
+    }
+    line = m_comm.mpi_bcast(line, 0, m_comm.get_mpi_comm());
+    return line;
+  }
 
+  void set_skip_first_line(bool skip_first) { m_skip_first_line = skip_first; }
+
+  ygm::comm& comm() { return m_comm; }
+
+  const ygm::comm& comm() const { return m_comm; }
+
  private:
   /**
    * @brief Check readability of paths and iterates through directories
@@ -149,37 +234,82 @@ class line_parser: public ygm::container::detail::base_iteration<line_parser, st
    */
   void check_paths(const std::vector<std::string>& stringpaths,
                    bool                            recursive) {
-    //
-    //
     for (const std::string& strp : stringpaths) {
-      fs::path p(strp);
-      if (fs::exists(p)) {
-        if (fs::is_regular_file(p)) {
-          if (is_file_good(p)) {
-            m_paths.push_back(p);
-          }
-        } else if (fs::is_directory(p)) {
-          if (recursive) {
-            //
-            // If a directory & user requested recursive
-            const std::filesystem::recursive_directory_iterator end;
-            for (std::filesystem::recursive_directory_iterator itr{p};
-                 itr != end; itr++) {
-              if (fs::is_regular_file(itr->path())) {
-                if (is_file_good(itr->path())) {
-                  m_paths.push_back(itr->path());
+      if (strp.starts_with("local://")) {
+        if (m_comm.layout().local_id() == 0) {
+          fs::path p(strp.substr(8));  // Remove prefix
+          if (fs::exists(p)) {
+            if (fs::is_regular_file(p)) {
+              if (is_file_good(p)) {
+                m_paths.push_back(std::make_pair(p, accessibility_tag::local));
+              }
+            } else if (fs::is_directory(p)) {
+              if (recursive) {
+                //
+                // If a directory & user requested recursive
+                const std::filesystem::recursive_directory_iterator end;
+                for (std::filesystem::recursive_directory_iterator itr{p};
+                     itr != end; itr++) {
+                  if (fs::is_regular_file(itr->path())) {
+                    if (is_file_good(itr->path())) {
+                      m_paths.push_back(std::make_pair(
+                          itr->path(), accessibility_tag::local));
+                    }
+                  }
+                }
+              } else {
+                //
+                // If a directory & user did not request recursive
+                const std::filesystem::directory_iterator end;
+                for (std::filesystem::directory_iterator itr{p}; itr != end;
+                     itr++) {
+                  if (fs::is_regular_file(itr->path())) {
+                    if (is_file_good(itr->path())) {
+                      m_paths.push_back(std::make_pair(
+                          itr->path(), accessibility_tag::local));
+                    }
+                  }
                 }
               }
             }
-          } else {
-            //
-            // If a directory & user requested recursive
-            const std::filesystem::directory_iterator end;
-            for (std::filesystem::directory_iterator itr{p}; itr != end;
-                 itr++) {
-              if (fs::is_regular_file(itr->path())) {
-                if (is_file_good(itr->path())) {
-                  m_paths.push_back(itr->path());
+          }
+        }
+      } else {
+        // Assign distributed files to rank 0 for splitting
+        if (m_comm.rank0()) {
+          fs::path p(strp);
+          if (fs::exists(p)) {
+            if (fs::is_regular_file(p)) {
+              if (is_file_good(p)) {
+                m_paths.push_back(
+                    std::make_pair(p, accessibility_tag::distributed));
+              }
+            } else if (fs::is_directory(p)) {
+              if (recursive) {
+                //
+                // If a directory & user requested recursive
+                const std::filesystem::recursive_directory_iterator end;
+                for (std::filesystem::recursive_directory_iterator itr{p};
+                     itr != end; itr++) {
+                  if (fs::is_regular_file(itr->path())) {
+                    if (is_file_good(itr->path())) {
+                      m_paths.push_back(std::make_pair(
+                          itr->path(), accessibility_tag::distributed));
+                    }
+                  }
+                }
+              } else {
+                //
+                // If a directory & user did not request recursive
+                const std::filesystem::directory_iterator end;
+                for (std::filesystem::directory_iterator itr{p}; itr != end;
+                     itr++) {
+                  if (fs::is_regular_file(itr->path())) {
+                    if (is_file_good(itr->path())) {
+                      m_paths.push_back(std::make_pair(
+                          itr->path(), accessibility_tag::distributed));
+                    }
+                  }
                 }
               }
             }
@@ -209,9 +339,9 @@ class line_parser: public ygm::container::detail::base_iteration<line_parser, st
     }
     return good;
   }
-  ygm::comm&            m_comm;
-  std::vector<fs::path> m_paths;
-  bool                  m_node_local_filesystem;
+  ygm::comm&                                          m_comm;
+  std::vector<std::pair<fs::path, accessibility_tag>> m_paths;
+  bool                                                m_skip_first_line;
 };
 
 }  // namespace ygm::io
diff --git a/include/ygm/io/ndjson_parser.hpp b/include/ygm/io/ndjson_parser.hpp
index 0731478a..440cc597 100644
--- a/include/ygm/io/ndjson_parser.hpp
+++ b/include/ygm/io/ndjson_parser.hpp
@@ -10,9 +10,9 @@
 #endif
 
 #include <ygm/comm.hpp>
+#include <ygm/container/detail/base_iteration.hpp>
 #include <ygm/detail/cereal_boost_json.hpp>
 #include <ygm/io/line_parser.hpp>
-#include <ygm/container/detail/base_iteration.hpp>
 
 namespace ygm::io {
 std::size_t json_erase(boost::json::object            &obj,
@@ -37,7 +37,8 @@ std::size_t json_filter(boost::json::object            &obj,
   return json_erase(obj, keys_to_erase);
 }
 
-class ndjson_parser : public ygm::container::detail::base_iteration<ndjson_parser, std::tuple<boost::json::object>>{
+class ndjson_parser : public ygm::container::detail::base_iteration_value<
+                          ndjson_parser, std::tuple<boost::json::object>> {
  public:
   using for_all_args = std::tuple<boost::json::object>;
   template <typename... Args>
@@ -56,6 +57,10 @@ class ndjson_parser : public ygm::container::detail::base_iteration<ndjson_parse
     });
   }
 
+  ygm::comm &comm() { return m_lp.comm(); }
+
+  const ygm::comm &comm() const { return m_lp.comm(); }
+
  private:
   line_parser m_lp;
 };
diff --git a/include/ygm/version.hpp b/include/ygm/version.hpp
new file mode 100644
index 00000000..3e923149
--- /dev/null
+++ b/include/ygm/version.hpp
@@ -0,0 +1,13 @@
+// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
+// Project Developers. See the top-level COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <string>
+
+namespace ygm {
+// Version string of this YGM source tree.
+static const std::string ygm_version("v0.7-dev");
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index a65d4594..39b4e490 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -36,6 +36,7 @@ add_ygm_seq_test(test_byte_vector)
 
 add_ygm_test(test_comm)
 add_ygm_test(test_comm_2)
+add_ygm_test(test_barrier)
 add_ygm_test(test_layout)
 add_ygm_test(test_large_messages)
 add_ygm_test(test_map)
@@ -50,6 +51,7 @@ add_ygm_test(test_disjoint_set)
 #add_ygm_test(test_container_serialization)
 add_ygm_test(test_line_parser)
 add_ygm_test(test_csv_parser)
+add_ygm_test(test_csv_headers)
 add_ygm_test(test_multi_output)
 add_ygm_test(test_daily_output)
 add_ygm_test(test_interrupt_mask)
@@ -57,11 +59,14 @@ add_ygm_test(test_interrupt_mask)
 add_ygm_test(test_random)
 #add_ygm_test(test_reduce_by_key)
 add_ygm_test(test_container_traits)
-#add_ygm_test(test_collective)
+add_ygm_test(test_collective)
 add_ygm_test(test_traits)
 add_ygm_test(test_concepts)
 add_ygm_test(test_recursion_large_messages)
 add_ygm_test(test_recursion_progress)
+add_ygm_test(test_gather_topk)
+add_ygm_test(test_reduce)
+add_ygm_test(test_transform)
 
 if (Boost_FOUND)
     add_ygm_seq_test(test_cereal_boost_json)
diff --git a/test/data/csv_headers.csv b/test/data/csv_headers.csv
new file mode 100644
index 00000000..129f06ad
--- /dev/null
+++ b/test/data/csv_headers.csv
@@ -0,0 +1,5 @@
+zero, four, two, six
+0, 4, 2, 6
+0, 4, 2, 6
+0, 4, 2, 6
+0, 4, 2, 6
diff --git a/test/test_array.cpp b/test/test_array.cpp
index 9dc5e093..d1757f17 100644
--- a/test/test_array.cpp
+++ b/test/test_array.cpp
@@ -44,7 +44,7 @@ int main(int argc, char **argv) {
     }
 
     arr.for_all([](const auto index, const auto value) {
-      ASSERT_RELEASE(index == value);
+      YGM_ASSERT_RELEASE(index == value);
     });
   }
 
@@ -67,7 +67,7 @@ int main(int argc, char **argv) {
     }
 
     arr.for_all([&world](const auto index, const auto value) {
-      ASSERT_RELEASE(value == index + 2 * world.size());
+      YGM_ASSERT_RELEASE(value == index + 2 * world.size());
     });
   }
 
@@ -105,7 +105,7 @@ int main(int argc, char **argv) {
           cumulative_xor = 0;
           break;
       }
-      ASSERT_RELEASE(value == index ^ cumulative_xor);
+      YGM_ASSERT_RELEASE(value == (index ^ cumulative_xor));  // ^ binds looser than ==
     });
   }
 
@@ -128,7 +128,7 @@ int main(int argc, char **argv) {
     }
 
     arr.for_all([&world](const auto index, const auto value) {
-      ASSERT_RELEASE(value == index + world.size());
+      YGM_ASSERT_RELEASE(value == index + world.size());
     });
   }
 
@@ -148,7 +148,7 @@ int main(int argc, char **argv) {
 
     for (int i = 0; i < size; ++i) {
       arr.async_visit(i, [](const auto index, const auto value) {
-        ASSERT_RELEASE(value == index);
+        YGM_ASSERT_RELEASE(value == index);
       });
     }
   }
@@ -169,11 +169,67 @@ int main(int argc, char **argv) {
 
     for (int i = 0; i < size; ++i) {
       arr.async_visit(i, [](auto ptr, const auto index, const auto value) {
-        ASSERT_RELEASE(value == index);
+        YGM_ASSERT_RELEASE(value == index);
       });
     }
   }
 
+  // Test async_visit with a functor object instead of a lambda
+  {
+    struct visit_functor {  // named callable used in place of a lambda
+      void operator()(const size_t index, const int value) {
+        YGM_ASSERT_RELEASE(value == index);
+      }
+    };
+
+    int size = 64;
+
+    ygm::container::array<int> arr(world, size);
+
+    if (world.rank0()) {  // only rank 0 initializes: element i is set to i
+      for (int i = 0; i < size; ++i) {
+        arr.async_set(i, i);
+      }
+    }
+
+    world.barrier();
+
+    for (int i = 0; i < size; ++i) {  // every rank visits every element
+      arr.async_visit(i, visit_functor());
+    }
+  }
+
+  //
+  // Test async_reduce with sum, min, and max reduction operations
+  {
+    ygm::container::array<int> arr(world, 3);  // one element per reduction op
+
+    int num_reductions = 5;
+    for (int i = 0; i < num_reductions; ++i) {  // runs on every rank
+      arr.async_reduce(0, i, std::plus<int>());
+      arr.async_reduce(
+          1, i, [](const int &a, const int &b) { return std::min<int>(a, b); });
+      arr.async_reduce(
+          2, i, [](const int &a, const int &b) { return std::max<int>(a, b); });
+    }
+
+    world.barrier();
+
+    arr.for_all(
+        [&world, &num_reductions](const auto &index, const auto &value) {
+          if (index == 0) {  // sum: each rank adds 0 + ... + (num_reductions - 1)
+            YGM_ASSERT_RELEASE(value == world.size() * num_reductions *
+                                            (num_reductions - 1) / 2);
+          } else if (index == 1) {  // min reduction
+            YGM_ASSERT_RELEASE(value == 0);
+          } else if (index == 2) {  // max reduction
+            YGM_ASSERT_RELEASE(value == num_reductions - 1);
+          } else {  // the array has only indices 0..2
+            YGM_ASSERT_RELEASE(false);
+          }
+        });
+  }
+
   // Test value-only for_all
   {
     int size = 64;
@@ -193,7 +249,7 @@ int main(int argc, char **argv) {
     }
 
     arr.for_all([&world](const auto value) {
-      ASSERT_RELEASE(value == world.size() + 1);
+      YGM_ASSERT_RELEASE(value == world.size() + 1);
     });
   }
 
@@ -209,7 +265,7 @@ int main(int argc, char **argv) {
     }
 
     arr.for_all([](const auto index, const auto value) {
-      ASSERT_RELEASE(index == value);
+      YGM_ASSERT_RELEASE(index == value);
     });
   }
 
@@ -233,7 +289,7 @@ int main(int argc, char **argv) {
       arr.async_visit(
           index,
           [](const auto &index, const auto &my_value, const auto &other_value) {
-            ASSERT_RELEASE(my_value == other_value);
+            YGM_ASSERT_RELEASE(my_value == other_value);
           },
           value);
     });
@@ -242,7 +298,7 @@ int main(int argc, char **argv) {
       arr_copy.async_visit(
           index,
           [](const auto &index, const auto &my_value, const auto &other_value) {
-            ASSERT_RELEASE(my_value == other_value);
+            YGM_ASSERT_RELEASE(my_value == other_value);
           },
           value);
     });
@@ -263,24 +319,24 @@ int main(int argc, char **argv) {
 
     world.barrier();
 
-    ASSERT_RELEASE(arr.size() == large_size);
+    YGM_ASSERT_RELEASE(arr.size() == large_size);
     arr.for_all([](const auto &index, const auto &value) {
-      ASSERT_RELEASE(value == 2 * index);
+      YGM_ASSERT_RELEASE(value == 2 * index);
     });
 
     arr.resize(small_size);
 
-    ASSERT_RELEASE(arr.size() == small_size);
+    YGM_ASSERT_RELEASE(arr.size() == small_size);
     arr.for_all([](const auto &index, const auto &value) {
-      ASSERT_RELEASE(value == 2 * index);
+      YGM_ASSERT_RELEASE(value == 2 * index);
     });
 
     arr.resize(large_size);
 
-    ASSERT_RELEASE(arr.size() == large_size);
+    YGM_ASSERT_RELEASE(arr.size() == large_size);
     arr.for_all([&small_size](const auto &index, const auto &value) {
       if (index < small_size) {
-        ASSERT_RELEASE(value == 2 * index);
+        YGM_ASSERT_RELEASE(value == 2 * index);
       }
     });
   }
@@ -299,11 +355,11 @@ int main(int argc, char **argv) {
 
     world.barrier();
 
-    ASSERT_RELEASE(arr.size() == initial_size);
+    YGM_ASSERT_RELEASE(arr.size() == initial_size);
 
     arr.clear();
 
-    ASSERT_RELEASE(arr.size() == 0);
+    YGM_ASSERT_RELEASE(arr.size() == 0);
   }
 
   // Test swap
@@ -325,28 +381,28 @@ int main(int argc, char **argv) {
 
     world.barrier();
 
-    ASSERT_RELEASE(arr1.size() == size1);
-    ASSERT_RELEASE(arr2.size() == size2);
+    YGM_ASSERT_RELEASE(arr1.size() == size1);
+    YGM_ASSERT_RELEASE(arr2.size() == size2);
 
     arr1.for_all([](const auto &index, const auto &value) {
-      ASSERT_RELEASE(value == 2 * index);
+      YGM_ASSERT_RELEASE(value == 2 * index);
     });
 
     arr2.for_all([](const auto &index, const auto &value) {
-      ASSERT_RELEASE(value == 3 * index + 1);
+      YGM_ASSERT_RELEASE(value == 3 * index + 1);
     });
 
     arr1.swap(arr2);
 
-    ASSERT_RELEASE(arr1.size() == size2);
-    ASSERT_RELEASE(arr2.size() == size1);
+    YGM_ASSERT_RELEASE(arr1.size() == size2);
+    YGM_ASSERT_RELEASE(arr2.size() == size1);
 
     arr1.for_all([](const auto &index, const auto &value) {
-      ASSERT_RELEASE(value == 3 * index + 1);
+      YGM_ASSERT_RELEASE(value == 3 * index + 1);
     });
 
     arr2.for_all([](const auto &index, const auto &value) {
-      ASSERT_RELEASE(value == 2 * index);
+      YGM_ASSERT_RELEASE(value == 2 * index);
     });
   }
 
@@ -369,9 +425,9 @@ int main(int argc, char **argv) {
 
     arr.for_all([&default_value](const auto &index, const auto &value) {
       if (index % 2 == 0) {
-        ASSERT_RELEASE(value == 2 * index);
+        YGM_ASSERT_RELEASE(value == 2 * index);
       } else {
-        ASSERT_RELEASE(value == default_value);
+        YGM_ASSERT_RELEASE(value == default_value);
       }
     });
   }
@@ -381,7 +437,7 @@ int main(int argc, char **argv) {
     ygm::container::array<int> arr(world, {1, 3, 5, 7, 9, 11});
 
     arr.for_all([](const auto &index, const auto &value) {
-      ASSERT_RELEASE(value == 2 * index + 1);
+      YGM_ASSERT_RELEASE(value == 2 * index + 1);
     });
   }
 
@@ -394,9 +450,9 @@ int main(int argc, char **argv) {
 
     arr.for_all([](const auto &index, const auto &value) {
       if (index % 2 == 1) {
-        ASSERT_RELEASE(value == 2 * index);
+        YGM_ASSERT_RELEASE(value == 2 * index);
       } else {
-        ASSERT_RELEASE(value == 0);
+        YGM_ASSERT_RELEASE(value == 0);
       }
     });
   }
@@ -416,7 +472,7 @@ int main(int argc, char **argv) {
     ygm::container::array<int> arr(world, b);
 
     arr.for_all([](const auto &index, const auto &value) {
-      ASSERT_RELEASE(value == 1);
+      YGM_ASSERT_RELEASE(value == 1);
     });
   }
 
@@ -434,12 +490,12 @@ int main(int argc, char **argv) {
 
     ygm::container::array<int> arr(world, b);
 
-    ASSERT_RELEASE(arr.size() == 2 * bag_size - 1);
+    YGM_ASSERT_RELEASE(arr.size() == 2 * bag_size - 1);
     arr.for_all([](const auto &index, const auto &value) {
       if (index % 2 == 0) {
-        ASSERT_RELEASE(value == index / 2);
+        YGM_ASSERT_RELEASE(value == index / 2);
       } else {
-        ASSERT_RELEASE(value == 0);
+        YGM_ASSERT_RELEASE(value == 0);
       }
     });
   }
@@ -458,12 +514,12 @@ int main(int argc, char **argv) {
 
     ygm::container::array<int> arr(world, m);
 
-    ASSERT_RELEASE(arr.size() == 2 * bag_size - 1);
+    YGM_ASSERT_RELEASE(arr.size() == 2 * bag_size - 1);
     arr.for_all([](const auto &index, const auto &value) {
       if (index % 2 == 0) {
-        ASSERT_RELEASE(value == index / 2);
+        YGM_ASSERT_RELEASE(value == index / 2);
       } else {
-        ASSERT_RELEASE(value == 0);
+        YGM_ASSERT_RELEASE(value == 0);
       }
     });
   }
@@ -478,9 +534,9 @@ int main(int argc, char **argv) {
 
     ygm::container::array<int> arr(world, local_vec);
 
-    ASSERT_RELEASE(arr.size() == world.size() * (world.size() + 1) / 2);
+    YGM_ASSERT_RELEASE(arr.size() == world.size() * (world.size() + 1) / 2);
     arr.for_all([](const auto &index, const auto &value) {
-      ASSERT_RELEASE(value == index);
+      YGM_ASSERT_RELEASE(value == index);
     });
   }
 
@@ -496,9 +552,9 @@ int main(int argc, char **argv) {
 
     ygm::container::array<float> arr(world, local_vec);
 
-    ASSERT_RELEASE(arr.size() == world.size() * local_size);
+    YGM_ASSERT_RELEASE(arr.size() == world.size() * local_size);
     arr.for_all([&world](const auto &index, const auto &value) {
-      ASSERT_RELEASE(value == float(index % world.size()));
+      YGM_ASSERT_RELEASE(value == float(index % world.size()));
     });
   }
 
@@ -513,9 +569,9 @@ int main(int argc, char **argv) {
 
     ygm::container::array<float> arr(world, local_map);
 
-    ASSERT_RELEASE(arr.size() == world.size() * local_size);
+    YGM_ASSERT_RELEASE(arr.size() == world.size() * local_size);
     arr.for_all([&world](const auto &index, const auto &value) {
-      ASSERT_RELEASE(value == float(index % world.size()));
+      YGM_ASSERT_RELEASE(value == float(index % world.size()));
     });
   }
 
@@ -543,7 +599,7 @@ int main(int argc, char **argv) {
     arr.sort();
 
     arr.for_all([](const auto index, const auto &value) {
-      ASSERT_RELEASE(index == value);
+      YGM_ASSERT_RELEASE(index == value);
     });
   }
 
diff --git a/test/test_bag.cpp b/test/test_bag.cpp
index f1b13099..04362208 100644
--- a/test/test_bag.cpp
+++ b/test/test_bag.cpp
@@ -37,10 +37,10 @@ int main(int argc, char** argv) {
       bbag.async_insert("apple");
       bbag.async_insert("red");
     }
-    ASSERT_RELEASE(bbag.count("dog") == 1);
-    ASSERT_RELEASE(bbag.count("apple") == 1);
-    ASSERT_RELEASE(bbag.count("red") == 1);
-    ASSERT_RELEASE(bbag.size() == 3);
+    YGM_ASSERT_RELEASE(bbag.count("dog") == 1);
+    YGM_ASSERT_RELEASE(bbag.count("apple") == 1);
+    YGM_ASSERT_RELEASE(bbag.count("red") == 1);
+    YGM_ASSERT_RELEASE(bbag.size() == 3);
   }
 
   //
@@ -53,18 +53,18 @@ int main(int argc, char** argv) {
     //   bbag.async_insert("red");
     // }
     // world.barrier();
-    // ASSERT_RELEASE(bbag.size() == 3);
+    // YGM_ASSERT_RELEASE(bbag.size() == 3);
     // ygm::container::bag<std::string> bbag2(bbag);
 
-    // ASSERT_RELEASE(bbag.size() == 3);
-    // ASSERT_RELEASE(bbag2.size() == 3);
+    // YGM_ASSERT_RELEASE(bbag.size() == 3);
+    // YGM_ASSERT_RELEASE(bbag2.size() == 3);
 
     // if (world.rank0()) {
     //   bbag2.async_insert("car");
     // }
     // world.barrier();
-    // ASSERT_RELEASE(bbag.size() == 3);
-    // ASSERT_RELEASE(bbag2.size() == 4);
+    // YGM_ASSERT_RELEASE(bbag.size() == 3);
+    // YGM_ASSERT_RELEASE(bbag2.size() == 4);
   }
 
   //
@@ -77,18 +77,18 @@ int main(int argc, char** argv) {
       bbag.async_insert("red");
     }
     world.barrier();
-    ASSERT_RELEASE(bbag.size() == 3);
+    YGM_ASSERT_RELEASE(bbag.size() == 3);
     ygm::container::bag<std::string> bbag2(std::move(bbag));
 
-    ASSERT_RELEASE(bbag.size() == 0);
-    ASSERT_RELEASE(bbag2.size() == 3);
+    YGM_ASSERT_RELEASE(bbag.size() == 0);
+    YGM_ASSERT_RELEASE(bbag2.size() == 3);
 
     if (world.rank0()) {
       bbag2.async_insert("car");
     }
     world.barrier();
-    ASSERT_RELEASE(bbag.size() == 0);
-    ASSERT_RELEASE(bbag2.size() == 4);
+    YGM_ASSERT_RELEASE(bbag.size() == 0);
+    YGM_ASSERT_RELEASE(bbag2.size() == 4);
   }
 
   // Testing = operator
@@ -100,23 +100,23 @@ int main(int argc, char** argv) {
       bbag.async_insert("red");
     }
     world.barrier();
-    ASSERT_RELEASE(bbag.size() == 3);
+    YGM_ASSERT_RELEASE(bbag.size() == 3);
 
     // ygm::container::bag<std::string> bbag2 = bbag;
 
-    // ASSERT_RELEASE(bbag.size() == 3);
-    // ASSERT_RELEASE(bbag2.size() == 3);
+    // YGM_ASSERT_RELEASE(bbag.size() == 3);
+    // YGM_ASSERT_RELEASE(bbag2.size() == 3);
 
     // if (world.rank0()) {
     //   bbag2.async_insert("car");
     // }
     // world.barrier();
-    // ASSERT_RELEASE(bbag.size() == 3);
-    // ASSERT_RELEASE(bbag2.size() == 4);
+    // YGM_ASSERT_RELEASE(bbag.size() == 3);
+    // YGM_ASSERT_RELEASE(bbag2.size() == 4);
 
     ygm::container::bag<std::string> bbag3 = std::move(bbag);
-    ASSERT_RELEASE(bbag.size() == 0);
-    ASSERT_RELEASE(bbag3.size() == 3);  
+    YGM_ASSERT_RELEASE(bbag.size() == 0);
+    YGM_ASSERT_RELEASE(bbag3.size() == 3);  
   }
 
 
@@ -128,23 +128,23 @@ int main(int argc, char** argv) {
     bbag.async_insert("dog");
     bbag.async_insert("apple");
     bbag.async_insert("red");
-    ASSERT_RELEASE(bbag.size() == 3 * (size_t)world.size());
-    ASSERT_RELEASE(bbag.count("dog") == (size_t)world.size());
-    ASSERT_RELEASE(bbag.count("apple") == (size_t)world.size());
-    ASSERT_RELEASE(bbag.count("red") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(bbag.size() == 3 * (size_t)world.size());
+    YGM_ASSERT_RELEASE(bbag.count("dog") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(bbag.count("apple") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(bbag.count("red") == (size_t)world.size());
 
     {
       std::vector<std::string> all_data;
       bbag.gather(all_data, 0);
       if (world.rank0()) {
-        ASSERT_RELEASE(all_data.size() == 3 * (size_t)world.size());
+        YGM_ASSERT_RELEASE(all_data.size() == 3 * (size_t)world.size());
       }
     }
     {
       std::set<std::string> all_data;
       bbag.gather(all_data, 0);
       if (world.rank0()) {
-        ASSERT_RELEASE(all_data.size() == 3);
+        YGM_ASSERT_RELEASE(all_data.size() == 3);
       }
     }
   }
@@ -156,7 +156,7 @@ int main(int argc, char** argv) {
     bbag.async_insert(1);
     bbag.async_insert(2);
     bbag.async_insert(3);
-    ASSERT_RELEASE(bbag.reduce(std::plus<int>()) == 6 * world.size());
+    YGM_ASSERT_RELEASE(bbag.reduce(std::plus<int>()) == 6 * world.size());
   }
 
   //
@@ -181,7 +181,7 @@ int main(int argc, char** argv) {
     bbag.local_shuffle();
     bbag.global_shuffle();
 
-    ASSERT_RELEASE(bbag.size() == num_of_items);
+    YGM_ASSERT_RELEASE(bbag.size() == num_of_items);
 
     std::vector<int> bag_content;
     bbag.gather(bag_content, 0);
@@ -189,7 +189,7 @@ int main(int argc, char** argv) {
       for (int i = 0; i < num_of_items; i++) {
         if (std::find(bag_content.begin(), bag_content.end(), i) ==
             bag_content.end()) {
-          ASSERT_RELEASE(false);
+          YGM_ASSERT_RELEASE(false);
         }
       }
     }
@@ -208,7 +208,7 @@ int main(int argc, char** argv) {
     bbag.for_all([&count](std::string& mstr) { ++count; });
     int global_count = world.all_reduce_sum(count);
     world.barrier();
-    ASSERT_RELEASE(global_count == 3);
+    YGM_ASSERT_RELEASE(global_count == 3);
   }
 
   //
@@ -225,7 +225,7 @@ int main(int argc, char** argv) {
         [&count](std::pair<std::string, int>& mstr) { count += mstr.second; });
     int global_count = world.all_reduce_sum(count);
     world.barrier();
-    ASSERT_RELEASE(global_count == 6);
+    YGM_ASSERT_RELEASE(global_count == 6);
   }
 
   // //
@@ -242,7 +242,7 @@ int main(int argc, char** argv) {
   //       [&count](std::string& first, int& second) { count += second; });
   //   int global_count = world.all_reduce_sum(count);
   //   world.barrier();
-  //   ASSERT_RELEASE(global_count == 6);
+  //   YGM_ASSERT_RELEASE(global_count == 6);
   // }
 
   //
@@ -252,7 +252,7 @@ int main(int argc, char** argv) {
     bbag.async_insert("begin", 0);
     bbag.async_insert("end", world.size() - 1);
     bbag.rebalance();
-    ASSERT_RELEASE(bbag.local_size() == 2);
+    YGM_ASSERT_RELEASE(bbag.local_size() == 2);
   }
 
   //
@@ -271,9 +271,9 @@ int main(int argc, char** argv) {
         bbag.size() / world.size() + (bbag.size() % world.size() > 0);
 
     if (world.rank() < remainder) {
-      ASSERT_RELEASE(bbag.local_size() == large_block_size);
+      YGM_ASSERT_RELEASE(bbag.local_size() == large_block_size);
     } else {
-      ASSERT_RELEASE(bbag.local_size() == small_block_size);
+      YGM_ASSERT_RELEASE(bbag.local_size() == small_block_size);
     }
   }
 
@@ -294,10 +294,10 @@ int main(int argc, char** argv) {
     std::set<int> value_set;
     bbag.gather(value_set, 0);
     if (world.rank0()) {
-      ASSERT_RELEASE(value_set.size() == 200);
-      ASSERT_RELEASE(*std::min_element(value_set.begin(), value_set.end()) ==
+      YGM_ASSERT_RELEASE(value_set.size() == 200);
+      YGM_ASSERT_RELEASE(*std::min_element(value_set.begin(), value_set.end()) ==
                      0);
-      ASSERT_RELEASE(*std::max_element(value_set.begin(), value_set.end()) ==
+      YGM_ASSERT_RELEASE(*std::max_element(value_set.begin(), value_set.end()) ==
                      199);
     }
   }
@@ -313,19 +313,19 @@ int main(int argc, char** argv) {
         bbag2.async_insert("apple");
         bbag2.async_insert("red");
       }
-      ASSERT_RELEASE(bbag2.size() == 3);
+      YGM_ASSERT_RELEASE(bbag2.size() == 3);
       bbag2.swap(bbag);
-      ASSERT_RELEASE(bbag2.size() == 0);
+      YGM_ASSERT_RELEASE(bbag2.size() == 0);
     }
-    ASSERT_RELEASE(bbag.size() == 3);
-    ASSERT_RELEASE(bbag.count("dog") == 1);
-    ASSERT_RELEASE(bbag.count("apple") == 1);
-    ASSERT_RELEASE(bbag.count("red") == 1);
+    YGM_ASSERT_RELEASE(bbag.size() == 3);
+    YGM_ASSERT_RELEASE(bbag.count("dog") == 1);
+    YGM_ASSERT_RELEASE(bbag.count("apple") == 1);
+    YGM_ASSERT_RELEASE(bbag.count("red") == 1);
     if (world.rank0()) {
       bbag.async_insert("car");
     }
-    ASSERT_RELEASE(bbag.size() == 4);
-    ASSERT_RELEASE(bbag.count("car") == 1);
+    YGM_ASSERT_RELEASE(bbag.size() == 4);
+    YGM_ASSERT_RELEASE(bbag.count("car") == 1);
   }
 
   //
@@ -346,7 +346,7 @@ int main(int argc, char** argv) {
 
     world.barrier();
     for (int bag_index = 0; bag_index < num_bags; ++bag_index) {
-      ASSERT_RELEASE(vec_bags[bag_index].size() == world.size() * 2);
+      YGM_ASSERT_RELEASE(vec_bags[bag_index].size() == world.size() * 2);
     }
   }
 
diff --git a/test/test_barrier.cpp b/test/test_barrier.cpp
new file mode 100644
index 00000000..00a86884
--- /dev/null
+++ b/test/test_barrier.cpp
@@ -0,0 +1,28 @@
+// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
+// Project Developers. See the top-level COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: MIT
+
+#undef NDEBUG
+
+#include <ygm/comm.hpp>
+
+int main(int argc, char **argv) {
+  ygm::comm world(&argc, &argv);
+
+  // Test barriers for early exit: each broadcast must run in its sender's round
+  {
+    int        num_rounds = 100;
+    static int round      = 0;  // static: read by the captureless lambda below
+    for (int i = 0; i < num_rounds; ++i) {
+      world.async_bcast(
+          [](int curr_round) { YGM_ASSERT_RELEASE(curr_round == round); }, round);
+
+      world.barrier();
+
+      ++round;  // advance only after the barrier has returned
+    }
+  }
+
+  return 0;
+}
diff --git a/test/test_cereal_archive.cpp b/test/test_cereal_archive.cpp
index 51d5619b..f5527b3f 100644
--- a/test/test_cereal_archive.cpp
+++ b/test/test_cereal_archive.cpp
@@ -34,7 +34,7 @@ int main() {
       // std::cout << tmp << std::endl;
       out_sentences.push_back(tmp);
     }
-    ASSERT_RELEASE(vec_sentences == out_sentences);
+    YGM_ASSERT_RELEASE(vec_sentences == out_sentences);
   }
 
   return 0;
diff --git a/test/test_cereal_boost_container.cpp b/test/test_cereal_boost_container.cpp
index 39985c2b..e6b8dd55 100644
--- a/test/test_cereal_boost_container.cpp
+++ b/test/test_cereal_boost_container.cpp
@@ -30,7 +30,7 @@ int main() {
     boost::container::vector<int> load_value;
     archive(load_value);
 
-    ASSERT_RELEASE(original_value == load_value);
+    YGM_ASSERT_RELEASE(original_value == load_value);
   }
 
   return 0;
diff --git a/test/test_cereal_boost_json.cpp b/test/test_cereal_boost_json.cpp
index 1dc78f60..558de070 100644
--- a/test/test_cereal_boost_json.cpp
+++ b/test/test_cereal_boost_json.cpp
@@ -45,7 +45,7 @@ int main() {
     const bj::value original_value = bj::parse(json_string);
     // std::cout << original_value << std::endl;
     // std::cout << load_value << std::endl;
-    ASSERT_RELEASE(original_value == load_value);
+    YGM_ASSERT_RELEASE(original_value == load_value);
   }
 
   return 0;
diff --git a/test/test_collective.cpp b/test/test_collective.cpp
index 306beb24..92fe4d63 100644
--- a/test/test_collective.cpp
+++ b/test/test_collective.cpp
@@ -11,26 +11,26 @@
 int main(int argc, char** argv) {
   ygm::comm world(&argc, &argv);
 
-  ASSERT_RELEASE(ygm::sum(size_t(1), world) == world.size());
-  ASSERT_RELEASE(ygm::sum(double(1), world) == double(world.size()));
-  ASSERT_RELEASE(ygm::sum(float(1), world) == float(world.size()));
+  YGM_ASSERT_RELEASE(ygm::sum(size_t(1), world) == world.size());
+  YGM_ASSERT_RELEASE(ygm::sum(double(1), world) == double(world.size()));
+  YGM_ASSERT_RELEASE(ygm::sum(float(1), world) == float(world.size()));
 
-  ASSERT_RELEASE(ygm::min(world.rank(), world) == 0);
-  ASSERT_RELEASE(ygm::min(double(world.rank()), world) == double(0));
-  ASSERT_RELEASE(ygm::min(float(world.rank()), world) == float(0));
+  YGM_ASSERT_RELEASE(ygm::min(world.rank(), world) == 0);
+  YGM_ASSERT_RELEASE(ygm::min(double(world.rank()), world) == double(0));
+  YGM_ASSERT_RELEASE(ygm::min(float(world.rank()), world) == float(0));
 
-  ASSERT_RELEASE(ygm::max(world.rank(), world) == world.size() - 1);
+  YGM_ASSERT_RELEASE(ygm::max(world.rank(), world) == world.size() - 1);
 
-  ASSERT_RELEASE(ygm::prefix_sum(1, world) == world.rank());
+  YGM_ASSERT_RELEASE(ygm::prefix_sum(1, world) == world.rank());
 
-  ASSERT_RELEASE(ygm::logical_and(true, world) == true);
-  ASSERT_RELEASE(ygm::logical_and(false, world) == false);
-  ASSERT_RELEASE(ygm::logical_or(true, world) == true);
-  ASSERT_RELEASE(ygm::logical_or(false, world) == false);
+  YGM_ASSERT_RELEASE(ygm::logical_and(true, world) == true);
+  YGM_ASSERT_RELEASE(ygm::logical_and(false, world) == false);
+  YGM_ASSERT_RELEASE(ygm::logical_or(true, world) == true);
+  YGM_ASSERT_RELEASE(ygm::logical_or(false, world) == false);
 
   if (world.size() > 1) {
-    ASSERT_RELEASE(ygm::logical_and(world.rank() % 2 == 0, world) == 0);
-    ASSERT_RELEASE(ygm::logical_or(world.rank() % 2 == 0, world) == 1);
+    YGM_ASSERT_RELEASE(ygm::logical_and(world.rank() % 2 == 0, world) == 0);
+    YGM_ASSERT_RELEASE(ygm::logical_or(world.rank() % 2 == 0, world) == 1);
   }
 
   {
@@ -40,7 +40,7 @@ int main(int argc, char** argv) {
       value = 3.14;
     }
     ygm::bcast(value, root, world);
-    ASSERT_RELEASE(value == 3.14);
+    YGM_ASSERT_RELEASE(value == 3.14);
   }
 
   {
@@ -51,23 +51,27 @@ int main(int argc, char** argv) {
         value = 42;
       }
       ygm::bcast(value, root, world);
-      ASSERT_RELEASE(value == 42);
+      YGM_ASSERT_RELEASE(value == 42);
     }
   }
 
-  ASSERT_RELEASE(is_same(42, world));
+  YGM_ASSERT_RELEASE(is_same(42, world));
   std::set<std::string> string_set;
   if (world.rank() == 0) {
     string_set.insert("Howdy");
     string_set.insert("Aggs");
   }
 
-  ASSERT_RELEASE(not is_same(string_set, world));
+  if (world.size() > 1) {
+    YGM_ASSERT_RELEASE(not is_same(string_set, world));
+  }
   string_set.insert("Howdy");
   string_set.insert("Aggs");
-  ASSERT_RELEASE(is_same(string_set, world));
+  YGM_ASSERT_RELEASE(is_same(string_set, world));
 
-  ASSERT_RELEASE(not is_same(world.rank(), world));
+  if (world.size() > 1) {
+    YGM_ASSERT_RELEASE(not is_same(world.rank(), world));
+  }
 
   return 0;
-}
\ No newline at end of file
+}
diff --git a/test/test_comm.cpp b/test/test_comm.cpp
index 697f2070..ee879d7e 100644
--- a/test/test_comm.cpp
+++ b/test/test_comm.cpp
@@ -8,7 +8,7 @@
 #include <ygm/detail/ygm_ptr.hpp>
 
 int main(int argc, char** argv) {
-  ASSERT_MPI(MPI_Init(nullptr, nullptr));
+  YGM_ASSERT_MPI(MPI_Init(nullptr, nullptr));
 
   std::vector<std::string> routing_schemes{"NONE", "NR", "NLNR"};
   for (const auto& routing_scheme : routing_schemes) {
@@ -28,7 +28,7 @@ int main(int argc, char** argv) {
         }
       }
       world.barrier();
-      ASSERT_RELEASE(counter == 1);
+      YGM_ASSERT_RELEASE(counter == 1);
     }
 
     //
@@ -41,7 +41,7 @@ int main(int argc, char** argv) {
             dest, [](auto pcounter) { (*pcounter)++; }, pcounter);
       }
       world.barrier();
-      ASSERT_RELEASE(counter == (size_t)world.size());
+      YGM_ASSERT_RELEASE(counter == (size_t)world.size());
     }
 
     //
@@ -54,7 +54,7 @@ int main(int argc, char** argv) {
       }
 
       world.barrier();
-      ASSERT_RELEASE(counter == 1);
+      YGM_ASSERT_RELEASE(counter == 1);
     }
 
     {
@@ -66,7 +66,7 @@ int main(int argc, char** argv) {
       }
 
       world.barrier();
-      ASSERT_RELEASE(counter == num_bcasts * world.size());
+      YGM_ASSERT_RELEASE(counter == num_bcasts * world.size());
     }
 
     //
@@ -85,9 +85,9 @@ int main(int argc, char** argv) {
 
       world.barrier();
       if (world.rank() % 2) {
-        ASSERT_RELEASE(counter == 0);
+        YGM_ASSERT_RELEASE(counter == 0);
       } else {
-        ASSERT_RELEASE(counter == 1);
+        YGM_ASSERT_RELEASE(counter == 1);
       }
     }
 
@@ -95,13 +95,13 @@ int main(int argc, char** argv) {
     // Test reductions
     {
       auto max = world.all_reduce_max(size_t(world.rank()));
-      ASSERT_RELEASE(max == (size_t)world.size() - 1);
+      YGM_ASSERT_RELEASE(max == (size_t)world.size() - 1);
 
       auto min = world.all_reduce_min(size_t(world.rank()));
-      ASSERT_RELEASE(min == 0);
+      YGM_ASSERT_RELEASE(min == 0);
 
       auto sum = world.all_reduce_sum(size_t(world.rank()));
-      ASSERT_RELEASE(sum ==
+      YGM_ASSERT_RELEASE(sum ==
                      (((size_t)world.size() - 1) * (size_t)world.size()) / 2);
 
       size_t id  = world.rank();
@@ -112,7 +112,7 @@ int main(int argc, char** argv) {
           return b;
         }
       });
-      ASSERT_RELEASE(red == 0);
+      YGM_ASSERT_RELEASE(red == 0);
       auto red2 = world.all_reduce(id, [](size_t a, size_t b) {
         if (a > b) {
           return a;
@@ -120,7 +120,7 @@ int main(int argc, char** argv) {
           return b;
         }
       });
-      ASSERT_RELEASE(red2 == (size_t)world.size() - 1);
+      YGM_ASSERT_RELEASE(red2 == (size_t)world.size() - 1);
     }
 
     //
@@ -131,10 +131,10 @@ int main(int argc, char** argv) {
       world.async_bcast([]() { done = true; });
       world.local_wait_until([]() { return done; });
       world.barrier();
-      ASSERT_RELEASE(done);
+      YGM_ASSERT_RELEASE(done);
     }
   }
 
-  ASSERT_MPI(MPI_Finalize());
+  YGM_ASSERT_MPI(MPI_Finalize());
   return 0;
 }
diff --git a/test/test_comm_2.cpp b/test/test_comm_2.cpp
index bd697c3a..91a2df5c 100644
--- a/test/test_comm_2.cpp
+++ b/test/test_comm_2.cpp
@@ -9,11 +9,11 @@
 
 int main(int argc, char** argv) {
   int provided;
-  ASSERT_MPI(MPI_Init_thread(nullptr, nullptr, MPI_THREAD_MULTIPLE, &provided));
-  ASSERT_RELEASE(MPI_THREAD_MULTIPLE == provided);
+  YGM_ASSERT_MPI(MPI_Init_thread(nullptr, nullptr, MPI_THREAD_MULTIPLE, &provided));
+  YGM_ASSERT_RELEASE(MPI_THREAD_MULTIPLE == provided);
 
   for (size_t i = 0; i < 1000; ++i) { ygm::comm world(MPI_COMM_WORLD); }
 
-  ASSERT_MPI(MPI_Finalize());
+  YGM_ASSERT_MPI(MPI_Finalize());
   return 0;
 }
\ No newline at end of file
diff --git a/test/test_concepts.cpp b/test/test_concepts.cpp
index af3a97e8..fbbad263 100644
--- a/test/test_concepts.cpp
+++ b/test/test_concepts.cpp
@@ -6,6 +6,7 @@
 #include <ygm/container/array.hpp>
 #include <ygm/container/bag.hpp>
 #include <ygm/container/detail/base_concepts.hpp>
+#include <ygm/container/detail/reducing_adapter.hpp>
 #include <ygm/container/map.hpp>
 #include <ygm/container/set.hpp>
 
@@ -49,29 +50,36 @@ int main(int argc, char **argv) {
     static_assert(not HasForAll<std::vector<int>>);
   }
 
-  /*
-  // Test IsSame
+  // Test HasAsyncReduce
   {
-    static_assert(IsSame<int, int>);
-    static_assert(IsSame<int, int &>);
-    static_assert(IsSame<int, const int &>);
-    static_assert(not IsSame<int, int *>);
-    static_assert(not IsSame<int, float>);
-  }
-  */
+    static_assert(not HasAsyncReduce<ygm::container::bag<int>>);
+    static_assert(not HasAsyncReduce<ygm::container::set<int>>);
+    static_assert(HasAsyncReduce<ygm::container::map<int, float>>);
+    static_assert(HasAsyncReduce<ygm::container::array<float>>);
+    static_assert(HasAsyncReduce<ygm::container::detail::reducing_adapter<
+                      ygm::container::array<float>, std::plus<float>>>);
 
-  /*
-  // Test IsInvocable
-  {
-    auto lambda1 = [](int) {};
-    auto lambda2 = [](int, float) {};
+    static_assert(not HasAsyncReduceWithReductionOp<ygm::container::bag<int>>);
+    static_assert(not HasAsyncReduceWithReductionOp<ygm::container::set<int>>);
+    static_assert(
+        HasAsyncReduceWithReductionOp<ygm::container::map<int, float>>);
+    static_assert(HasAsyncReduceWithReductionOp<ygm::container::array<float>>);
+    static_assert(not HasAsyncReduceWithReductionOp<
+                  ygm::container::detail::reducing_adapter<
+                      ygm::container::array<float>, std::plus<float>>>);
 
-    static_assert(IsInvocable<decltype(lambda1), int>);
-    static_assert(not IsInvocable<decltype(lambda1), int, float>);
-    static_assert(not IsInvocable<decltype(lambda2), int>);
-    static_assert(IsInvocable<decltype(lambda2), int, float>);
+    static_assert(
+        not HasAsyncReduceWithoutReductionOp<ygm::container::bag<int>>);
+    static_assert(
+        not HasAsyncReduceWithoutReductionOp<ygm::container::set<int>>);
+    static_assert(
+        not HasAsyncReduceWithoutReductionOp<ygm::container::map<int, float>>);
+    static_assert(
+        not HasAsyncReduceWithoutReductionOp<ygm::container::array<float>>);
+    static_assert(HasAsyncReduceWithoutReductionOp<
+                  ygm::container::detail::reducing_adapter<
+                      ygm::container::array<float>, std::plus<float>>>);
   }
-  */
 
   return 0;
 }
diff --git a/test/test_container_serialization.cpp b/test/test_container_serialization.cpp
index 066391f7..7167b38c 100644
--- a/test/test_container_serialization.cpp
+++ b/test/test_container_serialization.cpp
@@ -23,7 +23,7 @@ int main(int argc, char** argv) {
     my_bag.async_insert(5);
     my_bag.async_insert(8);
   }
-  ASSERT_RELEASE(my_bag.size() == 4);
+  YGM_ASSERT_RELEASE(my_bag.size() == 4);
 
   my_bag.serialize("serialization_test.bag");
 }
@@ -33,7 +33,7 @@ int main(int argc, char** argv) {
   ygm::container::bag<int> reloaded_bag(world);
   reloaded_bag.deserialize("serialization_test.bag");
 
-  ASSERT_RELEASE(reloaded_bag.size() == 4);
+  YGM_ASSERT_RELEASE(reloaded_bag.size() == 4);
 }
 }
 
@@ -47,10 +47,10 @@ if (world.rank0()) {
   my_set.async_insert(3);
   my_set.async_insert(3);
 }
-ASSERT_RELEASE(my_set.count(0) == 1);
-ASSERT_RELEASE(my_set.count(2) == 1);
-ASSERT_RELEASE(my_set.count(3) == 1);
-ASSERT_RELEASE(my_set.size() == 3);
+YGM_ASSERT_RELEASE(my_set.count(0) == 1);
+YGM_ASSERT_RELEASE(my_set.count(2) == 1);
+YGM_ASSERT_RELEASE(my_set.count(3) == 1);
+YGM_ASSERT_RELEASE(my_set.size() == 3);
 
 my_set.serialize("serialization_test.set");
 }
@@ -60,13 +60,13 @@ my_set.serialize("serialization_test.set");
   ygm::container::set<int> reloaded_set(world);
   reloaded_set.deserialize("serialization_test.set");
 
-  ASSERT_RELEASE(reloaded_set.count(0) == 1);
-  ASSERT_RELEASE(reloaded_set.count(2) == 1);
-  ASSERT_RELEASE(reloaded_set.count(3) == 1);
-  ASSERT_RELEASE(reloaded_set.size() == 3);
+  YGM_ASSERT_RELEASE(reloaded_set.count(0) == 1);
+  YGM_ASSERT_RELEASE(reloaded_set.count(2) == 1);
+  YGM_ASSERT_RELEASE(reloaded_set.count(3) == 1);
+  YGM_ASSERT_RELEASE(reloaded_set.size() == 3);
 
   reloaded_set.async_insert(4);
-  ASSERT_RELEASE(reloaded_set.size() == 4);
+  YGM_ASSERT_RELEASE(reloaded_set.size() == 4);
 }
 }
 
@@ -80,10 +80,10 @@ my_set.serialize("serialization_test.set");
 //   my_mset.async_insert(3);
 //   my_mset.async_insert(3);
 // }
-// ASSERT_RELEASE(my_mset.count(0) == 1);
-// ASSERT_RELEASE(my_mset.count(2) == 1);
-// ASSERT_RELEASE(my_mset.count(3) == 2);
-// ASSERT_RELEASE(my_mset.size() == 4);
+// YGM_ASSERT_RELEASE(my_mset.count(0) == 1);
+// YGM_ASSERT_RELEASE(my_mset.count(2) == 1);
+// YGM_ASSERT_RELEASE(my_mset.count(3) == 2);
+// YGM_ASSERT_RELEASE(my_mset.size() == 4);
 
 // my_mset.serialize("serialization_test.set");
 // }
@@ -93,13 +93,13 @@ my_set.serialize("serialization_test.set");
 //   ygm::container::set<int> reloaded_mset(world);
 //   reloaded_mset.deserialize("serialization_test.set");
 
-//   ASSERT_RELEASE(reloaded_mset.count(0) == 1);
-//   ASSERT_RELEASE(reloaded_mset.count(2) == 1);
-//   ASSERT_RELEASE(reloaded_mset.count(3) == 2);
-//   ASSERT_RELEASE(reloaded_mset.size() == 4);
+//   YGM_ASSERT_RELEASE(reloaded_mset.count(0) == 1);
+//   YGM_ASSERT_RELEASE(reloaded_mset.count(2) == 1);
+//   YGM_ASSERT_RELEASE(reloaded_mset.count(3) == 2);
+//   YGM_ASSERT_RELEASE(reloaded_mset.size() == 4);
 
 //   reloaded_mset.async_insert(4);
-//   ASSERT_RELEASE(reloaded_mset.size() == 5);
+//   YGM_ASSERT_RELEASE(reloaded_mset.size() == 5);
 // }
 // }
 
@@ -112,9 +112,9 @@ smap.async_insert("dog", "cat");
 smap.async_insert("apple", "orange");
 smap.async_insert("red", "green");
 
-ASSERT_RELEASE(smap.count("dog") == 1);
-ASSERT_RELEASE(smap.count("apple") == 1);
-ASSERT_RELEASE(smap.count("red") == 1);
+YGM_ASSERT_RELEASE(smap.count("dog") == 1);
+YGM_ASSERT_RELEASE(smap.count("apple") == 1);
+YGM_ASSERT_RELEASE(smap.count("red") == 1);
 
 smap.serialize("serialization_test.map");
 }
@@ -124,9 +124,9 @@ smap.serialize("serialization_test.map");
   ygm::container::map<std::string, std::string> reloaded_map(world);
   reloaded_map.deserialize("serialization_test.map");
 
-  ASSERT_RELEASE(reloaded_map.count("dog") == 1);
-  ASSERT_RELEASE(reloaded_map.count("apple") == 1);
-  ASSERT_RELEASE(reloaded_map.count("red") == 1);
+  YGM_ASSERT_RELEASE(reloaded_map.count("dog") == 1);
+  YGM_ASSERT_RELEASE(reloaded_map.count("apple") == 1);
+  YGM_ASSERT_RELEASE(reloaded_map.count("red") == 1);
 }
 }
 
@@ -139,9 +139,9 @@ smap.async_insert("dog", "cat");
 smap.async_insert("apple", "orange");
 smap.async_insert("red", "green");
 
-ASSERT_RELEASE(smap.count("dog") == (size_t)world.size());
-ASSERT_RELEASE(smap.count("apple") == (size_t)world.size());
-ASSERT_RELEASE(smap.count("red") == (size_t)world.size());
+YGM_ASSERT_RELEASE(smap.count("dog") == (size_t)world.size());
+YGM_ASSERT_RELEASE(smap.count("apple") == (size_t)world.size());
+YGM_ASSERT_RELEASE(smap.count("red") == (size_t)world.size());
 
 smap.serialize("serialization_test.mmap");
 }
@@ -151,9 +151,9 @@ smap.serialize("serialization_test.mmap");
   ygm::container::multimap<std::string, std::string> reloaded_mmap(world);
   reloaded_mmap.deserialize("serialization_test.mmap");
 
-  ASSERT_RELEASE(reloaded_mmap.count("dog") == (size_t)world.size());
-  ASSERT_RELEASE(reloaded_mmap.count("apple") == (size_t)world.size());
-  ASSERT_RELEASE(reloaded_mmap.count("red") == (size_t)world.size());
+  YGM_ASSERT_RELEASE(reloaded_mmap.count("dog") == (size_t)world.size());
+  YGM_ASSERT_RELEASE(reloaded_mmap.count("apple") == (size_t)world.size());
+  YGM_ASSERT_RELEASE(reloaded_mmap.count("red") == (size_t)world.size());
 }
 }
 
@@ -168,17 +168,17 @@ smap.serialize("serialization_test.mmap");
     cset.async_insert("apple");
     cset.async_insert("red");
 
-    ASSERT_RELEASE(cset.count("dog") == (size_t)world.size());
-    ASSERT_RELEASE(cset.count("apple") == (size_t)world.size());
-    ASSERT_RELEASE(cset.count("red") == (size_t)world.size());
-    ASSERT_RELEASE(cset.size() == 3);
+    YGM_ASSERT_RELEASE(cset.count("dog") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset.count("apple") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset.count("red") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset.size() == 3);
 
     auto count_map = cset.all_gather({"dog", "cat", "apple"});
-    ASSERT_RELEASE(count_map["dog"] == (size_t) world.size());
-    ASSERT_RELEASE(count_map["apple"] == (size_t) world.size());
-    ASSERT_RELEASE(cset.count("cat") == 0);
+    YGM_ASSERT_RELEASE(count_map["dog"] == (size_t) world.size());
+    YGM_ASSERT_RELEASE(count_map["apple"] == (size_t) world.size());
+    YGM_ASSERT_RELEASE(cset.count("cat") == 0);
 
-    ASSERT_RELEASE(cset.count_all() == 3 * (size_t) world.size());
+    YGM_ASSERT_RELEASE(cset.count_all() == 3 * (size_t) world.size());
 
     cset.serialize("serialization_test.cset");
   }
@@ -188,17 +188,17 @@ smap.serialize("serialization_test.mmap");
     ygm::container::counting_set<std::string> reloaded_cset(world);
     reloaded_cset.deserialize("serialization_test.cset");
 
-    ASSERT_RELEASE(reloaded_cset.count("dog") == (size_t) world.size());
-    ASSERT_RELEASE(reloaded_cset.count("apple") == (size_t) world.size());
-    ASSERT_RELEASE(reloaded_cset.count("red") == (size_t) world.size());
-    ASSERT_RELEASE(reloaded_cset.size() == 3);
+    YGM_ASSERT_RELEASE(reloaded_cset.count("dog") == (size_t) world.size());
+    YGM_ASSERT_RELEASE(reloaded_cset.count("apple") == (size_t) world.size());
+    YGM_ASSERT_RELEASE(reloaded_cset.count("red") == (size_t) world.size());
+    YGM_ASSERT_RELEASE(reloaded_cset.size() == 3);
 
     auto count_map = reloaded_cset.all_gather({"dog", "cat", "apple"});
-    ASSERT_RELEASE(count_map["dog"] == (size_t) world.size());
-    ASSERT_RELEASE(count_map["apple"] == (size_t) world.size());
-    ASSERT_RELEASE(reloaded_cset.count("cat") == 0);
+    YGM_ASSERT_RELEASE(count_map["dog"] == (size_t) world.size());
+    YGM_ASSERT_RELEASE(count_map["apple"] == (size_t) world.size());
+    YGM_ASSERT_RELEASE(reloaded_cset.count("cat") == 0);
 
-    ASSERT_RELEASE(reloaded_cset.count_all() == 3 * (size_t) world.size());
+    YGM_ASSERT_RELEASE(reloaded_cset.count_all() == 3 * (size_t) world.size());
   }
 }
 return 0;
diff --git a/test/test_counting_set.cpp b/test/test_counting_set.cpp
index ac6a48ab..206a644a 100644
--- a/test/test_counting_set.cpp
+++ b/test/test_counting_set.cpp
@@ -35,15 +35,15 @@ int main(int argc, char **argv) {
       cset.async_insert("red");
     }
 
-    ASSERT_RELEASE(cset.count("dog") == 1);
-    ASSERT_RELEASE(cset.count("apple") == 1);
-    ASSERT_RELEASE(cset.count("red") == 1);
-    ASSERT_RELEASE(cset.size() == 3);
-
-    auto count_map = cset.key_gather({"dog", "cat", "apple"});
-    ASSERT_RELEASE(count_map["dog"] == 1);
-    ASSERT_RELEASE(count_map["apple"] == 1);
-    ASSERT_RELEASE(count_map.count("cat") == 0);
+    YGM_ASSERT_RELEASE(cset.count("dog") == 1);
+    YGM_ASSERT_RELEASE(cset.count("apple") == 1);
+    YGM_ASSERT_RELEASE(cset.count("red") == 1);
+    YGM_ASSERT_RELEASE(cset.size() == 3);
+
+    auto count_map = cset.gather_keys({"dog", "cat", "apple"});
+    YGM_ASSERT_RELEASE(count_map["dog"] == 1);
+    YGM_ASSERT_RELEASE(count_map["apple"] == 1);
+    YGM_ASSERT_RELEASE(count_map.count("cat") == 0);
   }
 
   //
@@ -55,17 +55,17 @@ int main(int argc, char **argv) {
     cset.async_insert("apple");
     cset.async_insert("red");
 
-    ASSERT_RELEASE(cset.count("dog") == (size_t)world.size());
-    ASSERT_RELEASE(cset.count("apple") == (size_t)world.size());
-    ASSERT_RELEASE(cset.count("red") == (size_t)world.size());
-    ASSERT_RELEASE(cset.size() == 3);
+    YGM_ASSERT_RELEASE(cset.count("dog") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset.count("apple") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset.count("red") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset.size() == 3);
 
-    auto count_map = cset.key_gather({"dog", "cat", "apple"});
-    ASSERT_RELEASE(count_map["dog"] == (size_t)world.size());
-    ASSERT_RELEASE(count_map["apple"] == (size_t)world.size());
-    ASSERT_RELEASE(cset.count("cat") == 0);
+    auto count_map = cset.gather_keys({"dog", "cat", "apple"});
+    YGM_ASSERT_RELEASE(count_map["dog"] == (size_t)world.size());
+    YGM_ASSERT_RELEASE(count_map["apple"] == (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset.count("cat") == 0);
 
-    ASSERT_RELEASE(cset.count_all() == 3 * (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset.count_all() == 3 * (size_t)world.size());
   }
 
   //
@@ -80,17 +80,17 @@ int main(int argc, char **argv) {
     cset_ptr->async_insert("apple");
     cset.async_insert("red");
 
-    ASSERT_RELEASE(cset_ptr->count("dog") == (size_t)world.size());
-    ASSERT_RELEASE(cset_ptr->count("apple") == (size_t)world.size());
-    ASSERT_RELEASE(cset.count("red") == (size_t)world.size());
-    ASSERT_RELEASE(cset.size() == 3);
+    YGM_ASSERT_RELEASE(cset_ptr->count("dog") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset_ptr->count("apple") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset.count("red") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset.size() == 3);
 
-    auto count_map = cset.key_gather({"dog", "cat", "apple"});
-    ASSERT_RELEASE(count_map["dog"] == (size_t)world.size());
-    ASSERT_RELEASE(count_map["apple"] == (size_t)world.size());
-    ASSERT_RELEASE(cset.count("cat") == 0);
+    auto count_map = cset.gather_keys({"dog", "cat", "apple"});
+    YGM_ASSERT_RELEASE(count_map["dog"] == (size_t)world.size());
+    YGM_ASSERT_RELEASE(count_map["apple"] == (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset.count("cat") == 0);
 
-    ASSERT_RELEASE(cset.count_all() == 3 * (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset.count_all() == 3 * (size_t)world.size());
   }
 
   //
@@ -103,16 +103,16 @@ int main(int argc, char **argv) {
       cset.async_insert("red");
     }
 
-    ASSERT_RELEASE(cset.count("dog") == 1);
-    ASSERT_RELEASE(cset.count("apple") == 1);
-    ASSERT_RELEASE(cset.count("red") == 1);
-    ASSERT_RELEASE(cset.size() == 3);
+    YGM_ASSERT_RELEASE(cset.count("dog") == 1);
+    YGM_ASSERT_RELEASE(cset.count("apple") == 1);
+    YGM_ASSERT_RELEASE(cset.count("red") == 1);
+    YGM_ASSERT_RELEASE(cset.size() == 3);
 
     cset.clear();
-    ASSERT_RELEASE(cset.size() == 0);
-    ASSERT_RELEASE(cset.count("dog") == 0);
-    ASSERT_RELEASE(cset.count("apple") == 0);
-    ASSERT_RELEASE(cset.count("red") == 0);
+    YGM_ASSERT_RELEASE(cset.size() == 0);
+    YGM_ASSERT_RELEASE(cset.count("dog") == 0);
+    YGM_ASSERT_RELEASE(cset.count("apple") == 0);
+    YGM_ASSERT_RELEASE(cset.count("red") == 0);
   }
 
   // //
@@ -130,10 +130,10 @@ int main(int argc, char **argv) {
   //   auto topk = cset.topk(
   //       2, [](const auto &a, const auto &b) { return a.second > b.second; });
 
-  //   ASSERT_RELEASE(topk[0].first == "dog");
-  //   ASSERT_RELEASE(topk[0].second == 3 * world.size());
-  //   ASSERT_RELEASE(topk[1].first == "cat");
-  //   ASSERT_RELEASE(topk[1].second == 2 * world.size());
+  //   YGM_ASSERT_RELEASE(topk[0].first == "dog");
+  //   YGM_ASSERT_RELEASE(topk[0].second == 3 * world.size());
+  //   YGM_ASSERT_RELEASE(topk[1].first == "cat");
+  //   YGM_ASSERT_RELEASE(topk[1].second == 2 * world.size());
   // }
 
   //
@@ -149,11 +149,11 @@ int main(int argc, char **argv) {
     cset1.async_insert("cat");
     cset1.async_insert("bird");
 
-    ASSERT_RELEASE(cset1.count("dog") == (size_t)world.size() * 3);
-    ASSERT_RELEASE(cset1.count("cat") == (size_t)world.size() * 2);
-    ASSERT_RELEASE(cset1.count("bird") == (size_t)world.size());
-    ASSERT_RELEASE(cset1.count("red") == 0);
-    ASSERT_RELEASE(cset1.size() == 3);
+    YGM_ASSERT_RELEASE(cset1.count("dog") == (size_t)world.size() * 3);
+    YGM_ASSERT_RELEASE(cset1.count("cat") == (size_t)world.size() * 2);
+    YGM_ASSERT_RELEASE(cset1.count("bird") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset1.count("red") == 0);
+    YGM_ASSERT_RELEASE(cset1.size() == 3);
 
     cset1.for_all([&cset2](const auto &key, const auto &value) {
       for (int i = 0; i < value; i++) {
@@ -161,11 +161,11 @@ int main(int argc, char **argv) {
       }
     });
 
-    ASSERT_RELEASE(cset2.count("dog") == (size_t)world.size() * 3);
-    ASSERT_RELEASE(cset2.count("cat") == (size_t)world.size() * 2);
-    ASSERT_RELEASE(cset2.count("bird") == (size_t)world.size());
-    ASSERT_RELEASE(cset2.count("red") == 0);
-    ASSERT_RELEASE(cset2.size() == 3);
+    YGM_ASSERT_RELEASE(cset2.count("dog") == (size_t)world.size() * 3);
+    YGM_ASSERT_RELEASE(cset2.count("cat") == (size_t)world.size() * 2);
+    YGM_ASSERT_RELEASE(cset2.count("bird") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(cset2.count("red") == 0);
+    YGM_ASSERT_RELEASE(cset2.size() == 3);
   }
 
   return 0;
diff --git a/test/test_csv_headers.cpp b/test/test_csv_headers.cpp
new file mode 100644
index 00000000..8f331fbc
--- /dev/null
+++ b/test/test_csv_headers.cpp
@@ -0,0 +1,35 @@
+// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
+// Project Developers. See the top-level COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: MIT
+
+#undef NDEBUG
+
+#include <filesystem>
+#include <ygm/comm.hpp>
+#include <ygm/io/csv_parser.hpp>
+
+int main(int argc, char** argv) {
+  ygm::comm world(&argc, &argv);
+
+  // read_headers() runs first so the name-based lookups below can resolve
+  ygm::io::csv_parser csvp(world,
+                           std::vector<std::string>{"data/csv_headers.csv"});
+  csvp.read_headers();
+  csvp.for_all([](const auto& vfields) {
+    // Test lookups by header names
+    YGM_ASSERT_RELEASE(vfields["zero"].as_integer() == 0);
+    YGM_ASSERT_RELEASE(vfields["two"].as_integer() == 2);
+    YGM_ASSERT_RELEASE(vfields["four"].as_integer() == 4);
+    YGM_ASSERT_RELEASE(vfields["six"].as_integer() == 6);
+
+    // Name and positional lookups must agree ("four" is column 1, "two" is 2)
+    YGM_ASSERT_RELEASE(vfields["zero"].as_integer() == vfields[0].as_integer());
+    YGM_ASSERT_RELEASE(vfields["two"].as_integer() == vfields[2].as_integer());
+    YGM_ASSERT_RELEASE(vfields["four"].as_integer() == vfields[1].as_integer());
+    YGM_ASSERT_RELEASE(vfields["six"].as_integer() == vfields[3].as_integer());
+  });
+
+  world.barrier();
+  return 0;
+}
diff --git a/test/test_csv_parser.cpp b/test/test_csv_parser.cpp
index f654a292..23ec7fc3 100644
--- a/test/test_csv_parser.cpp
+++ b/test/test_csv_parser.cpp
@@ -16,13 +16,13 @@ int main(int argc, char** argv) {
   ygm::io::csv_parser csvp(world, std::vector<std::string>{"data/100.csv"});
   csvp.for_all([&world, &local_count](const auto& vfields) {
     for (auto f : vfields) {
-      ASSERT_RELEASE(f.is_integer());
+      YGM_ASSERT_RELEASE(f.is_integer());
       local_count += f.as_integer();
     }
   });
 
   world.barrier();
-  ASSERT_RELEASE(world.all_reduce_sum(local_count) == 100);
+  YGM_ASSERT_RELEASE(world.all_reduce_sum(local_count) == 100);
 
   return 0;
 }
diff --git a/test/test_daily_output.cpp b/test/test_daily_output.cpp
index 5584c6a7..b32a06eb 100644
--- a/test/test_daily_output.cpp
+++ b/test/test_daily_output.cpp
@@ -31,7 +31,7 @@ int main(int argc, char **argv) {
 
     if (world.rank0()) {
       std::string expected_path(prefix_path + "1970/1/1");
-      ASSERT_RELEASE(fs::exists(fs::path(expected_path)));
+      YGM_ASSERT_RELEASE(fs::exists(fs::path(expected_path)));
       fs::remove_all(fs::path(base_dir));
     }
   }
@@ -77,7 +77,7 @@ int main(int argc, char **argv) {
       fs::remove_all(fs::path(base_dir));
     }
 
-    ASSERT_RELEASE(xor_write == xor_read);
+    YGM_ASSERT_RELEASE(xor_write == xor_read);
   }
 
   // Test appending
@@ -132,7 +132,7 @@ int main(int argc, char **argv) {
       fs::remove_all(fs::path(base_dir));
     }
 
-    ASSERT_RELEASE(xor_write == xor_read);
+    YGM_ASSERT_RELEASE(xor_write == xor_read);
   }
 
   return 0;
diff --git a/test/test_disjoint_set.cpp b/test/test_disjoint_set.cpp
index 1e54209c..20c4794d 100644
--- a/test/test_disjoint_set.cpp
+++ b/test/test_disjoint_set.cpp
@@ -45,8 +45,8 @@ int main(int argc, char** argv) {
     std::vector<std::string> to_find = {"cat", "dog", "car"};
 
     auto reps = dset.all_find(to_find);
-    ASSERT_RELEASE(reps["cat"] == reps["dog"]);
-    ASSERT_RELEASE(reps["cat"] != reps["car"]);
+    YGM_ASSERT_RELEASE(reps["cat"] == reps["dog"]);
+    YGM_ASSERT_RELEASE(reps["cat"] != reps["car"]);
   }
 
   //
@@ -67,8 +67,8 @@ int main(int argc, char** argv) {
     std::vector<std::string> to_find = {"cat", "dog", "car"};
 
     auto reps = dset.all_find(to_find);
-    ASSERT_RELEASE(reps["cat"] == reps["dog"]);
-    ASSERT_RELEASE(reps["cat"] != reps["car"]);
+    YGM_ASSERT_RELEASE(reps["cat"] == reps["dog"]);
+    YGM_ASSERT_RELEASE(reps["cat"] != reps["car"]);
   }
 
   //
@@ -86,13 +86,13 @@ int main(int argc, char** argv) {
 
     dset.async_union("cat", "dog");
 
-    ASSERT_RELEASE(dset.size() == 3);
-    ASSERT_RELEASE(dset.num_sets() == 2);
+    YGM_ASSERT_RELEASE(dset.size() == 3);
+    YGM_ASSERT_RELEASE(dset.num_sets() == 2);
 
     dset.clear();
 
-    ASSERT_RELEASE(dset.size() == 0);
-    ASSERT_RELEASE(dset.num_sets() == 0);
+    YGM_ASSERT_RELEASE(dset.size() == 0);
+    YGM_ASSERT_RELEASE(dset.num_sets() == 0);
   }
 
   //
@@ -110,7 +110,7 @@ int main(int argc, char** argv) {
     }
 
     world.barrier();
-    ASSERT_RELEASE(dset.num_sets() == 6);
+    YGM_ASSERT_RELEASE(dset.num_sets() == 6);
 
     std::vector<int> to_find = {0, 1, 2, 3, 4, 5};
 
@@ -120,24 +120,24 @@ int main(int argc, char** argv) {
     dset.async_union(3, 4);
     dset.async_union(4, 5);
 
-    ASSERT_RELEASE(dset.num_sets() == 2);
+    YGM_ASSERT_RELEASE(dset.num_sets() == 2);
 
     auto reps = dset.all_find(to_find);
-    ASSERT_RELEASE(reps[0] == reps[1]);
-    ASSERT_RELEASE(reps[1] == reps[2]);
-    ASSERT_RELEASE(reps[2] != reps[3]);
-    ASSERT_RELEASE(reps[3] == reps[4]);
-    ASSERT_RELEASE(reps[4] == reps[5]);
+    YGM_ASSERT_RELEASE(reps[0] == reps[1]);
+    YGM_ASSERT_RELEASE(reps[1] == reps[2]);
+    YGM_ASSERT_RELEASE(reps[2] != reps[3]);
+    YGM_ASSERT_RELEASE(reps[3] == reps[4]);
+    YGM_ASSERT_RELEASE(reps[4] == reps[5]);
 
     dset.async_union(0, 3);
-    ASSERT_RELEASE(dset.num_sets() == 1);
+    YGM_ASSERT_RELEASE(dset.num_sets() == 1);
 
     auto reps_final = dset.all_find(to_find);
-    ASSERT_RELEASE(reps_final[0] == reps_final[1]);
-    ASSERT_RELEASE(reps_final[1] == reps_final[2]);
-    ASSERT_RELEASE(reps_final[2] == reps_final[3]);
-    ASSERT_RELEASE(reps_final[3] == reps_final[4]);
-    ASSERT_RELEASE(reps_final[4] == reps_final[5]);
+    YGM_ASSERT_RELEASE(reps_final[0] == reps_final[1]);
+    YGM_ASSERT_RELEASE(reps_final[1] == reps_final[2]);
+    YGM_ASSERT_RELEASE(reps_final[2] == reps_final[3]);
+    YGM_ASSERT_RELEASE(reps_final[3] == reps_final[4]);
+    YGM_ASSERT_RELEASE(reps_final[4] == reps_final[5]);
   }
 
   //
@@ -155,7 +155,7 @@ int main(int argc, char** argv) {
     }
 
     world.barrier();
-    ASSERT_RELEASE(dset.num_sets() == 6);
+    YGM_ASSERT_RELEASE(dset.num_sets() == 6);
 
     std::vector<int> to_find = {0, 1, 2, 3, 4, 5};
 
@@ -165,26 +165,26 @@ int main(int argc, char** argv) {
     dset.async_union(4, 5);
     dset.async_union(3, 5);
 
-    ASSERT_RELEASE(dset.num_sets() == 2);
+    YGM_ASSERT_RELEASE(dset.num_sets() == 2);
 
     auto reps = dset.all_find(to_find);
-    ASSERT_RELEASE(reps[0] == reps[1]);
-    ASSERT_RELEASE(reps[1] == reps[2]);
-    ASSERT_RELEASE(reps[2] != reps[3]);
-    ASSERT_RELEASE(reps[3] == reps[4]);
-    ASSERT_RELEASE(reps[4] == reps[5]);
+    YGM_ASSERT_RELEASE(reps[0] == reps[1]);
+    YGM_ASSERT_RELEASE(reps[1] == reps[2]);
+    YGM_ASSERT_RELEASE(reps[2] != reps[3]);
+    YGM_ASSERT_RELEASE(reps[3] == reps[4]);
+    YGM_ASSERT_RELEASE(reps[4] == reps[5]);
 
     dset.async_union(0, 3);
-    ASSERT_RELEASE(dset.num_sets() == 1);
+    YGM_ASSERT_RELEASE(dset.num_sets() == 1);
 
     dset.all_compress();
 
     auto reps_final = dset.all_find(to_find);
-    ASSERT_RELEASE(reps_final[0] == reps_final[1]);
-    ASSERT_RELEASE(reps_final[1] == reps_final[2]);
-    ASSERT_RELEASE(reps_final[2] == reps_final[3]);
-    ASSERT_RELEASE(reps_final[3] == reps_final[4]);
-    ASSERT_RELEASE(reps_final[4] == reps_final[5]);
+    YGM_ASSERT_RELEASE(reps_final[0] == reps_final[1]);
+    YGM_ASSERT_RELEASE(reps_final[1] == reps_final[2]);
+    YGM_ASSERT_RELEASE(reps_final[2] == reps_final[3]);
+    YGM_ASSERT_RELEASE(reps_final[3] == reps_final[4]);
+    YGM_ASSERT_RELEASE(reps_final[4] == reps_final[5]);
   }
 
   //
@@ -200,11 +200,11 @@ int main(int argc, char** argv) {
     }
 
     dset.for_all([&counter](const auto& item, const auto& rep) {
-      ASSERT_RELEASE(item == rep);
+      YGM_ASSERT_RELEASE(item == rep);
       ++counter;
     });
 
-    ASSERT_RELEASE(world.all_reduce_sum(counter) == num_items);
+    YGM_ASSERT_RELEASE(world.all_reduce_sum(counter) == num_items);
   }
 
   // Test async_union_and_execute
@@ -224,6 +224,6 @@ int main(int argc, char** argv) {
 
     world.barrier();
 
-    ASSERT_RELEASE(world.all_reduce_sum(counter) == 3);
+    YGM_ASSERT_RELEASE(world.all_reduce_sum(counter) == 3);
   }
 }
diff --git a/test/test_gather_topk.cpp b/test/test_gather_topk.cpp
new file mode 100644
index 00000000..99a5a4dd
--- /dev/null
+++ b/test/test_gather_topk.cpp
@@ -0,0 +1,46 @@
+// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
+// Project Developers. See the top-level COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: MIT
+
+#undef NDEBUG
+
+#include <set>
+#include <string>
+#include <vector>
+#include <ygm/comm.hpp>
+#include <ygm/container/bag.hpp>
+#include <ygm/container/counting_set.hpp>
+#include <ygm/random.hpp>
+
+int main(int argc, char** argv) {
+  ygm::comm world(&argc, &argv);
+
+  // bag: with no comparator given, expect the two largest values, descending
+  {
+    ygm::container::bag<int> ibag(world, {42, 1, 8, 16, 32, 3, 4, 5, 6, 7});
+    auto top2 = ibag.gather_topk(2);
+    // Check the size first so a short result fails here, not while indexing
+    YGM_ASSERT_RELEASE(top2.size() == 2);
+    YGM_ASSERT_RELEASE(top2[0] == 42);
+    YGM_ASSERT_RELEASE(top2[1] == 32);
+  }
+
+  // counting_set: each rank inserts "fish" 4 times, so it tops the count order
+  {
+    ygm::container::counting_set<std::string> cs(world);
+    cs.async_insert("one");
+    cs.async_insert("fish");
+    cs.async_insert("two");
+    cs.async_insert("fish");
+    cs.async_insert("red");
+    cs.async_insert("fish");
+    cs.async_insert("blue");
+    cs.async_insert("fish");
+    std::vector< std::pair<std::string, size_t> > top1 = cs.gather_topk(
+        1, [](auto p1, auto p2) { return p1.second > p2.second; });
+    YGM_ASSERT_RELEASE(top1.size() == 1);
+    YGM_ASSERT_RELEASE(top1[0].first == "fish");
+    YGM_ASSERT_RELEASE(top1[0].second == 4 * (size_t)world.size());
+  }
+}
diff --git a/test/test_interrupt_mask.cpp b/test/test_interrupt_mask.cpp
index 18fb15c5..5bb29eff 100644
--- a/test/test_interrupt_mask.cpp
+++ b/test/test_interrupt_mask.cpp
@@ -24,13 +24,13 @@ int main(int argc, char** argv) {
 
     world.cf_barrier();
 
-    ASSERT_RELEASE(count == 0);
+    YGM_ASSERT_RELEASE(count == 0);
   }
 
   world.barrier();
 
   if (world.rank0()) {
-    ASSERT_RELEASE(count == num_sends * world.size());
+    YGM_ASSERT_RELEASE(count == num_sends * world.size());
   }
 
   return 0;
diff --git a/test/test_large_messages.cpp b/test/test_large_messages.cpp
index 73c2ebcd..a8b55335 100644
--- a/test/test_large_messages.cpp
+++ b/test/test_large_messages.cpp
@@ -34,7 +34,7 @@ int main(int argc, char** argv) {
     }
 
     world.barrier();
-    ASSERT_RELEASE(counter == large_msg_size);
+    YGM_ASSERT_RELEASE(counter == large_msg_size);
   }
 
   return 0;
diff --git a/test/test_layout.cpp b/test/test_layout.cpp
index 6e5e8f3c..df6392e7 100644
--- a/test/test_layout.cpp
+++ b/test/test_layout.cpp
@@ -16,7 +16,7 @@ int main(int argc, char** argv) {
     int node_size(world.layout().node_size());
     int min_node_size = world.all_reduce_min(node_size);
     world.barrier();
-    ASSERT_RELEASE(min_node_size == node_size);
+    YGM_ASSERT_RELEASE(min_node_size == node_size);
   }
 
   //
@@ -25,7 +25,7 @@ int main(int argc, char** argv) {
     int local_size(world.layout().local_size());
     int min_local_size = world.all_reduce_min(local_size);
     world.barrier();
-    ASSERT_RELEASE(min_local_size == local_size);
+    YGM_ASSERT_RELEASE(min_local_size == local_size);
   }
 
   //
@@ -33,8 +33,8 @@ int main(int argc, char** argv) {
   {
     for (int dst(0); dst < world.size(); ++dst) {
       auto p = world.layout().rank_to_nl(dst);
-      ASSERT_RELEASE(p.first == world.layout().node_id(dst));
-      ASSERT_RELEASE(p.second == world.layout().local_id(dst));
+      YGM_ASSERT_RELEASE(p.first == world.layout().node_id(dst));
+      YGM_ASSERT_RELEASE(p.second == world.layout().local_id(dst));
     }
     world.barrier();
   }
@@ -45,8 +45,8 @@ int main(int argc, char** argv) {
     if (world.rank0()) {
       for (int dst(0); dst < world.size(); ++dst) {
         auto check_fn = [](auto pcomm, int node_guess, int local_guess) {
-          ASSERT_RELEASE(pcomm->layout().node_id() == node_guess);
-          ASSERT_RELEASE(pcomm->layout().local_id() == local_guess);
+          YGM_ASSERT_RELEASE(pcomm->layout().node_id() == node_guess);
+          YGM_ASSERT_RELEASE(pcomm->layout().local_id() == local_guess);
         };
         auto p = world.layout().rank_to_nl(dst);
         world.async(dst, check_fn, p.first, p.second);
@@ -59,7 +59,7 @@ int main(int argc, char** argv) {
   // is_local() is correct
   {
     auto check_fn = [](auto pcomm, int rank, bool tru) {
-      ASSERT_RELEASE(pcomm->layout().is_local(rank) == tru);
+      YGM_ASSERT_RELEASE(pcomm->layout().is_local(rank) == tru);
     };
 
     bool target = (world.layout().node_id() == 0) ? true : false;
@@ -72,9 +72,9 @@ int main(int argc, char** argv) {
   {
     std::vector<int> strided_ranks = world.layout().strided_ranks();
     for (auto sr : strided_ranks) {
-      ASSERT_RELEASE(world.layout().is_strided(sr) == true);
+      YGM_ASSERT_RELEASE(world.layout().is_strided(sr) == true);
       if (world.layout().rank() != sr) {
-        ASSERT_RELEASE(world.layout().is_local(sr) == false);
+        YGM_ASSERT_RELEASE(world.layout().is_local(sr) == false);
       }
     }
     world.barrier();
@@ -85,9 +85,9 @@ int main(int argc, char** argv) {
   {
     std::vector<int> local_ranks = world.layout().local_ranks();
     for (auto lr : local_ranks) {
-      ASSERT_RELEASE(world.layout().is_local(lr) == true);
+      YGM_ASSERT_RELEASE(world.layout().is_local(lr) == true);
       if (world.layout().rank() != lr) {
-        ASSERT_RELEASE(world.layout().is_strided(lr) == false);
+        YGM_ASSERT_RELEASE(world.layout().is_strided(lr) == false);
       }
     }
     world.barrier();
@@ -96,9 +96,9 @@ int main(int argc, char** argv) {
   {
     std::vector<int> strided_ranks = world.layout().strided_ranks();
     auto             check_fn      = [](auto pcomm, int src_rank) {
-      ASSERT_RELEASE(pcomm->layout().is_strided(src_rank) == true);
+      YGM_ASSERT_RELEASE(pcomm->layout().is_strided(src_rank) == true);
       if (pcomm->layout().rank() != src_rank) {
-        ASSERT_RELEASE(pcomm->layout().is_local(src_rank) == false);
+        YGM_ASSERT_RELEASE(pcomm->layout().is_local(src_rank) == false);
       }
     };
     for (auto dst : strided_ranks) {
@@ -110,9 +110,9 @@ int main(int argc, char** argv) {
   {
     std::vector<int> local_ranks = world.layout().local_ranks();
     auto             check_fn    = [](auto pcomm, int src_rank) {
-      ASSERT_RELEASE(pcomm->layout().is_local(src_rank) == true);
+      YGM_ASSERT_RELEASE(pcomm->layout().is_local(src_rank) == true);
       if (pcomm->layout().rank() != src_rank) {
-        ASSERT_RELEASE(pcomm->layout().is_strided(src_rank) == false);
+        YGM_ASSERT_RELEASE(pcomm->layout().is_strided(src_rank) == false);
       }
     };
     for (auto dst : local_ranks) {
diff --git a/test/test_line_parser.cpp b/test/test_line_parser.cpp
index e7700f43..2eb2174a 100644
--- a/test/test_line_parser.cpp
+++ b/test/test_line_parser.cpp
@@ -64,7 +64,7 @@ void test_line_parser_files(ygm::comm& comm, const std::vector<std::string>& fil
   std::set<std::string>                     line_set_sequential;
   for (const auto& f : files) {
     std::ifstream ifs(f.c_str());
-    ASSERT_RELEASE(ifs.good());
+    YGM_ASSERT_RELEASE(ifs.good());
     std::string line;
     while (std::getline(ifs, line)) {
       line_set.async_insert(line);
@@ -72,10 +72,10 @@ void test_line_parser_files(ygm::comm& comm, const std::vector<std::string>& fil
     }
   }
 
-  ASSERT_RELEASE(line_set.size() == line_set_sequential.size());
+  YGM_ASSERT_RELEASE(line_set.size() == line_set_sequential.size());
   //comm.cout0(line_set.size(), " =? ", line_set_to_test.size());
-  ASSERT_RELEASE(line_set.size() == line_set_to_test.size());
-  // ASSERT_RELEASE(line_set == line_set_to_test);
+  YGM_ASSERT_RELEASE(line_set.size() == line_set_to_test.size());
+  // YGM_ASSERT_RELEASE(line_set == line_set_to_test);
 }
 
 
@@ -88,5 +88,5 @@ void test_line_parser_directory(ygm::comm& comm, const std::string& dir, size_t
     line_set_to_test.async_insert(line);
   });
 
-  ASSERT_RELEASE(unique_line_count == line_set_to_test.size());  
+  YGM_ASSERT_RELEASE(unique_line_count == line_set_to_test.size());  
 }
\ No newline at end of file
diff --git a/test/test_map.cpp b/test/test_map.cpp
index f7916635..46c9f43d 100644
--- a/test/test_map.cpp
+++ b/test/test_map.cpp
@@ -7,7 +7,9 @@
 #include <algorithm>
 #include <string>
 #include <ygm/comm.hpp>
+#include <ygm/container/bag.hpp>
 #include <ygm/container/map.hpp>
+#include <ygm/container/set.hpp>
 
 int main(int argc, char **argv) {
   ygm::comm world(&argc, &argv);
@@ -35,9 +37,9 @@ int main(int argc, char **argv) {
       smap.async_insert("apple", "orange");
       smap.async_insert("red", "green");
     }
-    ASSERT_RELEASE(smap.count("dog") == 1);
-    ASSERT_RELEASE(smap.count("apple") == 1);
-    ASSERT_RELEASE(smap.count("red") == 1);
+    YGM_ASSERT_RELEASE(smap.count("dog") == 1);
+    YGM_ASSERT_RELEASE(smap.count("apple") == 1);
+    YGM_ASSERT_RELEASE(smap.count("red") == 1);
   }
 
   //
@@ -49,9 +51,9 @@ int main(int argc, char **argv) {
     smap.async_insert("apple", "orange");
     smap.async_insert("red", "green");
 
-    ASSERT_RELEASE(smap.count("dog") == 1);
-    ASSERT_RELEASE(smap.count("apple") == 1);
-    ASSERT_RELEASE(smap.count("red") == 1);
+    YGM_ASSERT_RELEASE(smap.count("dog") == 1);
+    YGM_ASSERT_RELEASE(smap.count("apple") == 1);
+    YGM_ASSERT_RELEASE(smap.count("red") == 1);
   }
 
   //
@@ -70,20 +72,45 @@ int main(int argc, char **argv) {
     world.barrier();
 
     smap.async_visit("dog", [](const auto &key, auto &value) {
-      ASSERT_RELEASE(value == "cat");
+      YGM_ASSERT_RELEASE(value == "cat");
     });
 
     smap.async_visit_if_contains("apple", [](auto key, auto &value) {
-      ASSERT_RELEASE(value == "orange");
+      YGM_ASSERT_RELEASE(value == "orange");
     });
 
     const ygm::container::map<std::string, std::string> &csmap = smap;
-    csmap.async_visit_if_contains(
-        "red", [](auto key, auto &value) { ASSERT_RELEASE(value == "green"); });
+    csmap.async_visit_if_contains("red", [](auto key, auto &value) {
+      YGM_ASSERT_RELEASE(value == "green");
+    });
 
     smap.async_visit_if_contains(
         "SHOULD_BE_MISSING",
-        [](auto key, auto &value) { ASSERT_RELEASE(false); });
+        [](auto key, auto &value) { YGM_ASSERT_RELEASE(false); });
+  }
+
+  // Test async_visit with a functor (struct with operator()) instead of a
+  // lambda; "dog" is expected to keep "cat" despite the second async_insert
+  {
+    ygm::container::map<std::string, std::string> smap(world);
+
+    smap.async_insert("dog", "cat");
+    smap.async_insert("apple", "orange");
+
+    world.barrier();
+
+    smap.async_insert("dog", "dog");
+    smap.async_insert("red", "green");
+
+    world.barrier();
+
+    struct dog_check {
+      void operator()(const std::string &key, std::string &value) {
+        YGM_ASSERT_RELEASE(value == "cat");
+      }
+    };
+
+    smap.async_visit("dog", dog_check());
   }
 
   //
@@ -92,31 +119,52 @@ int main(int argc, char **argv) {
     ygm::container::map<std::string, std::string> smap(world);
     smap.async_visit("dog",
                      [](const std::string &key, const std::string &value) {
-                       ASSERT_RELEASE(key == "dog");
-                       ASSERT_RELEASE(value == "");
+                       YGM_ASSERT_RELEASE(key == "dog");
+                       YGM_ASSERT_RELEASE(value == "");
                      });
     smap.async_visit("cat", [](const std::string &key, std::string &value) {
-      ASSERT_RELEASE(key == "cat");
-      ASSERT_RELEASE(value == "");
+      YGM_ASSERT_RELEASE(key == "cat");
+      YGM_ASSERT_RELEASE(value == "");
     });
     smap.async_visit_if_contains(
-        "red", [](const auto &k, const auto &v) { ASSERT_RELEASE(false); });
+        "red", [](const auto &k, const auto &v) { YGM_ASSERT_RELEASE(false); });
 
-    ASSERT_RELEASE(smap.count("dog") == 1);
-    ASSERT_RELEASE(smap.count("cat") == 1);
-    ASSERT_RELEASE(smap.count("red") == 0);
+    YGM_ASSERT_RELEASE(smap.count("dog") == 1);
+    YGM_ASSERT_RELEASE(smap.count("cat") == 1);
+    YGM_ASSERT_RELEASE(smap.count("red") == 0);
 
-    ASSERT_RELEASE(smap.size() == 2);
+    YGM_ASSERT_RELEASE(smap.size() == 2);
 
     if (world.rank() == 0) {
       smap.async_erase("dog");
     }
-    ASSERT_RELEASE(smap.count("dog") == 0);
-    ASSERT_RELEASE(smap.size() == 1);
+    YGM_ASSERT_RELEASE(smap.count("dog") == 0);
+    YGM_ASSERT_RELEASE(smap.size() == 1);
     smap.async_erase("cat");
-    ASSERT_RELEASE(smap.count("cat") == 0);
+    YGM_ASSERT_RELEASE(smap.count("cat") == 0);
 
-    ASSERT_RELEASE(smap.size() == 0);
+    YGM_ASSERT_RELEASE(smap.size() == 0);
+  }
+
+  // Test default value: visiting a missing key is expected to create it with
+  // the map's default ("NOT FOUND"), so the map ends up with two entries
+  {
+    ygm::container::map<std::string, std::string> smap(world, "NOT FOUND");
+
+    smap.async_insert("dog", "cat");
+
+    world.barrier();
+
+    if (world.rank0()) {
+      smap.async_visit("dog", [](const auto &k, const auto &v) {
+        YGM_ASSERT_RELEASE(v == "cat");
+      });
+      smap.async_visit("not inserted", [](const auto &k, const auto &v) {
+        YGM_ASSERT_RELEASE(v == "NOT FOUND");
+      });
+    }
+
+    YGM_ASSERT_RELEASE(smap.size() == 2);  // "dog" + "not inserted"
   }
 
   // //
@@ -138,7 +186,8 @@ int main(int argc, char **argv) {
 
   //   world.barrier();
 
-  //   ASSERT_RELEASE(world.all_reduce_sum(dog_visit_counter) == world.size());
+  //   YGM_ASSERT_RELEASE(world.all_reduce_sum(dog_visit_counter) ==
+  //   world.size());
 
   //   static int apple_visit_counter{0};
 
@@ -150,14 +199,14 @@ int main(int argc, char **argv) {
 
   //   world.barrier();
 
-  //   ASSERT_RELEASE(world.all_reduce_sum(apple_visit_counter) ==
+  //   YGM_ASSERT_RELEASE(world.all_reduce_sum(apple_visit_counter) ==
   //                  world.size() - 1);
 
   //   if (world.rank0()) {
   //     smap.async_insert_else_visit(
   //         "red", "green",
   //         [](const auto &key, const auto &value, const auto &new_value) {
-  //           ASSERT_RELEASE(true == false);
+  //           YGM_ASSERT_RELEASE(true == false);
   //         });
   //   }
   // }
@@ -180,17 +229,16 @@ int main(int argc, char **argv) {
 
     world.barrier();
 
-    smap.for_all([&world, &num_reductions](const auto &key, const auto
-    &value) {
+    smap.for_all([&world, &num_reductions](const auto &key, const auto &value) {
       if (key == "sum") {
-        ASSERT_RELEASE(value == world.size() * num_reductions *
-                                    (num_reductions - 1) / 2);
+        YGM_ASSERT_RELEASE(value == world.size() * num_reductions *
+                                        (num_reductions - 1) / 2);
       } else if (key == "min") {
-        ASSERT_RELEASE(value == 0);
+        YGM_ASSERT_RELEASE(value == 0);
       } else if (key == "max") {
-        ASSERT_RELEASE(value == num_reductions - 1);
+        YGM_ASSERT_RELEASE(value == num_reductions - 1);
       } else {
-        ASSERT_RELEASE(false);
+        YGM_ASSERT_RELEASE(false);
       }
     });
   }
@@ -205,15 +253,190 @@ int main(int argc, char **argv) {
       smap2.async_insert("apple", "orange");
       smap2.async_insert("red", "green");
       smap2.swap(smap);
-      ASSERT_RELEASE(smap2.size() == 0);
+      YGM_ASSERT_RELEASE(smap2.size() == 0);
     }
-    ASSERT_RELEASE(smap.size() == 3);
-    ASSERT_RELEASE(smap.count("dog") == 1);
-    ASSERT_RELEASE(smap.count("apple") == 1);
-    ASSERT_RELEASE(smap.count("red") == 1);
+    YGM_ASSERT_RELEASE(smap.size() == 3);
+    YGM_ASSERT_RELEASE(smap.count("dog") == 1);
+    YGM_ASSERT_RELEASE(smap.count("apple") == 1);
+    YGM_ASSERT_RELEASE(smap.count("red") == 1);
     smap.async_insert_or_assign("car", "truck");
-    ASSERT_RELEASE(smap.size() == 4);
-    ASSERT_RELEASE(smap.count("car") == 1);
+    YGM_ASSERT_RELEASE(smap.size() == 4);
+    YGM_ASSERT_RELEASE(smap.count("car") == 1);
+  }
+
+  // Test batch erase: keys listed in a ygm set are expected to be removed
+  {
+    int                           num_items   = 100;
+    int                           remove_size = 20;
+    ygm::container::map<int, int> imap(world);
+
+    if (world.rank0()) {
+      for (int i = 0; i < num_items; ++i) {
+        imap.async_insert(i, i);
+      }
+    }
+
+    world.barrier();
+
+    YGM_ASSERT_RELEASE(imap.size() == num_items);
+
+    ygm::container::set<int> to_remove(world);
+
+    if (world.rank0()) {
+      for (int i = 0; i < remove_size; ++i) {
+        to_remove.async_insert(i);
+      }
+    }
+
+    world.barrier();
+
+    imap.erase(to_remove);  // called on every rank, not just rank 0
+
+    imap.for_all([remove_size](const auto &key, const auto &value) {
+      YGM_ASSERT_RELEASE(key >= remove_size);
+    });
+
+    YGM_ASSERT_RELEASE(imap.size() == num_items - remove_size);
+  }
+
+  // Test batch erase from map; odd keys carry a non-matching value and stay
+  {
+    int                           num_items   = 100;
+    int                           remove_size = 20;
+    ygm::container::map<int, int> imap(world);
+
+    if (world.rank0()) {
+      for (int i = 0; i < num_items; ++i) {
+        imap.async_insert(i, i);
+      }
+    }
+
+    world.barrier();
+
+    YGM_ASSERT_RELEASE(imap.size() == num_items);
+
+    ygm::container::map<int, int> to_remove(world);
+
+    if (world.rank0()) {
+      for (int i = 0; i < remove_size; ++i) {
+        to_remove.async_insert(i, i + (i % 2));  // even i: same value as imap
+      }
+    }
+
+    world.barrier();
+
+    imap.erase(to_remove);  // called on every rank, not just rank 0
+
+    imap.for_all([remove_size](const auto &key, const auto &value) {
+      YGM_ASSERT_RELEASE(((key % 2) == 1) || (key >= remove_size));
+    });
+
+    YGM_ASSERT_RELEASE(imap.size() == num_items - remove_size / 2);
+  }
+
+  // Test batch erase from a std::vector of keys that is filled on rank 0 only
+  {
+    int                           num_items   = 100;
+    int                           remove_size = 20;
+    ygm::container::map<int, int> imap(world);
+
+    if (world.rank0()) {
+      for (int i = 0; i < num_items; ++i) {
+        imap.async_insert(i, i);
+      }
+    }
+
+    world.barrier();
+
+    YGM_ASSERT_RELEASE(imap.size() == num_items);
+
+    std::vector<int> to_remove;
+
+    if (world.rank0()) {
+      for (int i = 0; i < remove_size; ++i) {
+        to_remove.push_back(i);
+      }
+    }
+
+    world.barrier();
+
+    imap.erase(to_remove);  // vector is empty on every rank except 0
+
+    imap.for_all([remove_size](const auto &key, const auto &value) {
+      YGM_ASSERT_RELEASE(key >= remove_size);
+    });
+
+    YGM_ASSERT_RELEASE(imap.size() == num_items - remove_size);
+  }
+
+  // Test batch erase from vector of (key, value) pairs; odd keys mismatch
+  {
+    int                           num_items   = 100;
+    int                           remove_size = 20;
+    ygm::container::map<int, int> imap(world);
+
+    if (world.rank0()) {
+      for (int i = 0; i < num_items; ++i) {
+        imap.async_insert(i, i);
+      }
+    }
+
+    world.barrier();
+
+    YGM_ASSERT_RELEASE(imap.size() == num_items);
+
+    std::vector<std::pair<int, int>> to_remove;
+
+    if (world.rank0()) {
+      for (int i = 0; i < remove_size; ++i) {
+        to_remove.push_back(std::make_pair(i, i + (i % 2)));  // even: matches
+      }
+    }
+
+    world.barrier();
+
+    imap.erase(to_remove);  // vector is empty on every rank except 0
+
+    imap.for_all([remove_size](const auto &key, const auto &value) {
+      YGM_ASSERT_RELEASE(((key % 2) == 1) || (key >= remove_size));
+    });
+
+    YGM_ASSERT_RELEASE(imap.size() == num_items - remove_size / 2);
+  }
+
+  // Test batch erase from bag of (key, value) pairs; odd keys mismatch
+  {
+    int                           num_items   = 100;
+    int                           remove_size = 20;
+    ygm::container::map<int, int> imap(world);
+
+    if (world.rank0()) {
+      for (int i = 0; i < num_items; ++i) {
+        imap.async_insert(i, i);
+      }
+    }
+
+    world.barrier();
+
+    YGM_ASSERT_RELEASE(imap.size() == num_items);
+
+    ygm::container::bag<std::pair<int, int>> to_remove(world);
+
+    if (world.rank0()) {
+      for (int i = 0; i < remove_size; ++i) {
+        to_remove.async_insert(std::make_pair(i, i + (i % 2)));  // even: match
+      }
+    }
+
+    world.barrier();
+
+    imap.erase(to_remove);  // called on every rank, not just rank 0
+
+    imap.for_all([remove_size](const auto &key, const auto &value) {
+      YGM_ASSERT_RELEASE(((key % 2) == 1) || (key >= remove_size));
+    });
+
+    YGM_ASSERT_RELEASE(imap.size() == num_items - remove_size / 2);
   }
 
   //
@@ -236,13 +459,13 @@ int main(int argc, char **argv) {
       gather_list.clear();
     }
 
-    auto gmap = smap.key_gather(gather_list);
+    auto gmap = smap.gather_keys(gather_list);
 
     if (world.rank0()) {
-      ASSERT_RELEASE(gmap["foo"][0] == "bar");
-      ASSERT_RELEASE(gmap["foo"][1] == "baz");
+      YGM_ASSERT_RELEASE(gmap["foo"][0] == "bar");
+      YGM_ASSERT_RELEASE(gmap["foo"][1] == "baz");
     } else {
-      ASSERT_RELEASE(gmap["foo"].empty());
+      YGM_ASSERT_RELEASE(gmap["foo"].empty());
     }
   }
 
@@ -260,9 +483,9 @@ int main(int argc, char **argv) {
       smap2.async_insert(key, value);
     });
 
-    ASSERT_RELEASE(smap2.count("dog") == 1);
-    ASSERT_RELEASE(smap2.count("apple") == 1);
-    ASSERT_RELEASE(smap2.count("red") == 1);
+    YGM_ASSERT_RELEASE(smap2.count("dog") == 1);
+    YGM_ASSERT_RELEASE(smap2.count("apple") == 1);
+    YGM_ASSERT_RELEASE(smap2.count("red") == 1);
   }
 
   return 0;
diff --git a/test/test_multi_output.cpp b/test/test_multi_output.cpp
index 4490d1a9..74f03471 100644
--- a/test/test_multi_output.cpp
+++ b/test/test_multi_output.cpp
@@ -30,7 +30,7 @@ int main(int argc, char **argv) {
     }
 
     std::string expected_path(prefix_path + subpath);
-    ASSERT_RELEASE(fs::exists(fs::path(expected_path)));
+    YGM_ASSERT_RELEASE(fs::exists(fs::path(expected_path)));
 
     world.barrier();
 
@@ -79,7 +79,7 @@ int main(int argc, char **argv) {
       fs::remove_all(fs::path(base_dir));
     }
 
-    ASSERT_RELEASE(xor_write == xor_read);
+    YGM_ASSERT_RELEASE(xor_write == xor_read);
   }
 
   // Test appending
@@ -134,7 +134,7 @@ int main(int argc, char **argv) {
       fs::remove_all(fs::path(base_dir));
     }
 
-    ASSERT_RELEASE(xor_write == xor_read);
+    YGM_ASSERT_RELEASE(xor_write == xor_read);
   }
 
   return 0;
diff --git a/test/test_multimap.cpp b/test/test_multimap.cpp
index 054a8303..4a3ebccb 100644
--- a/test/test_multimap.cpp
+++ b/test/test_multimap.cpp
@@ -7,6 +7,7 @@
 #include <string>
 #include <ygm/comm.hpp>
 #include <ygm/container/map.hpp>
+#include <ygm/container/set.hpp>
 
 int main(int argc, char **argv) {
   ygm::comm world(&argc, &argv);
@@ -20,9 +21,9 @@ int main(int argc, char **argv) {
       smap.async_insert("apple", "orange");
       smap.async_insert("red", "green");
     }
-    ASSERT_RELEASE(smap.count("dog") == 1);
-    ASSERT_RELEASE(smap.count("apple") == 1);
-    ASSERT_RELEASE(smap.count("red") == 1);
+    YGM_ASSERT_RELEASE(smap.count("dog") == 1);
+    YGM_ASSERT_RELEASE(smap.count("apple") == 1);
+    YGM_ASSERT_RELEASE(smap.count("red") == 1);
   }
 
   //
@@ -34,9 +35,9 @@ int main(int argc, char **argv) {
     smap.async_insert("apple", "orange");
     smap.async_insert("red", "green");
 
-    ASSERT_RELEASE(smap.count("dog") == (size_t)world.size());
-    ASSERT_RELEASE(smap.count("apple") == (size_t)world.size());
-    ASSERT_RELEASE(smap.count("red") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap.count("dog") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap.count("apple") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap.count("red") == (size_t)world.size());
   }
 
   //
@@ -45,33 +46,33 @@ int main(int argc, char **argv) {
     ygm::container::multimap<std::string, std::string> smap(world);
     smap.async_visit("dog",
                      [](const std::string &key, const std::string &value) {
-                       ASSERT_RELEASE(key == "dog");
-                       ASSERT_RELEASE(value == "");
+                       YGM_ASSERT_RELEASE(key == "dog");
+                       YGM_ASSERT_RELEASE(value == "");
                      });
     smap.async_visit("cat",
                      [](const std::string &key, const std::string &value) {
-                       ASSERT_RELEASE(key == "cat");
-                       ASSERT_RELEASE(value == "");
+                       YGM_ASSERT_RELEASE(key == "cat");
+                       YGM_ASSERT_RELEASE(value == "");
                      });
     smap.async_visit_if_contains("red", [](const auto &key, const auto &value) {
-      ASSERT_RELEASE(false);
+      YGM_ASSERT_RELEASE(false);
     });
 
-    ASSERT_RELEASE(smap.count("dog") == 1);
-    ASSERT_RELEASE(smap.count("cat") == 1);
-    ASSERT_RELEASE(smap.count("red") == 0);
+    YGM_ASSERT_RELEASE(smap.count("dog") == 1);
+    YGM_ASSERT_RELEASE(smap.count("cat") == 1);
+    YGM_ASSERT_RELEASE(smap.count("red") == 0);
 
-    ASSERT_RELEASE(smap.size() == 2);
+    YGM_ASSERT_RELEASE(smap.size() == 2);
 
     if (world.rank() == 0) {
       smap.async_erase("dog");
     }
-    ASSERT_RELEASE(smap.count("dog") == 0);
-    ASSERT_RELEASE(smap.size() == 1);
+    YGM_ASSERT_RELEASE(smap.count("dog") == 0);
+    YGM_ASSERT_RELEASE(smap.size() == 1);
     smap.async_erase("cat");
-    ASSERT_RELEASE(smap.count("cat") == 0);
+    YGM_ASSERT_RELEASE(smap.count("cat") == 0);
 
-    ASSERT_RELEASE(smap.size() == 0);
+    YGM_ASSERT_RELEASE(smap.size() == 0);
   }
 
   //
@@ -79,31 +80,31 @@ int main(int argc, char **argv) {
   {
     ygm::container::multimap<std::string, std::string> smap(world);
     smap.async_visit("dog", [](const std::string &key, std::string &value) {
-      ASSERT_RELEASE(key == "dog");
-      ASSERT_RELEASE(value == "");
+      YGM_ASSERT_RELEASE(key == "dog");
+      YGM_ASSERT_RELEASE(value == "");
     });
     smap.async_visit("cat", [](const std::string &key, std::string &value) {
-      ASSERT_RELEASE(key == "cat");
-      ASSERT_RELEASE(value == "");
+      YGM_ASSERT_RELEASE(key == "cat");
+      YGM_ASSERT_RELEASE(value == "");
     });
     smap.async_visit_if_contains(
-        "red", [](const auto &k, const auto &v) { ASSERT_RELEASE(false); });
+        "red", [](const auto &k, const auto &v) { YGM_ASSERT_RELEASE(false); });
 
-    ASSERT_RELEASE(smap.count("dog") == 1);
-    ASSERT_RELEASE(smap.count("cat") == 1);
-    ASSERT_RELEASE(smap.count("red") == 0);
+    YGM_ASSERT_RELEASE(smap.count("dog") == 1);
+    YGM_ASSERT_RELEASE(smap.count("cat") == 1);
+    YGM_ASSERT_RELEASE(smap.count("red") == 0);
 
-    ASSERT_RELEASE(smap.size() == 2);
+    YGM_ASSERT_RELEASE(smap.size() == 2);
 
     if (world.rank() == 0) {
       smap.async_erase("dog");
     }
-    ASSERT_RELEASE(smap.count("dog") == 0);
-    ASSERT_RELEASE(smap.size() == 1);
+    YGM_ASSERT_RELEASE(smap.count("dog") == 0);
+    YGM_ASSERT_RELEASE(smap.size() == 1);
     smap.async_erase("cat");
-    ASSERT_RELEASE(smap.count("cat") == 0);
+    YGM_ASSERT_RELEASE(smap.count("cat") == 0);
 
-    ASSERT_RELEASE(smap.size() == 0);
+    YGM_ASSERT_RELEASE(smap.size() == 0);
   }
 
   // //
@@ -120,12 +121,13 @@ int main(int argc, char **argv) {
 
   //   smap.async_visit_group(
   //       "dog", [](auto pmap, const auto begin, const auto end) {
-  //         ASSERT_RELEASE(std::distance(begin, end) == 2 *
+  //         YGM_ASSERT_RELEASE(std::distance(begin, end) == 2 *
   //         pmap->comm().size());
   //       });
   //   smap.async_visit_group(
   //       "cat", [](auto pmap, const auto begin, const auto end) {
-  //         ASSERT_RELEASE(std::distance(begin, end) == pmap->comm().size());
+  //         YGM_ASSERT_RELEASE(std::distance(begin, end) ==
+  //         pmap->comm().size());
   //       });
   // }
 
@@ -139,15 +141,15 @@ int main(int argc, char **argv) {
       smap2.async_insert("apple", "orange");
       smap2.async_insert("red", "green");
       smap2.swap(smap);
-      ASSERT_RELEASE(smap2.size() == 0);
+      YGM_ASSERT_RELEASE(smap2.size() == 0);
     }
-    ASSERT_RELEASE(smap.size() == 3 * (size_t)world.size());
-    ASSERT_RELEASE(smap.count("dog") == (size_t)world.size());
-    ASSERT_RELEASE(smap.count("apple") == (size_t)world.size());
-    ASSERT_RELEASE(smap.count("red") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap.size() == 3 * (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap.count("dog") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap.count("apple") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap.count("red") == (size_t)world.size());
     smap.async_insert("car", "truck");
-    ASSERT_RELEASE(smap.size() == 4 * (size_t)world.size());
-    ASSERT_RELEASE(smap.count("car") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap.size() == 4 * (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap.count("car") == (size_t)world.size());
   }
 
   //
@@ -161,9 +163,9 @@ int main(int argc, char **argv) {
     world.barrier();
     auto values = smap.local_get("foo");
     if (smap.partitioner.owner("foo") == world.rank()) {
-      ASSERT_RELEASE(values.size() == 4 * (size_t)world.size());
+      YGM_ASSERT_RELEASE(values.size() == 4 * (size_t)world.size());
     } else {
-      ASSERT_RELEASE(values.size() == 0);
+      YGM_ASSERT_RELEASE(values.size() == 0);
     }
   }
 
@@ -181,9 +183,9 @@ int main(int argc, char **argv) {
       smap2.async_insert(key, value);
     });
 
-    ASSERT_RELEASE(smap2.count("dog") == (size_t)world.size());
-    ASSERT_RELEASE(smap2.count("apple") == (size_t)world.size());
-    ASSERT_RELEASE(smap2.count("red") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap2.count("dog") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap2.count("apple") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap2.count("red") == (size_t)world.size());
   }
 
   //
@@ -200,9 +202,173 @@ int main(int argc, char **argv) {
       smap2.async_insert(std::make_pair(k, v));
     });
 
-    ASSERT_RELEASE(smap2.count("dog") == (size_t)world.size());
-    ASSERT_RELEASE(smap2.count("apple") == (size_t)world.size());
-    ASSERT_RELEASE(smap2.count("red") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap2.count("dog") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap2.count("apple") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(smap2.count("red") == (size_t)world.size());
+  }
+
+  // Test batch erase from set: every value stored under a listed key goes
+  {
+    int                                num_items            = 100;
+    int                                remove_size          = 20;
+    int                                num_insertion_rounds = 5;
+    ygm::container::multimap<int, int> imap(world);
+
+    if (world.rank0()) {
+      for (int round = 0; round < num_insertion_rounds; ++round) {
+        for (int i = 0; i < num_items; ++i) {
+          imap.async_insert(i, round);
+        }
+      }
+    }
+
+    world.barrier();
+
+    YGM_ASSERT_RELEASE(imap.size() == num_insertion_rounds * num_items);
+
+    ygm::container::set<int> to_remove(world);
+
+    if (world.rank0()) {
+      for (int i = 0; i < remove_size; ++i) {
+        to_remove.async_insert(i);
+      }
+    }
+
+    world.barrier();
+
+    imap.erase(to_remove);  // called on every rank, not just rank 0
+
+    imap.for_all([remove_size](const auto &key, const auto &value) {
+      YGM_ASSERT_RELEASE(key >= remove_size);
+    });
+
+    YGM_ASSERT_RELEASE(imap.size() ==
+                       num_insertion_rounds * (num_items - remove_size));
+  }
+
+  // Test batch erase from vector of keys: all values under each key go
+  {
+    int                                num_items            = 100;
+    int                                remove_size          = 20;
+    int                                num_insertion_rounds = 5;
+    ygm::container::multimap<int, int> imap(world);
+
+    if (world.rank0()) {
+      for (int round = 0; round < num_insertion_rounds; ++round) {
+        for (int i = 0; i < num_items; ++i) {
+          imap.async_insert(i, round);
+        }
+      }
+    }
+
+    world.barrier();
+
+    YGM_ASSERT_RELEASE(imap.size() == num_insertion_rounds * num_items);
+
+    std::vector<int> to_remove;
+
+    if (world.rank0()) {
+      for (int i = 0; i < remove_size; ++i) {
+        to_remove.push_back(i);
+      }
+    }
+
+    world.barrier();
+
+    imap.erase(to_remove);  // vector is empty on every rank except 0
+
+    imap.for_all([remove_size](const auto &key, const auto &value) {
+      YGM_ASSERT_RELEASE(key >= remove_size);
+    });
+
+    YGM_ASSERT_RELEASE(imap.size() ==
+                       num_insertion_rounds * (num_items - remove_size));
+  }
+
+  // Test batch erase from multimap: only matching (key, value) pairs go
+  {
+    int                                num_items            = 100;
+    int                                remove_size          = 20;
+    int                                num_insertion_rounds = 5;
+    int                                num_removal_rounds   = 2;
+    ygm::container::multimap<int, int> imap(world);
+
+    if (world.rank0()) {
+      for (int round = 0; round < num_insertion_rounds; ++round) {
+        for (int i = 0; i < num_items; ++i) {
+          imap.async_insert(i, round);
+        }
+      }
+    }
+
+    world.barrier();
+
+    YGM_ASSERT_RELEASE(imap.size() == num_insertion_rounds * num_items);
+
+    ygm::container::multimap<int, int> to_remove(world);
+
+    if (world.rank0()) {
+      for (int round = 0; round < num_removal_rounds; ++round) {
+        for (int i = 0; i < remove_size; ++i) {
+          to_remove.async_insert(i, round);
+        }
+      }
+    }
+
+    world.barrier();
+
+    imap.erase(to_remove);  // called on every rank, not just rank 0
+
+    imap.for_all([remove_size, num_removal_rounds](const auto &key,
+                                                   const auto &value) {
+      YGM_ASSERT_RELEASE((key >= remove_size) || (value >= num_removal_rounds));
+    });
+
+    YGM_ASSERT_RELEASE(imap.size() == num_insertion_rounds * num_items -
+                                          num_removal_rounds * remove_size);
+  }
+
+  // Test batch erase from vector of (key, value) pairs: only exact matches go
+  {
+    int                                num_items            = 100;
+    int                                remove_size          = 20;
+    int                                num_insertion_rounds = 5;
+    int                                num_removal_rounds   = 2;
+    ygm::container::multimap<int, int> imap(world);
+
+    if (world.rank0()) {
+      for (int round = 0; round < num_insertion_rounds; ++round) {
+        for (int i = 0; i < num_items; ++i) {
+          imap.async_insert(i, round);
+        }
+      }
+    }
+
+    world.barrier();
+
+    YGM_ASSERT_RELEASE(imap.size() == num_insertion_rounds * num_items);
+
+    std::vector<std::pair<int, int>> to_remove;
+
+    if (world.rank0()) {
+      for (int round = 0; round < num_removal_rounds; ++round) {
+        for (int i = 0; i < remove_size; ++i) {
+          to_remove.push_back(std::make_pair(i, round));
+        }
+      }
+    }
+
+    world.barrier();
+
+    imap.erase(to_remove);  // vector is empty on every rank except 0
+
+    imap.for_all([remove_size, num_removal_rounds](const auto &key,
+                                                   const auto &value) {
+      YGM_ASSERT_RELEASE((key >= remove_size) || (value >= num_removal_rounds));
+    });
+
+    YGM_ASSERT_RELEASE(imap.size() == num_insertion_rounds * num_items -
+                                          num_removal_rounds * remove_size);
   }
 
   return 0;
diff --git a/test/test_multiset.cpp b/test/test_multiset.cpp
index 31f2c86d..0f89e6b0 100644
--- a/test/test_multiset.cpp
+++ b/test/test_multiset.cpp
@@ -22,20 +22,20 @@ int main(int argc, char** argv) {
       sset.async_insert("apple");
       sset.async_insert("red");
     }
-    ASSERT_RELEASE(sset.count("dog") == 2);
-    ASSERT_RELEASE(sset.count("apple") == 1);
-    ASSERT_RELEASE(sset.count("red") == 1);
-    ASSERT_RELEASE(sset.size() == 4);
+    YGM_ASSERT_RELEASE(sset.count("dog") == 2);
+    YGM_ASSERT_RELEASE(sset.count("apple") == 1);
+    YGM_ASSERT_RELEASE(sset.count("red") == 1);
+    YGM_ASSERT_RELEASE(sset.size() == 4);
     if (world.rank() == 0) {
       sset.async_erase("dog");
     }
-    ASSERT_RELEASE(sset.size() == 2);
+    YGM_ASSERT_RELEASE(sset.size() == 2);
     if (world.rank() == 0) {
       sset.async_erase("apple");
     }
-    ASSERT_RELEASE(sset.size() == 1);
-    ASSERT_RELEASE(sset.count("dog") == 0);
-    ASSERT_RELEASE(sset.count("apple") == 0);
+    YGM_ASSERT_RELEASE(sset.size() == 1);
+    YGM_ASSERT_RELEASE(sset.count("dog") == 0);
+    YGM_ASSERT_RELEASE(sset.count("apple") == 0);
   }
 
   //
@@ -47,31 +47,29 @@ int main(int argc, char** argv) {
     sset.async_insert("apple");
     sset.async_insert("red");
 
-    ASSERT_RELEASE(sset.count("dog") == (size_t)world.size());
-    ASSERT_RELEASE(sset.count("apple") == (size_t)world.size());
-    ASSERT_RELEASE(sset.count("red") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(sset.count("dog") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(sset.count("apple") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(sset.count("red") == (size_t)world.size());
 
     sset.async_insert("dog");
-    ASSERT_RELEASE(sset.count("dog") == (size_t)world.size() * 2);
+    YGM_ASSERT_RELEASE(sset.count("dog") == (size_t)world.size() * 2);
   }
 
   //
   // Test async_contains
   {
-    static bool              set_contains = false;
+    static bool                   set_contains = false;
     ygm::container::multiset<int> iset(world);
     world.barrier();
     int val = 42;
 
-    auto f = [](bool contains, const int& i) {
-      set_contains = contains;
-    };   
+    auto f = [](bool contains, const int& i) { set_contains = contains; };
 
     if (world.rank0()) {
       iset.async_contains(val, f);
     }
     world.barrier();
-    ASSERT_RELEASE(not ygm::logical_or(set_contains, world));
+    YGM_ASSERT_RELEASE(not ygm::logical_or(set_contains, world));
 
     if (world.rank0()) {
       iset.async_insert(val);
@@ -81,31 +79,107 @@ int main(int argc, char** argv) {
       iset.async_contains(val, f);
     }
     world.barrier();
-    ASSERT_RELEASE(ygm::logical_or(set_contains, world));
+    YGM_ASSERT_RELEASE(ygm::logical_or(set_contains, world));
   }
 
   //
   // Test async_insert_contains
   {
-    static bool              already_contains = false;
+    static bool                           already_contains = false;
     ygm::container::multiset<std::string> sset(world);
     world.barrier();
 
     auto f = [](bool& contains, const std::string& s) {
       already_contains = contains;
-    };   
+    };
 
     if (world.rank0()) {
       sset.async_insert_contains("dog", f);
     }
     world.barrier();
-    ASSERT_RELEASE(not ygm::logical_or(already_contains, world));
+    YGM_ASSERT_RELEASE(not ygm::logical_or(already_contains, world));
 
     if (world.rank0()) {
       sset.async_insert_contains("dog", f);
     }
     world.barrier();
-    ASSERT_RELEASE(ygm::logical_or(already_contains, world));
+    YGM_ASSERT_RELEASE(ygm::logical_or(already_contains, world));
+  }
+
+  // Test batch erase from set: all copies of each listed item should go
+  {
+    int                           num_items            = 100;
+    int                           num_insertion_rounds = 5;
+    int                           remove_size          = 20;
+    ygm::container::multiset<int> iset(world);
+
+    if (world.rank0()) {
+      for (int round = 0; round < num_insertion_rounds; ++round) {
+        for (int i = 0; i < num_items; ++i) {
+          iset.async_insert(i);
+        }
+      }
+    }
+
+    world.barrier();
+
+    YGM_ASSERT_RELEASE(iset.size() == num_insertion_rounds * num_items);
+
+    ygm::container::set<int> to_remove(world);
+
+    if (world.rank0()) {
+      for (int i = 0; i < remove_size; ++i) {
+        to_remove.async_insert(i);
+      }
+    }
+
+    world.barrier();
+
+    iset.erase(to_remove);  // called on every rank, not just rank 0
+
+    iset.for_all([remove_size](const auto& item) {
+      YGM_ASSERT_RELEASE(item >= remove_size);
+    });
+
+    YGM_ASSERT_RELEASE(iset.size() ==
+                       num_insertion_rounds * (num_items - remove_size));
+  }
+
+  // Test batch erase from a std::vector of items (filled on rank 0 only)
+  {
+    int                           num_items            = 100;
+    int                           num_insertion_rounds = 5;
+    int                           remove_size          = 20;
+    ygm::container::multiset<int> iset(world);
+
+    if (world.rank0()) {
+      for (int round = 0; round < num_insertion_rounds; ++round) {
+        for (int i = 0; i < num_items; ++i) {
+          iset.async_insert(i);
+        }
+      }
+    }
+    // NOTE(review): no world.barrier() before this size() check - confirm OK
+    YGM_ASSERT_RELEASE(iset.size() == num_items * num_insertion_rounds);
+
+    std::vector<int> to_remove;
+
+    if (world.rank0()) {
+      for (int i = 0; i < remove_size; ++i) {
+        to_remove.push_back(i);
+      }
+    }
+
+    world.barrier();
+
+    iset.erase(to_remove);  // vector is empty on every rank except 0
+
+    iset.for_all([remove_size](const auto& item) {
+      YGM_ASSERT_RELEASE(item >= remove_size);
+    });
+
+    YGM_ASSERT_RELEASE(iset.size() ==
+                       num_insertion_rounds * (num_items - remove_size));
   }
 
   //
@@ -118,15 +192,15 @@ int main(int argc, char** argv) {
       sset2.async_insert("apple");
       sset2.async_insert("red");
       sset2.swap(sset);
-      ASSERT_RELEASE(sset2.size() == 0);
+      YGM_ASSERT_RELEASE(sset2.size() == 0);
     }
-    ASSERT_RELEASE(sset.size() == 3 * (size_t)world.size());
-    ASSERT_RELEASE(sset.count("dog") == (size_t)world.size());
-    ASSERT_RELEASE(sset.count("apple") == (size_t)world.size());
-    ASSERT_RELEASE(sset.count("red") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(sset.size() == 3 * (size_t)world.size());
+    YGM_ASSERT_RELEASE(sset.count("dog") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(sset.count("apple") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(sset.count("red") == (size_t)world.size());
     sset.async_insert("car");
-    ASSERT_RELEASE(sset.size() == 4 * (size_t)world.size());
-    ASSERT_RELEASE(sset.count("car") == (size_t)world.size());
+    YGM_ASSERT_RELEASE(sset.size() == 4 * (size_t)world.size());
+    YGM_ASSERT_RELEASE(sset.count("car") == (size_t)world.size());
   }
 
   //
@@ -139,17 +213,17 @@ int main(int argc, char** argv) {
     sset1.async_insert("apple");
     sset1.async_insert("red");
 
-    sset1.for_all([&sset2](const auto &key) { sset2.async_insert(key); });
+    sset1.for_all([&sset2](const auto& key) { sset2.async_insert(key); });
 
-    ASSERT_RELEASE(sset2.count("dog") == world.size());
-    ASSERT_RELEASE(sset2.count("apple") == world.size());
-    ASSERT_RELEASE(sset2.count("red") == world.size());
+    YGM_ASSERT_RELEASE(sset2.count("dog") == world.size());
+    YGM_ASSERT_RELEASE(sset2.count("apple") == world.size());
+    YGM_ASSERT_RELEASE(sset2.count("red") == world.size());
   }
 
   //
   // Test vector of sets
   {
-    int                                   num_sets = 4;
+    int                                        num_sets = 4;
     std::vector<ygm::container::multiset<int>> vec_sets;
 
     for (int i = 0; i < num_sets; ++i) {
@@ -164,9 +238,9 @@ int main(int argc, char** argv) {
 
     world.barrier();
     for (int set_index = 0; set_index < num_sets; ++set_index) {
-      ASSERT_RELEASE(vec_sets[set_index].size() == world.size() * 2);
+      YGM_ASSERT_RELEASE(vec_sets[set_index].size() == world.size() * 2);
     }
   }
 
   return 0;
-}
\ No newline at end of file
+}
diff --git a/test/test_ndjson_parser.cpp b/test/test_ndjson_parser.cpp
index 1f1c9923..d017fedd 100644
--- a/test/test_ndjson_parser.cpp
+++ b/test/test_ndjson_parser.cpp
@@ -18,7 +18,7 @@ int main(int argc, char** argv) {
   jsonp.for_all([&world, &local_count](const auto& json) { ++local_count; });
 
   world.barrier();
-  ASSERT_RELEASE(world.all_reduce_sum(local_count) == 3);
+  YGM_ASSERT_RELEASE(world.all_reduce_sum(local_count) == 3);
 
   return 0;
 }
diff --git a/test/test_parquet_reader.cpp b/test/test_parquet_reader.cpp
index aac2aa2e..f55c3548 100644
--- a/test/test_parquet_reader.cpp
+++ b/test/test_parquet_reader.cpp
@@ -33,7 +33,7 @@ int main(int argc, char** argv) {
 
     world.barrier();
     auto row_count = world.all_reduce_sum(local_count);
-    ASSERT_RELEASE(row_count == 12);
+    YGM_ASSERT_RELEASE(row_count == 12);
   }
 
   //
@@ -74,9 +74,9 @@ int main(int argc, char** argv) {
 
     world.barrier();
     auto row_count = world.all_reduce_sum(rows.size());
-    ASSERT_RELEASE(row_count == 12);
+    YGM_ASSERT_RELEASE(row_count == 12);
 
-    ASSERT_RELEASE(world.all_reduce_sum(strings.count("Hennessey Venom F5")) ==
+    YGM_ASSERT_RELEASE(world.all_reduce_sum(strings.count("Hennessey Venom F5")) ==
                    1);
   }
 
@@ -126,9 +126,9 @@ int main(int argc, char** argv) {
 
     world.barrier();
     const auto sum = world.all_reduce_sum(local_sum);
-    ASSERT_RELEASE(sum == 11111111111);
+    YGM_ASSERT_RELEASE(sum == 11111111111);
     const auto row_count = world.all_reduce_sum(local_count);
-    ASSERT_RELEASE(row_count == 11);
+    YGM_ASSERT_RELEASE(row_count == 11);
   }
 
   return 0;
diff --git a/test/test_parquet_reader_json.cpp b/test/test_parquet_reader_json.cpp
index 0ce0b5d3..0e8e4b14 100644
--- a/test/test_parquet_reader_json.cpp
+++ b/test/test_parquet_reader_json.cpp
@@ -32,49 +32,49 @@ int main(int argc, char** argv) {
     world.async(
         0,
         [](auto, const auto& obj) {
-          ASSERT_RELEASE(obj.contains("id"));
-          ASSERT_RELEASE(obj.contains("bool"));
-          ASSERT_RELEASE(obj.contains("int32"));
-          ASSERT_RELEASE(obj.contains("int64"));
-          ASSERT_RELEASE(obj.contains("float"));
-          ASSERT_RELEASE(obj.contains("double"));
-          ASSERT_RELEASE(obj.contains("byte_array"));
+          YGM_ASSERT_RELEASE(obj.contains("id"));
+          YGM_ASSERT_RELEASE(obj.contains("bool"));
+          YGM_ASSERT_RELEASE(obj.contains("int32"));
+          YGM_ASSERT_RELEASE(obj.contains("int64"));
+          YGM_ASSERT_RELEASE(obj.contains("float"));
+          YGM_ASSERT_RELEASE(obj.contains("double"));
+          YGM_ASSERT_RELEASE(obj.contains("byte_array"));
 
-          ASSERT_RELEASE(obj.at("id").is_int64());
-          ASSERT_RELEASE(obj.at("bool").is_bool());
-          ASSERT_RELEASE(obj.at("int32").is_int64());
-          ASSERT_RELEASE(obj.at("int64").is_int64());
-          ASSERT_RELEASE(obj.at("float").is_double());
-          ASSERT_RELEASE(obj.at("double").is_double());
-          ASSERT_RELEASE(obj.at("byte_array").is_string());
+          YGM_ASSERT_RELEASE(obj.at("id").is_int64());
+          YGM_ASSERT_RELEASE(obj.at("bool").is_bool());
+          YGM_ASSERT_RELEASE(obj.at("int32").is_int64());
+          YGM_ASSERT_RELEASE(obj.at("int64").is_int64());
+          YGM_ASSERT_RELEASE(obj.at("float").is_double());
+          YGM_ASSERT_RELEASE(obj.at("double").is_double());
+          YGM_ASSERT_RELEASE(obj.at("byte_array").is_string());
 
           const auto id = obj.at("id").as_int64();
           if (id == 0) {
-            ASSERT_RELEASE(obj.at("bool").as_bool() == true);
-            ASSERT_RELEASE(obj.at("int32").as_int64() == -1);
-            ASSERT_RELEASE(obj.at("int64").as_int64() == -(1ULL << 32) - 1);
-            ASSERT_RELEASE(obj.at("float").as_double() == 1.5);
-            ASSERT_RELEASE(obj.at("double").as_double() == 10.5);
-            ASSERT_RELEASE(obj.at("byte_array").as_string() == "aa");
+            YGM_ASSERT_RELEASE(obj.at("bool").as_bool() == true);
+            YGM_ASSERT_RELEASE(obj.at("int32").as_int64() == -1);
+            YGM_ASSERT_RELEASE(obj.at("int64").as_int64() == -(1ULL << 32) - 1);
+            YGM_ASSERT_RELEASE(obj.at("float").as_double() == 1.5);
+            YGM_ASSERT_RELEASE(obj.at("double").as_double() == 10.5);
+            YGM_ASSERT_RELEASE(obj.at("byte_array").as_string() == "aa");
             ++cnt1;
           } else if (id == 1) {
-            ASSERT_RELEASE(obj.at("bool").as_bool() == false);
-            ASSERT_RELEASE(obj.at("int32").as_int64() == -2);
-            ASSERT_RELEASE(obj.at("int64").as_int64() == -(1ULL << 32) - 2);
-            ASSERT_RELEASE(obj.at("float").as_double() == 2.5);
-            ASSERT_RELEASE(obj.at("double").as_double() == 20.5);
-            ASSERT_RELEASE(obj.at("byte_array").as_string() == "bb");
+            YGM_ASSERT_RELEASE(obj.at("bool").as_bool() == false);
+            YGM_ASSERT_RELEASE(obj.at("int32").as_int64() == -2);
+            YGM_ASSERT_RELEASE(obj.at("int64").as_int64() == -(1ULL << 32) - 2);
+            YGM_ASSERT_RELEASE(obj.at("float").as_double() == 2.5);
+            YGM_ASSERT_RELEASE(obj.at("double").as_double() == 20.5);
+            YGM_ASSERT_RELEASE(obj.at("byte_array").as_string() == "bb");
             ++cnt2;
           } else if (id == 2) {
-            ASSERT_RELEASE(obj.at("bool").as_bool() == true);
-            ASSERT_RELEASE(obj.at("int32").as_int64() == -3);
-            ASSERT_RELEASE(obj.at("int64").as_int64() == -(1ULL << 32) - 3);
-            ASSERT_RELEASE(obj.at("float").as_double() == 3.5);
-            ASSERT_RELEASE(obj.at("double").as_double() == 30.5);
-            ASSERT_RELEASE(obj.at("byte_array").as_string() == "cc");
+            YGM_ASSERT_RELEASE(obj.at("bool").as_bool() == true);
+            YGM_ASSERT_RELEASE(obj.at("int32").as_int64() == -3);
+            YGM_ASSERT_RELEASE(obj.at("int64").as_int64() == -(1ULL << 32) - 3);
+            YGM_ASSERT_RELEASE(obj.at("float").as_double() == 3.5);
+            YGM_ASSERT_RELEASE(obj.at("double").as_double() == 30.5);
+            YGM_ASSERT_RELEASE(obj.at("byte_array").as_string() == "cc");
             ++cnt3;
           } else {
-            ASSERT_RELEASE(false);
+            YGM_ASSERT_RELEASE(false);
           }
         },
         obj);
@@ -82,13 +82,13 @@ int main(int argc, char** argv) {
   world.barrier();
 
   if (world.rank0()) {
-    ASSERT_RELEASE(cnt1 == 1);
-    ASSERT_RELEASE(cnt2 == 1);
-    ASSERT_RELEASE(cnt3 == 1);
+    YGM_ASSERT_RELEASE(cnt1 == 1);
+    YGM_ASSERT_RELEASE(cnt2 == 1);
+    YGM_ASSERT_RELEASE(cnt3 == 1);
   } else {
-    ASSERT_RELEASE(cnt1 == 0);
-    ASSERT_RELEASE(cnt2 == 0);
-    ASSERT_RELEASE(cnt3 == 0);
+    YGM_ASSERT_RELEASE(cnt1 == 0);
+    YGM_ASSERT_RELEASE(cnt2 == 0);
+    YGM_ASSERT_RELEASE(cnt3 == 0);
   }
 
   return 0;
diff --git a/test/test_random.cpp b/test/test_random.cpp
index dc8703f4..9f28e714 100644
--- a/test/test_random.cpp
+++ b/test/test_random.cpp
@@ -32,17 +32,17 @@ int main(int argc, char** argv) {
 
     int local_counter(0);
     seed_set.for_all([&local_counter](int key, int val) {
-      ASSERT_RELEASE(val == 1);
+      YGM_ASSERT_RELEASE(val == 1);
       ++local_counter;
     });
 
     // this can fail if two samples collide, but that is very unlikely.
     // is it worth the trouble of making the test more robust?
-    rn_set.for_all([](int key, int val) { ASSERT_RELEASE(val == 1); });
-    sample_set.for_all([](int key, int val) { ASSERT_RELEASE(val == 1); });
+    rn_set.for_all([](int key, int val) { YGM_ASSERT_RELEASE(val == 1); });
+    sample_set.for_all([](int key, int val) { YGM_ASSERT_RELEASE(val == 1); });
 
     int global_counter = world.all_reduce_sum(local_counter);
 
-    ASSERT_RELEASE(global_counter == world.size());
+    YGM_ASSERT_RELEASE(global_counter == world.size());
   }
 }
\ No newline at end of file
diff --git a/test/test_recursion_large_messages.cpp b/test/test_recursion_large_messages.cpp
index 03546cd8..2696cf82 100644
--- a/test/test_recursion_large_messages.cpp
+++ b/test/test_recursion_large_messages.cpp
@@ -49,7 +49,7 @@ int main(int argc, char **argv) {
     }
 
     world.barrier();
-    ASSERT_RELEASE(ygm::sum(counter, world) == ((size_t(1) << max_hops) - 1));
+    YGM_ASSERT_RELEASE(ygm::sum(counter, world) == ((size_t(1) << max_hops) - 1));
   }
 
   return 0;
diff --git a/test/test_recursion_progress.cpp b/test/test_recursion_progress.cpp
index 59721447..f42a7ad0 100644
--- a/test/test_recursion_progress.cpp
+++ b/test/test_recursion_progress.cpp
@@ -39,7 +39,7 @@ int main(int argc, char **argv) {
     }
 
     world.barrier();
-    ASSERT_RELEASE(ygm::sum(counter, world) == (world.size() * trips + 1));
+    YGM_ASSERT_RELEASE(ygm::sum(counter, world) == (world.size() * trips + 1));
   }
 
   return 0;
diff --git a/test/test_reduce.cpp b/test/test_reduce.cpp
new file mode 100644
index 00000000..c43861cb
--- /dev/null
+++ b/test/test_reduce.cpp
@@ -0,0 +1,30 @@
+// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
+// Project Developers. See the top-level COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: MIT
+
+#undef NDEBUG
+
+#include <set>
+#include <string>
+#include <vector>
+#include <ygm/comm.hpp>
+#include <ygm/container/bag.hpp>
+#include <ygm/container/counting_set.hpp>
+#include <ygm/random.hpp>
+
+int main(int argc, char** argv) {
+  ygm::comm world(&argc, &argv);
+  // Reduce a bag with std::plus: first every entry, then only the even ones.
+  {
+    ygm::container::bag<int> ibag(world, {42, 1, 8, 16, 32, 3, 4, 5, 6, 7});
+    // The ten values add up to 124. Compare with ==; `=` would always pass.
+    int sum = ibag.reduce(std::plus<int>());
+    YGM_ASSERT_RELEASE(sum == 124);
+
+    // Even entries only: 42 + 8 + 16 + 32 + 4 + 6 = 108.
+    int even_sum =
+        ibag.filter([](int i) { return i % 2 == 0; }).reduce(std::plus<int>());
+    YGM_ASSERT_RELEASE(even_sum == 108);
+  }
+}
\ No newline at end of file
diff --git a/test/test_reduce_by_key.cpp b/test/test_reduce_by_key.cpp
index 1cfa0f22..b274dd54 100644
--- a/test/test_reduce_by_key.cpp
+++ b/test/test_reduce_by_key.cpp
@@ -20,9 +20,9 @@ int main(int argc, char** argv) {
     auto test = ygm::container::reduce_by_key_map<int, int>(
         mybag, [](int a, int b) { return a + b; }, world);
 
-    ASSERT_RELEASE(test.size() == 1);
+    YGM_ASSERT_RELEASE(test.size() == 1);
     test.async_visit(
-        0, [](int key, int value, int size) { ASSERT_RELEASE(value == size); },
+        0, [](int key, int value, int size) { YGM_ASSERT_RELEASE(value == size); },
         world.size());
   }
 
@@ -35,21 +35,21 @@ int main(int argc, char** argv) {
     auto test = ygm::container::reduce_by_key_map<std::string, size_t>(
         vec_str_count, [](size_t a, size_t b) { return a + b; }, world);
 
-    ASSERT_RELEASE(test.size() == 2);
+    YGM_ASSERT_RELEASE(test.size() == 2);
 
     size_t found = 0;
     test.for_all([&found, &world](const std::string& s, size_t c) {
       if (s == "Howdy") {
         ++found;
-        ASSERT_RELEASE(c == world.size());
+        YGM_ASSERT_RELEASE(c == world.size());
       } else if (s == "Aggs") {
         ++found;
-        ASSERT_RELEASE(c == world.size() * 2);
+        YGM_ASSERT_RELEASE(c == world.size() * 2);
       } else {
-        ASSERT_RELEASE(false);
+        YGM_ASSERT_RELEASE(false);
       }
     });
-    ASSERT_RELEASE(world.all_reduce_sum(found) == 2);
+    YGM_ASSERT_RELEASE(world.all_reduce_sum(found) == 2);
   }
 
   return 0;
diff --git a/test/test_reducing_adapter.cpp b/test/test_reducing_adapter.cpp
index 4b2e71f4..01f65d53 100644
--- a/test/test_reducing_adapter.cpp
+++ b/test/test_reducing_adapter.cpp
@@ -44,12 +44,12 @@ int main(int argc, char **argv) {
     test_map.for_all(
         [&num_reductions, &world](const auto &key, const auto &value) {
           if (key == "max") {
-            ASSERT_RELEASE(value == num_reductions - 1);
+            YGM_ASSERT_RELEASE(value == num_reductions - 1);
           } else if (key == "sum") {
-            ASSERT_RELEASE(value == world.size() * num_reductions *
+            YGM_ASSERT_RELEASE(value == world.size() * num_reductions *
                                         (num_reductions - 1) / 2);
           } else {
-            ASSERT_RELEASE(false);
+            YGM_ASSERT_RELEASE(false);
           }
         });
   }
@@ -70,9 +70,9 @@ int main(int argc, char **argv) {
 
     test_array.for_all([&num_reductions](const auto &index, const auto &value) {
       if (index == 0) {
-        ASSERT_RELEASE(value == num_reductions - 1);
+        YGM_ASSERT_RELEASE(value == num_reductions - 1);
       } else {
-        ASSERT_RELEASE(value == 0);
+        YGM_ASSERT_RELEASE(value == 0);
       }
     });
   }
diff --git a/test/test_set.cpp b/test/test_set.cpp
index 9072e86d..de1e51f7 100644
--- a/test/test_set.cpp
+++ b/test/test_set.cpp
@@ -9,10 +9,10 @@
 #include <string>
 
 #include <ygm/comm.hpp>
+#include <ygm/container/bag.hpp>
 #include <ygm/container/set.hpp>
-#include <ygm/for_all_adapter.hpp>
 
-int main(int argc, char **argv) {
+int main(int argc, char** argv) {
   ygm::comm world(&argc, &argv);
 
   //
@@ -29,17 +29,17 @@ int main(int argc, char **argv) {
 
   //
   // Test Rank 0 async_insert
-  { 
+  {
     ygm::container::set<std::string> sset(world);
     if (world.rank() == 0) {
       sset.async_insert("dog");
       sset.async_insert("apple");
       sset.async_insert("red");
     }
-    ASSERT_RELEASE(sset.count("dog") == 1);
-    ASSERT_RELEASE(sset.count("red") == 1);
-    ASSERT_RELEASE(sset.count("apple") == 1);
-    ASSERT_RELEASE(sset.size() == 3);
+    YGM_ASSERT_RELEASE(sset.count("dog") == 1);
+    YGM_ASSERT_RELEASE(sset.count("red") == 1);
+    YGM_ASSERT_RELEASE(sset.count("apple") == 1);
+    YGM_ASSERT_RELEASE(sset.size() == 3);
 
     ygm::container::set<int> iset(world);
     if (world.rank() == 0) {
@@ -47,29 +47,28 @@ int main(int argc, char **argv) {
       iset.async_insert(7);
       iset.async_insert(100);
     }
-    ASSERT_RELEASE(iset.count(42) == 1);
-    ASSERT_RELEASE(iset.count(7) == 1);
-    ASSERT_RELEASE(iset.count(100) == 1);
-    ASSERT_RELEASE(iset.size() == 3);
+    YGM_ASSERT_RELEASE(iset.count(42) == 1);
+    YGM_ASSERT_RELEASE(iset.count(7) == 1);
+    YGM_ASSERT_RELEASE(iset.count(100) == 1);
+    YGM_ASSERT_RELEASE(iset.size() == 3);
   }
 
   //
   // Test Rank 0 async_insert with ygm set pointer
   {
     ygm::container::set<std::string> sset(world);
-    auto sset_ptr = sset.get_ygm_ptr();
+    auto                             sset_ptr = sset.get_ygm_ptr();
     if (world.rank() == 0) {
       sset_ptr->async_insert("dog");
       sset_ptr->async_insert("apple");
       sset_ptr->async_insert("red");
     }
-    ASSERT_RELEASE(sset.count("dog") == 1);
-    ASSERT_RELEASE(sset.count("apple") == 1);
-    ASSERT_RELEASE(sset.count("red") == 1);
-    ASSERT_RELEASE(sset.size() == 3);
+    YGM_ASSERT_RELEASE(sset.count("dog") == 1);
+    YGM_ASSERT_RELEASE(sset.count("apple") == 1);
+    YGM_ASSERT_RELEASE(sset.count("red") == 1);
+    YGM_ASSERT_RELEASE(sset.size() == 3);
   }
 
-  
   //
   // Test all ranks async_insert
   {
@@ -79,32 +78,29 @@ int main(int argc, char **argv) {
     sset.async_insert("apple");
     sset.async_insert("red");
 
-    ASSERT_RELEASE(sset.count("dog") == 1);
-    ASSERT_RELEASE(sset.count("apple") == 1);
-    ASSERT_RELEASE(sset.count("red") == 1);
-    ASSERT_RELEASE(sset.size() == 3);
+    YGM_ASSERT_RELEASE(sset.count("dog") == 1);
+    YGM_ASSERT_RELEASE(sset.count("apple") == 1);
+    YGM_ASSERT_RELEASE(sset.count("red") == 1);
+    YGM_ASSERT_RELEASE(sset.size() == 3);
     sset.async_erase("dog");
-    ASSERT_RELEASE(sset.count("dog") == 0);
-    ASSERT_RELEASE(sset.size() == 2);
+    YGM_ASSERT_RELEASE(sset.count("dog") == 0);
+    YGM_ASSERT_RELEASE(sset.size() == 2);
   }
 
-
   //
   // Test async_contains
   {
     static bool              set_contains = false;
     ygm::container::set<int> iset(world);
-    int val = 42;
+    int                      val = 42;
 
-    auto f = [](bool& contains, const int& i) {
-      set_contains = contains;
-    };   
+    auto f = [](bool& contains, const int& i) { set_contains = contains; };
 
     if (world.rank0()) {
       iset.async_contains(val, f);
     }
     world.barrier();
-    ASSERT_RELEASE(not ygm::logical_or(set_contains, world));
+    YGM_ASSERT_RELEASE(not ygm::logical_or(set_contains, world));
 
     if (world.rank0()) {
       iset.async_insert(val);
@@ -115,43 +111,135 @@ int main(int argc, char **argv) {
       iset.async_contains(val, f);
     }
     world.barrier();
-    ASSERT_RELEASE(ygm::logical_or(set_contains, world));
+    YGM_ASSERT_RELEASE(ygm::logical_or(set_contains, world));
   }
 
   //
   // Test async_insert_contains
   {
-    static bool              did_contain = false;
+    static bool                      did_contain = false;
     ygm::container::set<std::string> sset(world);
 
     auto f = [](bool& contains, const std::string& s) {
       did_contain = contains;
-    };   
+    };
 
     if (world.rank0()) {
       sset.async_insert_contains("dog", f);
     }
     world.barrier();
-    ASSERT_RELEASE(not ygm::logical_or(did_contain, world));
+    YGM_ASSERT_RELEASE(not ygm::logical_or(did_contain, world));
 
     if (world.rank0()) {
       sset.async_insert_contains("dog", f);
     }
     world.barrier();
-    ASSERT_RELEASE(ygm::logical_or(did_contain, world));
+    YGM_ASSERT_RELEASE(ygm::logical_or(did_contain, world));
+  }
+
+  // Test batch erase from set
+  {
+    int                      num_items   = 100;
+    int                      remove_size = 20;
+    ygm::container::set<int> iset(world);
+
+    if (world.rank0()) {
+      for (int i = 0; i < num_items; ++i) {
+        iset.async_insert(i);
+      }
+    }
+
+    world.barrier();
+
+    YGM_ASSERT_RELEASE(iset.size() == num_items);
+
+    ygm::container::set<int> to_remove(world);
+
+    if (world.rank0()) {
+      for (int i = 0; i < remove_size; ++i) {
+        to_remove.async_insert(i);
+      }
+    }
+
+    world.barrier();
+
+    iset.erase(to_remove);
+
+    iset.for_all([remove_size, &world](const auto& item) {
+      YGM_ASSERT_RELEASE(item >= remove_size);
+    });
+
+    YGM_ASSERT_RELEASE(iset.size() == num_items - remove_size);
+  }
+
+  // Test batch erase from vector
+  {
+    int                      num_items   = 100;
+    int                      remove_size = 20;
+    ygm::container::set<int> iset(world);
+
+    if (world.rank0()) {
+      for (int i = 0; i < num_items; ++i) {
+        iset.async_insert(i);
+      }
+    }
+
+    world.barrier();
+
+    YGM_ASSERT_RELEASE(iset.size() == num_items);
+
+    std::vector<int> to_remove;
+
+    if (world.rank0()) {
+      for (int i = 0; i < remove_size; ++i) {
+        to_remove.push_back(i);
+      }
+    }
+
+    world.barrier();
+
+    iset.erase(to_remove);
+
+    iset.for_all([remove_size, &world](const auto& item) {
+      YGM_ASSERT_RELEASE(item >= remove_size);
+    });
+
+    YGM_ASSERT_RELEASE(iset.size() == num_items - remove_size);
+  }
+
+  // Test from bag
+  {
+    ygm::container::bag<std::string> sbag(
+        world, {"one", "two", "three", "one", "two"});
+    YGM_ASSERT_RELEASE(sbag.size() == 5);
+
+    ygm::container::set<std::string> sset(world, sbag);
+    YGM_ASSERT_RELEASE(sset.size() == 3);
   }
 
+  // Test initializer list
+  {
+    ygm::container::set<std::string> sset(
+        world, {"one", "two", "three", "one", "two"});
+    YGM_ASSERT_RELEASE(sset.size() == 3);
+  }
+
+  // Test from STL vector
+  {
+    std::vector<int>         v({1, 2, 3, 4, 5, 1, 1, 1, 3});
+    ygm::container::set<int> iset(world, v);
+    YGM_ASSERT_RELEASE(iset.size() == 5);
+  }
 
   //
   // Test additional arguments of async_contains
   // {
   //   ygm::container::set<std::string> sset(world);
-  //   sset.async_contains("howdy", [](bool c, const std::string s, int i, float f){}, 3, 3.14);
-  //   sset.async_contains("howdy", [](auto ptr_set, bool c, const std::string s){});
-  //   world.barrier();
+  //   sset.async_contains("howdy", [](bool c, const std::string s, int i, float
+  //   f){}, 3, 3.14); sset.async_contains("howdy", [](auto ptr_set, bool c,
+  //   const std::string s){}); world.barrier();
   // }
 
-
   //
   // Test swap
   {
@@ -162,15 +250,15 @@ int main(int argc, char **argv) {
       sset2.async_insert("apple");
       sset2.async_insert("red");
       sset2.swap(sset);
-      ASSERT_RELEASE(sset2.size() == 0);
+      YGM_ASSERT_RELEASE(sset2.size() == 0);
     }
-    ASSERT_RELEASE(sset.size() == 3);
-    ASSERT_RELEASE(sset.count("dog") == 1);
-    ASSERT_RELEASE(sset.count("apple") == 1);
-    ASSERT_RELEASE(sset.count("red") == 1);
+    YGM_ASSERT_RELEASE(sset.size() == 3);
+    YGM_ASSERT_RELEASE(sset.count("dog") == 1);
+    YGM_ASSERT_RELEASE(sset.count("apple") == 1);
+    YGM_ASSERT_RELEASE(sset.count("red") == 1);
     sset.async_insert("car");
-    ASSERT_RELEASE(sset.size() == 4);
-    ASSERT_RELEASE(sset.count("car") == 1);
+    YGM_ASSERT_RELEASE(sset.size() == 4);
+    YGM_ASSERT_RELEASE(sset.count("car") == 1);
   }
 
   //
@@ -183,11 +271,11 @@ int main(int argc, char **argv) {
     sset1.async_insert("apple");
     sset1.async_insert("red");
 
-    sset1.for_all([&sset2](const auto &key) { sset2.async_insert(key); });
+    sset1.for_all([&sset2](const auto& key) { sset2.async_insert(key); });
 
-    ASSERT_RELEASE(sset2.count("dog") == 1);
-    ASSERT_RELEASE(sset2.count("apple") == 1);
-    ASSERT_RELEASE(sset2.count("red") == 1);
+    YGM_ASSERT_RELEASE(sset2.count("dog") == 1);
+    YGM_ASSERT_RELEASE(sset2.count("apple") == 1);
+    YGM_ASSERT_RELEASE(sset2.count("red") == 1);
   }
 
   // //
@@ -200,12 +288,13 @@ int main(int argc, char **argv) {
   //   sset1.async_insert("apple");
   //   sset1.async_insert("red");
 
-  //   sset1.consume_all([&sset2](const auto &key) { sset2.async_insert(key); });
+  //   sset1.consume_all([&sset2](const auto &key) { sset2.async_insert(key);
+  //   });
 
-  //   ASSERT_RELEASE(sset1.empty());
-  //   ASSERT_RELEASE(sset2.count("dog") == 1);
-  //   ASSERT_RELEASE(sset2.count("apple") == 1);
-  //   ASSERT_RELEASE(sset2.count("red") == 1);
+  //   YGM_ASSERT_RELEASE(sset1.empty());
+  //   YGM_ASSERT_RELEASE(sset2.count("dog") == 1);
+  //   YGM_ASSERT_RELEASE(sset2.count("apple") == 1);
+  //   YGM_ASSERT_RELEASE(sset2.count("red") == 1);
   // }
 
   // //
@@ -221,10 +310,10 @@ int main(int argc, char **argv) {
   //   ygm::consume_all_iterative_adapter cai(sset1);
   //   cai.consume_all([&sset2](const auto &key) { sset2.async_insert(key); });
 
-  //   ASSERT_RELEASE(sset1.empty());
-  //   ASSERT_RELEASE(sset2.count("dog") == 1);
-  //   ASSERT_RELEASE(sset2.count("apple") == 1);
-  //   ASSERT_RELEASE(sset2.count("red") == 1);
+  //   YGM_ASSERT_RELEASE(sset1.empty());
+  //   YGM_ASSERT_RELEASE(sset2.count("dog") == 1);
+  //   YGM_ASSERT_RELEASE(sset2.count("apple") == 1);
+  //   YGM_ASSERT_RELEASE(sset2.count("red") == 1);
   // }
 
   //
@@ -245,7 +334,7 @@ int main(int argc, char **argv) {
 
     world.barrier();
     for (int set_index = 0; set_index < num_sets; ++set_index) {
-      ASSERT_RELEASE(vec_sets[set_index].size() == world.size() + 1);
+      YGM_ASSERT_RELEASE(vec_sets[set_index].size() == world.size() + 1);
     }
   }
 
diff --git a/test/test_tagged_bag.cpp b/test/test_tagged_bag.cpp
index 3947f782..4f7963e5 100644
--- a/test/test_tagged_bag.cpp
+++ b/test/test_tagged_bag.cpp
@@ -30,23 +30,23 @@ int main(int argc, char** argv) {
       r0tags = std::vector<tag_type>{r0t1, r0t2, r0t3};
     }
 
-    ASSERT_RELEASE(tagbag.size() == 3);
+    YGM_ASSERT_RELEASE(tagbag.size() == 3);
     // Test gather
-    auto gather = tagbag.key_gather(r0tags);
+    auto gather = tagbag.gather_keys(r0tags);
     world.barrier();
     if (world.rank0()) {
-      ASSERT_RELEASE(gather.size() == 3);
+      YGM_ASSERT_RELEASE(gather.size() == 3);
     } else {
-      ASSERT_RELEASE(gather.empty());
+      YGM_ASSERT_RELEASE(gather.empty());
     }
 
     tagbag.for_all([](auto& k, auto& v) { v += "_added"; });
 
-    auto gatheradd = tagbag.key_gather(r0tags);
+    auto gatheradd = tagbag.gather_keys(r0tags);
     if (world.rank0()) {
       for (auto r0tag : r0tags) {
         auto ga = gatheradd.at(r0tag);
-        ASSERT_RELEASE(ga.substr(ga.size() - 6) == "_added");
+        YGM_ASSERT_RELEASE(ga.substr(ga.size() - 6) == "_added");
       }
     }
   }
@@ -58,6 +58,6 @@ int main(int argc, char** argv) {
     sbag.async_insert("dog");
     sbag.async_insert("apple");
     sbag.async_insert("red");
-    ASSERT_RELEASE(sbag.size() == 3 * (size_t)world.size());
+    YGM_ASSERT_RELEASE(sbag.size() == 3 * (size_t)world.size());
   }
 }
\ No newline at end of file
diff --git a/test/test_transform.cpp b/test/test_transform.cpp
new file mode 100644
index 00000000..1052c28c
--- /dev/null
+++ b/test/test_transform.cpp
@@ -0,0 +1,83 @@
+// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
+// Project Developers. See the top-level COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: MIT
+
+#undef NDEBUG
+
+#include <set>
+#include <string>
+#include <vector>
+#include <ygm/comm.hpp>
+#include <ygm/container/bag.hpp>
+#include <ygm/container/counting_set.hpp>
+#include <ygm/container/map.hpp>
+#include <ygm/random.hpp>
+
+int main(int argc, char** argv) {
+  ygm::comm world(&argc, &argv);
+
+  {
+    ygm::container::bag<int> ibag(world, {42, 1, 8, 16, 32, 3, 4, 5, 6, 7});
+    // The ten values total 124; adding 1 to each of them gives 134.
+    int sum =
+        ibag.transform([](int i) { return i + 1; }).reduce(std::plus<int>());
+    YGM_ASSERT_RELEASE(sum == 134);
+  }
+
+  {
+    ygm::container::map<std::string, size_t> mymap(world);
+    if (world.rank0()) {
+      mymap.async_insert("red", 0);
+      mymap.async_insert("green", 1);
+      mymap.async_insert("blue", 2);
+    }
+    // Key lengths: 3 ("red") + 5 ("green") + 4 ("blue") = 12.
+    size_t slength = mymap.keys()
+                         .transform([](std::string s) { return s.size(); })
+                         .reduce(std::plus<int>());
+    YGM_ASSERT_RELEASE(slength == 12);
+    // Values: 0 + 1 + 2 = 3.
+    int vsum = mymap.values().reduce(std::plus<int>());
+    YGM_ASSERT_RELEASE(vsum == 3);
+  }
+
+  {
+    ygm::container::map<int, int> imap(world);
+    int                           num_entries = 100;
+
+    for (int i = 0; i < num_entries; ++i) {
+      imap.async_insert(i, i);
+    }
+    // Doubling any value must produce an even number.
+    imap.values()
+        .transform([](int value) { return 2 * value; })
+        .for_all([](int transformed_value) {
+          YGM_ASSERT_RELEASE((transformed_value % 2) == 0);
+        });
+    // Remap each entry to (key, 2 * key) and check the pair that comes out.
+    imap.transform([](const int key, const int value) {
+          return std::make_pair(key, 2 * key);
+        })
+        .for_all([](const auto& kv) {
+          YGM_ASSERT_RELEASE(2 * kv.first == kv.second);
+        });
+
+    // Filter to only odd numbers, so integer division by 2 followed by
+    // multiplication by 2 do not yield the original value
+    imap.filter([](const int key, const int value) { return ((key % 2) == 1); })
+        .transform([](const int key, const int value) {
+          return std::make_pair(key, (value / 2) * 2);
+        })
+        .for_all(
+            [](const auto& kv) { YGM_ASSERT_RELEASE(kv.first != kv.second); });
+
+    // Same as above but with filter and transform order reversed
+    imap.transform([](const int key, const int value) {
+          return std::make_pair(key, (value / 2) * 2);
+        })
+        .filter([](const auto& kv) { return ((kv.first % 2) == 1); })
+        .for_all(
+            [](const auto& kv) { YGM_ASSERT_RELEASE(kv.first != kv.second); });
+  }
+}