diff --git a/Cargo.toml b/Cargo.toml
index 2cbd3fe7..8e54d313 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,11 +21,20 @@ maintenance = { status = "experimental" }
 
 [features]
 sanitize = ['crossbeam-epoch/sanitize']
+std = ["crossbeam-epoch/std", "num_cpus"]
+default = ["std"]
 
 [dependencies]
-crossbeam-epoch = "0.9"
 parking_lot = "0.10"
-num_cpus = "1.12.0"
+
+[dependencies.num_cpus]
+version = "1.12.0"
+optional = true
+
+[dependencies.crossbeam-epoch]
+version = "0.9"
+default-features = false
+features = ["alloc"]
 
 [dependencies.ahash]
 version = "0.3.2"
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 87c18a8a..bec32d53 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -29,6 +29,17 @@ jobs:
        condition: ne(variables.CACHE_RESTORED, 'true')
      - script: cargo deny check
        displayName: cargo deny
+ - job: no_std
+   displayName: "Compile-check on no_std target"
+   pool:
+     vmImage: ubuntu-16.04
+   steps:
+     - template: install-rust.yml@templates
+       parameters:
+         targets:
+           - thumbv7m-none-eabi
+     - bash: cargo check --target thumbv7m-none-eabi --no-default-features
+       displayName: cargo check
  - job: canary
    displayName: "Warning screening"
    dependsOn: deny
diff --git a/src/iter/mod.rs b/src/iter/mod.rs
index 5c594fc6..00343336 100644
--- a/src/iter/mod.rs
+++ b/src/iter/mod.rs
@@ -1,8 +1,8 @@
 mod traverser;
 pub(crate) use traverser::NodeIter;
 
+use core::sync::atomic::Ordering;
 use crossbeam_epoch::Guard;
-use std::sync::atomic::Ordering;
 
 /// An iterator over a map's entries.
 ///
@@ -63,9 +63,9 @@ impl<'g, K, V> Iterator for Values<'g, K, V> {
 #[cfg(test)]
 mod tests {
     use crate::HashMap;
+    use core::iter::FromIterator;
     use crossbeam_epoch as epoch;
     use std::collections::HashSet;
-    use std::iter::FromIterator;
 
     #[test]
     fn iter() {
diff --git a/src/iter/traverser.rs b/src/iter/traverser.rs
index 16076932..d309b746 100644
--- a/src/iter/traverser.rs
+++ b/src/iter/traverser.rs
@@ -1,7 +1,12 @@
+#[cfg(not(feature = "std"))]
+extern crate alloc;
+
 use crate::node::{BinEntry, Node};
 use crate::raw::Table;
+#[cfg(not(feature = "std"))]
+use alloc::boxed::Box;
+use core::sync::atomic::Ordering;
 use crossbeam_epoch::{Guard, Shared};
-use std::sync::atomic::Ordering;
 
 #[derive(Debug)]
 pub(crate) struct NodeIter<'g, K, V> {
diff --git a/src/lib.rs b/src/lib.rs
index 28cd7e4c..8f60000a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -195,13 +195,14 @@
 //! more efficient operation than if everything had to be atomically reference-counted.
 //!
 //!  [`crossbeam::epoch`]: https://docs.rs/crossbeam/0.7/crossbeam/epoch/index.html
-#![deny(
-    missing_docs,
-    missing_debug_implementations,
-    unreachable_pub,
-    intra_doc_link_resolution_failure
-)]
+#![deny(missing_docs, unreachable_pub, intra_doc_link_resolution_failure)]
 #![warn(rust_2018_idioms)]
+#![cfg_attr(not(feature = "std"), no_std)]
+#![cfg_attr(feature = "std", deny(missing_debug_implementations))]
+
+#[cfg(not(feature = "std"))]
+#[macro_use]
+extern crate alloc;
 
 mod map;
 mod map_ref;
@@ -219,5 +220,7 @@ pub type DefaultHashBuilder = ahash::RandomState;
 
 /// Types needed to safely access shared data concurrently.
 pub mod epoch {
-    pub use crossbeam_epoch::{pin, Guard};
+    #[cfg(feature = "std")]
+    pub use crossbeam_epoch::pin;
+    pub use crossbeam_epoch::Guard;
 }
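
Only `Guard` is re-exported unconditionally because crossbeam-epoch's free-standing `pin` rests on a thread-local default collector, which needs `std`. A `no_std` embedding can still mint guards from a collector it owns; a sketch, assuming `Collector` and `LocalHandle` remain available in crossbeam-epoch's `alloc`-only build (which the `features = ["alloc"]` selection above implies). How a map gets bound to such a collector is outside this excerpt; note that flurry only accepts guards from the map's own collector (see the `disallow_evil` test below).

```rust
use crossbeam_epoch::Collector;

// Without `std` there is no implicit thread-local collector, so the
// embedding creates one and registers each thread explicitly.
fn make_guard() {
    let collector = Collector::new();
    let handle = collector.register(); // one handle per thread
    let guard = handle.pin();          // a `Guard` of the kind the map's methods take
    drop(guard);                       // unpins this thread
}
```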
diff --git a/src/map.rs b/src/map.rs
index 4656519a..cb737c37 100644
--- a/src/map.rs
+++ b/src/map.rs
@@ -1,15 +1,16 @@
 use crate::iter::*;
 use crate::node::*;
 use crate::raw::*;
+use core::borrow::Borrow;
+use core::hash::{BuildHasher, Hash, Hasher};
+#[cfg(feature = "std")]
+use core::iter::FromIterator;
+use core::sync::atomic::{AtomicIsize, AtomicUsize, Ordering};
 use crossbeam_epoch::{self as epoch, Atomic, Guard, Owned, Shared};
-use std::borrow::Borrow;
+#[cfg(feature = "std")]
 use std::fmt::{self, Debug, Formatter};
-use std::hash::{BuildHasher, Hash, Hasher};
-use std::iter::FromIterator;
-use std::sync::{
-    atomic::{AtomicIsize, AtomicUsize, Ordering},
-    Once,
-};
+#[cfg(feature = "std")]
+use std::sync::Once;
 
 const ISIZE_BITS: usize = core::mem::size_of::<isize>() * 8;
 
@@ -43,8 +44,10 @@ const MAX_RESIZERS: isize = (1 << (ISIZE_BITS - RESIZE_STAMP_BITS)) - 1;
 /// The bit shift for recording size stamp in `size_ctl`.
 const RESIZE_STAMP_SHIFT: usize = ISIZE_BITS - RESIZE_STAMP_BITS;
 
+#[cfg(feature = "std")]
 static NCPU_INITIALIZER: Once = Once::new();
-static NCPU: AtomicUsize = AtomicUsize::new(0);
+#[cfg(feature = "std")]
+static NCPU: AtomicUsize = AtomicUsize::new(1);
 
 macro_rules! load_factor {
     ($n: expr) => {
@@ -123,6 +126,28 @@ pub struct HashMap<K: 'static, V: 'static, S = crate::DefaultHashBuilder> {
     build_hasher: S,
 }
 
+/// A concurrent hash table.
+///
+/// Note that `ahash::RandomState`, the default value of `S`, is not
+/// cryptographically secure. Therefore it is strongly recommended that you do
+/// not use this hasher for cryptographic purposes.
+/// See [`ahash`](https://github.com/tkaitchuck/ahash) for more information.
+///
+/// See the [crate-level documentation](index.html) for details.
+#[cfg(not(feature = "std"))]
+pub struct HashMap<K: 'static, V: 'static, S = crate::DefaultHashBuilder> {
+    // NOTE: this is, and must remain, an exact copy of the `HashMap` definition above. The
+    // type is defined twice so the `std` and `no_std` builds can diverge where they must;
+    // any change to one definition has to be mirrored in the other.
+    table: Atomic<Table<K, V>>,
+    next_table: Atomic<Table<K, V>>,
+    transfer_index: AtomicIsize,
+    count: AtomicUsize,
+    size_ctl: AtomicIsize,
+    collector: epoch::Collector,
+    build_hasher: S,
+}
+
 #[cfg(test)]
 #[test]
 #[should_panic]
@@ -144,6 +169,7 @@ fn disallow_evil() {
     assert_eq!(oops.unwrap(), "hello");
 }
 
+#[cfg(feature = "std")]
 impl<K, V, S> Default for HashMap<K, V, S>
 where
     K: Sync + Send + Clone + Hash + Eq,
@@ -155,6 +181,7 @@ where
     }
 }
 
+#[cfg(feature = "std")]
 impl<K, V> HashMap<K, V, crate::DefaultHashBuilder>
 where
     K: Sync + Send + Clone + Hash + Eq,
@@ -178,6 +205,7 @@ where
     V: Sync + Send,
     S: BuildHasher,
 {
+    #[cfg(feature = "std")]
     /// Creates an empty map which will use `hash_builder` to hash keys.
     ///
     /// The created map has the default initial capacity.
@@ -241,6 +269,7 @@ where
         }
     }
 
+    #[cfg(feature = "std")]
     /// Creates an empty map with the specified `capacity`, using `hash_builder` to hash the keys.
     ///
     /// The map will be sized to accommodate `capacity` elements with a low chance of reallocating
@@ -417,6 +446,15 @@ where
             // try to allocate the table
             let mut sc = self.size_ctl.load(Ordering::SeqCst);
             if sc < 0 {
+                #[cfg(not(feature = "std"))]
+                // For there to be a race, another thread must be running
+                // concurrently with us. That thread cannot be blocked on us,
+                // since we are not in any mutually-exclusive section. So our
+                // goal is just to avoid wasting cycles and to give it some
+                // time to complete; it is not a requirement that we fully yield.
+                core::sync::atomic::spin_loop_hint();
+
+                #[cfg(feature = "std")]
                 // we lost the initialization race; just spin
                 std::thread::yield_now();
                 continue;
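
The pattern above — yield the timeslice under `std`, otherwise emit a spin hint — is the crate's general answer to losing an initialization race. Distilled into a helper (a sketch, not code from this PR; `spin_loop_hint` was the then-current name for what is now `core::hint::spin_loop`):

```rust
// Backoff when another thread is already doing the work we raced it for.
fn lost_race_backoff() {
    #[cfg(feature = "std")]
    std::thread::yield_now(); // let the winner be scheduled

    #[cfg(not(feature = "std"))]
    core::sync::atomic::spin_loop_hint(); // no scheduler; just don't burn the pipeline
}
```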
@@ -722,6 +760,7 @@ where
         }
     }
 
+    #[cfg_attr(not(feature = "std"), allow(dead_code))]
     fn put_all<I: Iterator<Item = (K, V)>>(&self, iter: I, guard: &Guard) {
         for (key, value) in iter {
             self.put(key, value, false, guard);
@@ -985,7 +1024,7 @@ where
     fn add_count(&self, n: isize, resize_hint: Option<usize>, guard: &Guard) {
         // TODO: implement the Java CounterCell business here
 
-        use std::cmp;
+        use core::cmp;
         let mut count = match n.cmp(&0) {
             cmp::Ordering::Greater => {
                 let n = n as usize;
@@ -1074,7 +1113,7 @@ where
         let ncpu = num_cpus();
 
         let stride = if ncpu > 1 { (n >> 3) / ncpu } else { n };
-        let stride = std::cmp::max(stride as isize, MIN_TRANSFER_STRIDE);
+        let stride = core::cmp::max(stride as isize, MIN_TRANSFER_STRIDE);
 
         if next_table.is_null() {
             // we are initiating a resize
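
To make the stride arithmetic concrete: each resizing thread claims `(n >> 3) / ncpu` bins per step, clamped from below by `MIN_TRANSFER_STRIDE`; and since `num_cpus()` is hard-wired to 1 without `std` (see the end of this file), a `no_std` resize claims the entire table in one stride. A worked sketch (the constant's value of 16 mirrors the Java original and is an assumption here; check its definition earlier in this file):

```rust
const MIN_TRANSFER_STRIDE: isize = 16; // assumed value, as in Java's ConcurrentHashMap

fn stride_for(n: usize, ncpu: usize) -> isize {
    let stride = if ncpu > 1 { (n >> 3) / ncpu } else { n };
    core::cmp::max(stride as isize, MIN_TRANSFER_STRIDE)
}

// stride_for(1024, 4) == 32   — four threads split a 1024-bin transfer
// stride_for(64, 8)   == 16   — clamped so small tables aren't over-partitioned
// stride_for(1024, 1) == 1024 — a single CPU (or no_std) takes the whole table
```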
@@ -1359,7 +1398,7 @@ where
             // TODO: find out if this is necessary
             let size = size + (size >> 1) + 1;
 
-            std::cmp::min(MAXIMUM_CAPACITY, size.next_power_of_two())
+            core::cmp::min(MAXIMUM_CAPACITY, size.next_power_of_two())
         } as isize;
 
         loop {
@@ -1767,6 +1806,7 @@ where
     }
 }
 
+#[cfg(feature = "std")]
 impl<K, V, S> PartialEq for HashMap<K, V, S>
 where
     K: Sync + Send + Clone + Eq + Hash,
@@ -1781,6 +1821,7 @@ where
     }
 }
 
+#[cfg(feature = "std")]
 impl<K, V, S> Eq for HashMap<K, V, S>
 where
     K: Sync + Send + Clone + Eq + Hash,
@@ -1789,6 +1830,7 @@ where
 {
 }
 
+#[cfg(feature = "std")]
 impl<K, V, S> fmt::Debug for HashMap<K, V, S>
 where
     K: Sync + Send + Clone + Debug + Eq + Hash,
@@ -1825,6 +1867,7 @@ impl<K, V, S> Drop for HashMap<K, V, S> {
     }
 }
 
+#[cfg(feature = "std")]
 impl<K, V, S> Extend<(K, V)> for &HashMap<K, V, S>
 where
     K: Sync + Send + Clone + Hash + Eq,
@@ -1851,6 +1894,7 @@ where
     }
 }
 
+#[cfg(feature = "std")]
 impl<'a, K, V, S> Extend<(&'a K, &'a V)> for &HashMap<K, V, S>
 where
     K: Sync + Send + Copy + Hash + Eq,
@@ -1863,6 +1907,7 @@ where
     }
 }
 
+#[cfg(feature = "std")]
 impl<K, V, S> FromIterator<(K, V)> for HashMap<K, V, S>
 where
     K: Sync + Send + Clone + Hash + Eq,
@@ -1889,6 +1934,7 @@ where
     }
 }
 
+#[cfg(feature = "std")]
 impl<'a, K, V, S> FromIterator<(&'a K, &'a V)> for HashMap<K, V, S>
 where
     K: Sync + Send + Copy + Hash + Eq,
@@ -1901,6 +1947,7 @@ where
     }
 }
 
+#[cfg(feature = "std")]
 impl<'a, K, V, S> FromIterator<&'a (K, V)> for HashMap<K, V, S>
 where
     K: Sync + Send + Copy + Hash + Eq,
@@ -1913,6 +1960,7 @@ where
     }
 }
 
+#[cfg(feature = "std")]
 impl<K, V, S> Clone for HashMap<K, V, S>
 where
     K: Sync + Send + Clone + Hash + Eq,
@@ -1931,16 +1979,19 @@ where
     }
 }
 
-#[cfg(not(miri))]
 #[inline]
-/// Returns the number of physical CPUs in the machine (_O(1)_).
+#[cfg(all(not(miri), feature = "std"))]
+/// Returns the number of physical CPUs in the machine.
+/// Returns `1` under miri and in `no_std` environments.
 fn num_cpus() -> usize {
     NCPU_INITIALIZER.call_once(|| NCPU.store(num_cpus::get_physical(), Ordering::Relaxed));
     NCPU.load(Ordering::Relaxed)
 }
 
-#[cfg(miri)]
 #[inline]
+#[cfg(any(miri, not(feature = "std")))]
+/// Returns the number of physical CPUs in the machine.
+/// Returns `1` under miri and in `no_std` environments.
 const fn num_cpus() -> usize {
     1
 }
diff --git a/src/map_ref.rs b/src/map_ref.rs
index 2bece084..b412b050 100644
--- a/src/map_ref.rs
+++ b/src/map_ref.rs
@@ -1,16 +1,34 @@
 use crate::iter::*;
 use crate::HashMap;
+use core::borrow::Borrow;
+use core::fmt::{self, Debug, Formatter};
+use core::hash::{BuildHasher, Hash};
+use core::ops::{Deref, Index};
 use crossbeam_epoch::Guard;
-use std::borrow::Borrow;
-use std::fmt::{self, Debug, Formatter};
-use std::hash::{BuildHasher, Hash};
-use std::ops::{Deref, Index};
 
+#[cfg(feature = "std")]
 /// A reference to a [`HashMap`], constructed with [`HashMap::pin`] or [`HashMap::with_guard`].
 ///
 /// The current thread will be pinned for the duration of this reference.
 /// Keep in mind that this prevents the collection of garbage generated by the map.
-pub struct HashMapRef<'map, K: 'static, V: 'static, S = crate::DefaultHashBuilder> {
+pub struct HashMapRef<'map, K: 'static, V: 'static, S = crate::DefaultHashBuilder>
+where
+    S: BuildHasher,
+{
+    // NOTE: this definition and the `no_std` one below must be kept identical.
+    map: &'map HashMap<K, V, S>,
+    guard: GuardRef<'map>,
+}
+
+#[cfg(not(feature = "std"))]
+/// A reference to a [`HashMap`], constructed with [`HashMap::pin`] or [`HashMap::with_guard`].
+///
+/// The current thread will be pinned for the duration of this reference.
+/// Keep in mind that this prevents the collection of garbage generated by the map.
+pub struct HashMapRef<'map, K: 'static, V: 'static, S = crate::DefaultHashBuilder>
+where
+    S: BuildHasher,
+{
     map: &'map HashMap<K, V, S>,
     guard: GuardRef<'map>,
 }
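
Either way, `HashMap::with_guard` remains the route to a `HashMapRef` when `HashMap::pin` is unavailable (pinning needs `epoch::pin`, hence `std`). A hypothetical usage sketch, assuming `with_guard` and the ref's `get` keep their current signatures, and recalling that the guard must come from this map's own collector:

```rust
fn read_through_ref(map: &flurry::HashMap<u32, u32>, guard: &flurry::epoch::Guard) {
    let mref = map.with_guard(guard); // HashMapRef borrowing both map and guard
    let _ = mref.get(&42);            // no per-call guard needed on the ref
}
```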
@@ -211,6 +229,7 @@ where
     }
 }
 
+#[cfg(feature = "std")]
 impl<K, V, S> Clone for HashMapRef<'_, K, V, S>
 where
     K: Sync + Send + Clone + Hash + Eq,
diff --git a/src/node.rs b/src/node.rs
index aac4f9f5..219e609a 100644
--- a/src/node.rs
+++ b/src/node.rs
@@ -1,8 +1,8 @@
 use crate::raw::Table;
+use core::borrow::Borrow;
+use core::sync::atomic::Ordering;
 use crossbeam_epoch::{Atomic, Guard, Shared};
 use parking_lot::Mutex;
-use std::borrow::Borrow;
-use std::sync::atomic::Ordering;
 
 /// Entry in a bin.
 ///
diff --git a/src/raw/mod.rs b/src/raw/mod.rs
index ceb1c51a..99880c14 100644
--- a/src/raw/mod.rs
+++ b/src/raw/mod.rs
@@ -1,7 +1,11 @@
 use crate::node::*;
+#[cfg(not(feature = "std"))]
+use alloc::boxed::Box;
+#[cfg(not(feature = "std"))]
+use alloc::vec::Vec;
+use core::fmt::Debug;
+use core::sync::atomic::Ordering;
 use crossbeam_epoch::{Atomic, Guard, Owned, Shared};
-use std::fmt::Debug;
-use std::sync::atomic::Ordering;
 
 #[derive(Debug)]
 pub(crate) struct Table<K, V> {
@@ -35,7 +39,10 @@ impl<K, V> Table<K, V> {
         // anything in the map.
         let guard = unsafe { crossbeam_epoch::unprotected() };
 
-        for bin in Vec::from(std::mem::replace(&mut self.bins, vec![].into_boxed_slice())) {
+        for bin in Vec::from(core::mem::replace(
+            &mut self.bins,
+            vec![].into_boxed_slice(),
+        )) {
             if bin.load(Ordering::SeqCst, guard).is_null() {
                 // bin was never used
                 continue;
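
The reflowed loop is behavior-neutral: `core::mem::replace` swaps an empty boxed slice into `self.bins` so the old bins can be consumed by value through `Vec::from`. The same idiom in isolation (a sketch; `Box` and `Vec` come from `alloc` in the `no_std` build and from the prelude otherwise):

```rust
fn drain_bins(bins: &mut Box<[u32]>) {
    // Swap in an empty slice, then iterate the old contents by value.
    for bin in Vec::from(core::mem::replace(bins, Vec::new().into_boxed_slice())) {
        let _ = bin; // each old bin is moved out (and dropped) here
    }
}
```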