diff --git a/CMakeLists.txt b/CMakeLists.txt
index e9bdeef..5b3c06f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,6 +64,17 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 set(LLAMA_STATIC ON CACHE BOOL "Build llama as static library")
 add_subdirectory("src/llama.cpp")
 
+# Apply patches/llama.patch to the vendored llama.cpp tree before ggml is built (best-effort: -N skips an applied patch).
+set(PATCH_FILE ${CMAKE_CURRENT_SOURCE_DIR}/patches/llama.patch)
+add_custom_target(patch)
+add_custom_command(
+  TARGET patch POST_BUILD
+  COMMAND patch -N -p1 < ${PATCH_FILE} || ${CMAKE_COMMAND} -E echo llama.patch already applied or not applicable
+  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/llama.cpp
+  COMMENT "Applying patches/llama.patch to src/llama.cpp"
+)
+add_dependencies(ggml patch)
+
 file(
   GLOB SOURCE_FILES
     "src/addons.cc"
@@ -95,6 +106,7 @@ if (LLAMA_QNN)
     message(FATAL_ERROR "QNN is not supported on this platform")
   endif()
   set(QNN_LIB_PATH ${QNN_ROOT}/lib/${QNN_PLATFORM})
+  message(STATUS "QNN_LIB_PATH: ${QNN_LIB_PATH}")
 
   file(
     GLOB QNN_SO_FILES
@@ -118,33 +130,22 @@ if (LLAMA_QNN)
 
   file(
     GLOB QNN_HEADER_FILES
-      "src/ggml-qnn.h"
+      "src/ggml-qnn/ggml-qnn.h"
   )
 
   file(
     GLOB QNN_SOURCE_FILES
-      "src/ggml-qnn.cpp"
+      "src/ggml-qnn/pthread-shim.h"
+      "src/ggml-qnn/ggml-qnn.cpp"
   )
 
   target_compile_definitions(ggml PUBLIC GGML_USE_QNN)
-  target_include_directories(ggml PUBLIC ${QNN_ROOT}/include/QNN)
+  target_include_directories(ggml PUBLIC ${QNN_ROOT}/include ${QNN_ROOT}/include/QNN)
   target_sources(ggml PRIVATE ${QNN_SOURCE_FILES} ${QNN_HEADER_FILES})
-  target_include_directories(llama PRIVATE "src")
+  target_include_directories(llama PRIVATE "src/ggml-qnn")
   set_target_properties(ggml PROPERTIES CXX_STANDARD 17)
-
-  # apply patches/qnn.patch to ggml
-  add_custom_command(
-    OUTPUT ${CMAKE_BUILD_DIR}/patch.log
-    COMMAND git apply ${CMAKE_SOURCE_DIR}/patches/qnn.patch
-    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
-  )
-else()
-  # undo patches/qnn.patch to ggml
-  add_custom_command(
-    OUTPUT ${CMAKE_BUILD_DIR}/patch.log
-    COMMAND git apply -R ${CMAKE_SOURCE_DIR}/patches/qnn.patch
-    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
-  )
+  set_target_properties(ggml PROPERTIES CXX_STANDARD_REQUIRED ON)
+  set_target_properties(ggml PROPERTIES C_STANDARD 11)
 endif()
 
 add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
diff --git a/package.json b/package.json
index fae134a..462098a 100644
--- a/package.json
+++ b/package.json
@@ -39,10 +39,8 @@
   },
   "files": [
     "bin/**/*",
-    "scripts/*.js",
-    "scripts/*.ts",
-    "src/*",
-    "externals/**/*.{c,cc,cpp,h,hh,hpp,txt,cmake}",
+    "patches/*",
+    "src/**/*.{c,cc,cpp,h,hh,hpp,txt,cmake}",
     "lib/*.js",
     "lib/*.ts",
     "CMakeLists.txt"
diff --git a/patches/qnn.patch b/patches/llama.patch
similarity index 77%
rename from patches/qnn.patch
rename to patches/llama.patch
index 3bbe1e1..156d5e7 100644
--- a/patches/qnn.patch
+++ b/patches/llama.patch
@@ -1,16 +1,7 @@
 diff --git a/ggml-backend.c b/ggml-backend.c
-index f5bdcf07..536a5767 100644
+index e91d97cd..be4989d3 100644
 --- a/ggml-backend.c
 +++ b/ggml-backend.c
-@@ -416,7 +416,7 @@ GGML_CALL static void ggml_backend_registry_init(void) {
-     }
- 
-     initialized = true;
--
-+    printf("GGML_USE_CPU\n");
-     ggml_backend_register("CPU", ggml_backend_reg_cpu_init, ggml_backend_cpu_buffer_type(), NULL);
- 
-     // add forward decls here to avoid including the backend headers
 @@ -445,6 +445,10 @@ GGML_CALL static void ggml_backend_registry_init(void) {
      extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
      ggml_backend_kompute_reg_devices();
@@ -23,7 +14,7 @@ index f5bdcf07..536a5767 100644
  
  GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
 diff --git a/llama.cpp b/llama.cpp
-index 18d6297c..f2a39613 100644
+index a25d115c..ff0d929f 100644
 --- a/llama.cpp
 +++ b/llama.cpp
 @@ -17,6 +17,8 @@
@@ -35,7 +26,7 @@ index 18d6297c..f2a39613 100644
  #endif
  
  #ifdef GGML_USE_METAL
-@@ -1679,6 +1681,8 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(int gpu) {
+@@ -1658,6 +1660,8 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(int gpu) {
      buft = ggml_backend_opencl_buffer_type();
  #elif defined(GGML_USE_KOMPUTE)
      buft = ggml_backend_kompute_buffer_type(gpu);
@@ -44,18 +35,16 @@ index 18d6297c..f2a39613 100644
      if (buft == nullptr) {
          LLAMA_LOG_WARN("%s: cannot use GPU %d, check `vulkaninfo --summary`\n", __func__, gpu);
      }
-@@ -15293,8 +15297,9 @@ bool llama_supports_mlock(void) {
+@@ -14916,7 +14920,7 @@ bool llama_supports_mlock(void) {
  
  bool llama_supports_gpu_offload(void) {
  #if defined(GGML_USE_CUDA) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \
 -    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE)
 +    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_QNN)
      // Defined when llama.cpp is compiled with support for offloading model layers to GPU.
-+    printf("llama_supports_gpu_offload: true\n");
      return true;
  #else
-     return false;
-@@ -15607,6 +15612,16 @@ struct llama_context * llama_new_context_with_model(
+@@ -15203,6 +15207,16 @@ struct llama_context * llama_new_context_with_model(
              }
              ctx->backends.push_back(backend);
          }
diff --git a/src/ggml-qnn.cpp b/src/ggml-qnn/ggml-qnn.cpp
similarity index 98%
rename from src/ggml-qnn.cpp
rename to src/ggml-qnn/ggml-qnn.cpp
index ef0a136..42e9325 100644
--- a/src/ggml-qnn.cpp
+++ b/src/ggml-qnn/ggml-qnn.cpp
@@ -50,10 +50,21 @@
 #include <inttypes.h>
 #include <math.h>
 #include <time.h>
+#include <sys/stat.h>
+#ifdef WIN32
+#include <io.h>
+#include <windows.h>
+#include "pthread-shim.h"
+
+#define R_OK    4       /* Test for read permission.  */
+#define W_OK    2       /* Test for write permission.  */
+#define F_OK    0       /* Test for existence.  */
+#define access _access
+#else
 #include <unistd.h>
 #include <dlfcn.h>
 #include <fcntl.h>
-#include <sys/stat.h>
+#endif
 
 #include <string>
 #include <vector>
@@ -79,16 +90,21 @@
 #include <utility>
 
 #ifdef __cplusplus
-    #include <atomic>
-    using std::atomic_int;
-    using std::atomic_bool;
-    using std::atomic_load;
-    using std::atomic_fetch_sub;
-    using std::atomic_store;
+#include <atomic>
+using std::atomic_int;
+using std::atomic_bool;
+using std::atomic_load;
+using std::atomic_fetch_sub;
+using std::atomic_store;
 #else /* not __cplusplus */
-    #include <stdatomic.h>
+#include <stdatomic.h>
 #endif /* __cplusplus */
 
+// dummy fix https://github.com/skypjack/entt/issues/615#issuecomment-749511697 
+#ifdef WIN32
+#define interface interface_
+#endif
+
 #include "QnnTypes.h"
 #include "QnnCommon.h"
 #include "QnnContext.h"
@@ -367,11 +383,31 @@ static void ggml_setup_op_has_task_pass(void) {
 //in GGML internal or FFmpeg
 
 //QNN cDSP and HTA backend would not be used currently, just focus on QNN CPU/GPU/HTP(aka DSP) backend currently
+#ifdef WIN32
+/*struct ggml_backend_qnn_context {
+    int device;
+    int threads;
+    char name[GGML_MAX_NAME];
+    char lib[GGML_MAX_NAME];
+    qnn_instance * instance;
+    qnn_buf_t * buffer_pool;
+    struct ggml_backend * backend;
+    QNN_INTERFACE_VER_TYPE raw_interface;
+    QNN_SYSTEM_INTERFACE_VER_TYPE raw_system_interface;
+} ;*/
+
+static struct ggml_backend_qnn_context g_qnn_mgr[GGML_QNN_MAX_DEVICES] = {
+    {0, 1, "qnn-cpu", "QnnCpu.dll", nullptr, nullptr, nullptr, nullptr, nullptr},
+    {1, 1, "qnn-gpu", "QnnGpu.dll", nullptr, nullptr, nullptr, nullptr, nullptr},
+    {2, 1, "qnn-htp(aka dsp)", "QnnHtp.dll", nullptr, nullptr, nullptr, nullptr, nullptr}
+};
+#else
 static struct ggml_backend_qnn_context g_qnn_mgr[GGML_QNN_MAX_DEVICES] = {
         [QNN_CPU]   = {.device = 0, .threads = 1, .name =   "qnn-cpu", .lib = "libQnnCpu.so", .instance = nullptr, .buffer_pool = nullptr, .backend = nullptr, .raw_interface = nullptr, .raw_system_interface = nullptr},
         [QNN_GPU]   = {.device = 1, .threads = 1, .name =   "qnn-gpu", .lib = "libQnnGpu.so", .instance = nullptr, .buffer_pool = nullptr, .backend = nullptr, .raw_interface = nullptr, .raw_system_interface = nullptr},
         [QNN_HTP]   = {.device = 2, .threads = 1, .name =   "qnn-htp(aka dsp)", .lib = "libQnnHtp.so", .instance = nullptr, .buffer_pool = nullptr, .backend = nullptr, .raw_interface = nullptr, .raw_system_interface = nullptr},
 };
+#endif
 
 
 
@@ -889,7 +925,17 @@ static size_t memscpy(void * dst, size_t dstSize, const void * src, size_t copyS
 
 
 static char * ggml_qnn_strndup(const char * source, size_t maxlen) {
-    return ::strndup(source, maxlen); 
+#ifdef WIN32 // the MSVC CRT has no strndup(); emulate it
+    const size_t len = strnlen(source, maxlen);   // bytes to copy: never reads past maxlen
+    char * dest = (char *)malloc(len + 1);        // sized by the real length, so maxlen + 1 cannot overflow
+    if (dest != nullptr) {
+        memcpy(dest, source, len);
+        dest[len] = '\0';
+    }
+    return dest;                                  // nullptr when the allocation failed
+#else
+    return ::strndup(source, maxlen);
+#endif
 }
 
 
@@ -1118,7 +1164,11 @@ static uint32_t ggml_get_tensor_data_size(const ggml_tensor * tensor) {
 
 template<typename Fn>
 Fn load_qnn_functionpointers(void * handle, const char * function_name) {
+#ifdef WIN32
+    return reinterpret_cast<Fn>(GetProcAddress(reinterpret_cast<HMODULE>(handle), function_name));
+#else
     return reinterpret_cast<Fn>(dlsym(handle, function_name));
+#endif
 }
 
 
@@ -2034,19 +2084,35 @@ int qnn_instance::load_backend(std::string & lib_path, const QnnSaver_Config_t *
     Qnn_ErrorHandle_t error = QNN_SUCCESS;
     QNN_LOG_DEBUG("lib_path:%s\n", lib_path.c_str());
 
+#ifdef WIN32
+    void *lib_handle = LoadLibraryA(lib_path.c_str()); // ANSI entry point: lib_path is a narrow string even in UNICODE builds
+    if (nullptr == lib_handle) {
+        QNN_LOG_WARN("can not open QNN library %s, with error: %lu", lib_path.c_str(), GetLastError()); // GetLastError() is a DWORD
+        return 1;
+    }
+#else
     void *lib_handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL);
     if (nullptr == lib_handle) {
         QNN_LOG_WARN("can not open QNN library %s, with error: %s", lib_path.c_str(), dlerror());
         return 1;
     }
+#endif
 
     // load get_provider function
+#ifdef WIN32
+    auto get_providers = (_pfn_QnnInterface_getProviders *)GetProcAddress((HMODULE)lib_handle, "QnnInterface_getProviders");
+    if (nullptr == get_providers) {
+        QNN_LOG_WARN("can not load symbol QnnInterface_getProviders : %lu", GetLastError()); // GetLastError() is a DWORD
+        return 2;
+    }
+#else
     auto get_providers = load_qnn_functionpointers<_pfn_QnnInterface_getProviders *>(lib_handle,
                                                                                      "QnnInterface_getProviders");
     if (nullptr == get_providers) {
         QNN_LOG_WARN("can not load symbol QnnInterface_getProviders : %s", dlerror());
         return 2;
     }
+#endif
 
     // get QnnInterface Providers
     std::uint32_t num_providers = 0;
@@ -2094,10 +2160,14 @@ int qnn_instance::load_backend(std::string & lib_path, const QnnSaver_Config_t *
     _loaded_backend[backend_id] = provider_list[0];
     if (_loaded_lib_handle.count(backend_id) > 0) {
         QNN_LOG_WARN("closing %p\n", _loaded_lib_handle[backend_id]);
+#ifdef WIN32
+        FreeLibrary((HMODULE)_loaded_lib_handle[backend_id]);
+#else
         int dlclose_error = dlclose(_loaded_lib_handle[backend_id]);
         if (dlclose_error != 0) {
             QNN_LOG_WARN("fail to close %p with error %s\n", _loaded_lib_handle[backend_id], dlerror());
         }
+#endif
     }
     _loaded_lib_handle[backend_id] = lib_handle;
     _backend_id = backend_id;
@@ -2136,6 +2206,11 @@ int qnn_instance::load_backend(std::string & lib_path, const QnnSaver_Config_t *
 
 int qnn_instance::unload_backend() {
     ENTER_FUNC();
+#ifdef WIN32
+    for (auto &it : _loaded_lib_handle) {
+        FreeLibrary((HMODULE)it.second);
+    }
+#else
     int dlclose_error = 0;
     for (auto &it : _loaded_lib_handle) {
         dlclose_error = dlclose(it.second);
@@ -2143,6 +2218,7 @@ int qnn_instance::unload_backend() {
             QNN_LOG_WARN("failed to close QNN backend %d, error %s\n", it.first, dlerror());
         }
     }
+#endif
 
     _loaded_lib_handle.clear();
     _lib_path_to_backend_id.clear();
@@ -2161,6 +2237,20 @@ int qnn_instance::load_system() {
     std::string system_lib_path = _lib_path + "libQnnSystem.so";
     QNN_LOG_DEBUG("system_lib_path:%s\n", system_lib_path.c_str());
 
+#ifdef WIN32
+    system_lib_path = _lib_path + "QnnSystem.dll"; // the path built above names libQnnSystem.so, which is not the Windows library name
+    _system_lib_handle = LoadLibraryA(system_lib_path.c_str()); // ANSI entry point: the path is a narrow string
+    if (nullptr == _system_lib_handle) {
+        QNN_LOG_WARN("can not open QNN library %s, error: %lu\n", system_lib_path.c_str(), GetLastError());
+        return 1;
+    }
+    auto * get_providers = reinterpret_cast<_pfn_QnnSystemInterface_getProviders *>(GetProcAddress(
+            (HMODULE)_system_lib_handle, "QnnSystemInterface_getProviders"));
+    if (nullptr == get_providers) {
+        QNN_LOG_WARN("can not load QNN symbol QnnSystemInterface_getProviders: %lu\n", GetLastError());
+        return 2;
+    }
+#else
     _system_lib_handle = dlopen(system_lib_path.c_str(), RTLD_NOW | RTLD_LOCAL);
     if (nullptr == _system_lib_handle) {
         QNN_LOG_WARN("can not open QNN library %s, error: %s\n", system_lib_path.c_str(), dlerror());
@@ -2173,6 +2263,7 @@ int qnn_instance::load_system() {
         QNN_LOG_WARN("can not load QNN symbol QnnSystemInterface_getProviders: %s\n", dlerror());
         return 2;
     }
+#endif
 
     uint32_t num_providers = 0;
     const QnnSystemInterface_t ** provider_list = nullptr;
@@ -2243,11 +2334,15 @@ int qnn_instance::unload_system() {
         _qnn_system_handle = nullptr;
     }
 
+#ifdef WIN32
+    FreeLibrary((HMODULE)_system_lib_handle);
+#else
     int dlclose_error = dlclose(_system_lib_handle);
     if (dlclose_error != 0) {
         QNN_LOG_WARN("failed to close QnnSystem library, error %s\n", dlerror());
         return 2;
     }
+#endif
 
     _system_lib_handle = nullptr;
     LEAVE_FUNC();
@@ -2406,7 +2501,7 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) {
         }
     }
 
-#ifdef __ANDROID__
+#if 0 // Latest SDK seems no libcdsprpc
     _rpc_lib_handle = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL);
     if (nullptr == _rpc_lib_handle) {
         QNN_LOG_WARN("failed to load qualcomm's rpc lib, error:%s\n", dlerror());
@@ -2458,7 +2553,7 @@ int qnn_instance::qnn_finalize() {
     if (nullptr != _pfn_rpc_mem_deinit) // make Qualcomm's SoC based low-end phone happy
         _pfn_rpc_mem_deinit();
 
-#ifdef __ANDROID__
+#if 0
     if (dlclose(_rpc_lib_handle) != 0) {
         QNN_LOG_WARN("failed to unload qualcomm's rpc lib, error:%s\n", dlerror());
     } else {
@@ -4120,11 +4215,15 @@ static const char * ggml_backend_qnn_buffer_type_name(ggml_backend_buffer_type_t
 
 static void * ggml_qnn_host_malloc(size_t n) {
     void * data = nullptr;
+#ifdef WIN32
+    data = _aligned_malloc(n, 32); // NOTE(review): 32-byte aligned (POSIX branch is page aligned); must be released with _aligned_free(), not free() - confirm the release path
+#else
     const int result = posix_memalign((void **) &data, sysconf(_SC_PAGESIZE), n);
     if (result != 0) {
         QNN_LOG_WARN("%s: error: posix_memalign failed\n", __func__);
         return nullptr;
     }
+#endif
 
     return data;
 }
@@ -4136,12 +4235,20 @@ static ggml_backend_buffer_t ggml_backend_qnn_buffer_type_alloc_buffer(ggml_back
 
     ggml_backend_qnn_buffer_context * ctx = new ggml_backend_qnn_buffer_context;
 
+#ifdef WIN32
+    const size_t size_page = 4096;
+    size_t size_aligned = size;
+    if ((size_aligned % size_page) != 0) {
+        size_aligned += (size_page - (size_aligned % size_page));
+    }
+#else
     const size_t size_page = sysconf(_SC_PAGESIZE);
 
     size_t size_aligned = size;
     if ((size_aligned % size_page) != 0) {
         size_aligned += (size_page - (size_aligned % size_page));
     }
+#endif
 
     //QNN_LOG_DEBUG("size %d, %d MB", size_aligned, size_aligned / (1 << 20));
 
@@ -4538,7 +4645,11 @@ static void ggml_graph_compute_thread_sync_node(int * node_n, struct ggml_comput
 
     while (true) {
         if (do_yield) {
+#ifdef WIN32
+            Sleep(0);
+#else
             sched_yield();
+#endif
         }
 
         * node_n = atomic_load(&state->shared->node_n);
@@ -4553,7 +4664,11 @@ static void ggml_graph_compute_thread_sync_task(int * task_phase, struct ggml_co
 
     while (true) {
         if (do_yield) {
+#ifdef WIN32
+            Sleep(0);
+#else
             sched_yield();
+#endif
         }
 
         * task_phase = atomic_load(&state->shared->node_task);
@@ -4982,12 +5097,7 @@ static ggml_status ggml_backend_qnn_graph_compute_multithread(ggml_backend_t bac
     // create thread pool
     if (n_threads > 1) {
         for (int j = 1; j < n_threads; ++j) {
-            workers[j] = (struct ggml_compute_state) {
-                    .thrd   = 0,
-                    .ith = j,
-                    .shared = &state_shared,
-                    .ec = GGML_STATUS_SUCCESS,
-            };
+            workers[j] = { 0, j, &state_shared, GGML_STATUS_SUCCESS };
 
             const int rc = pthread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
             GGML_ASSERT(rc == 0);
@@ -5154,7 +5264,7 @@ ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(size_t device_index) {
 }
 
 // if build for windows, PATH_DELIMITER is '\'
-#ifdef _WINDOWS_
+#ifdef WIN32
 
 #define PATH_DELIMITER '\\'
 #define QNN_SYS_LIB_NAME "QnnSystem.dll"
diff --git a/src/ggml-qnn.h b/src/ggml-qnn/ggml-qnn.h
similarity index 100%
rename from src/ggml-qnn.h
rename to src/ggml-qnn/ggml-qnn.h
diff --git a/src/ggml-qnn/pthread-shim.h b/src/ggml-qnn/pthread-shim.h
new file mode 100644
index 0000000..eb547b8
--- /dev/null
+++ b/src/ggml-qnn/pthread-shim.h
@@ -0,0 +1,199 @@
+#ifdef _MSC_VER
+#pragma once // include guard: the typedefs and static functions below must not be seen twice in one translation unit
+#include <windows.h>
+#include <sys/timeb.h>
+#include <process.h>
+
+typedef HANDLE pthread_mutex_t;
+typedef int pthread_condattr_t;
+typedef HANDLE pthread_t;
+typedef DWORD pthread_attr_t;
+
+#define pthread_create(thrp, attr, func, arg)                               \
+    (((*(thrp) = CreateThread(NULL, 0,                                     \
+        (LPTHREAD_START_ROUTINE)(func), (arg), 0, NULL)) == NULL) ? -1 : 0) /* 0 on success, -1 if CreateThread fails; attr is ignored */
+#define pthread_join(thr, statusp)                                          \
+    ((WaitForSingleObject((thr), INFINITE) != WAIT_OBJECT_0) ? -1 :         \
+    ((((statusp) == NULL) || GetExitCodeThread((thr), (LPDWORD)(statusp))) \
+    ? (CloseHandle((thr)) ? 0 : -1) : (CloseHandle((thr)), -1))) /* 0 on success, -1 on failure; closes the thread handle once the wait succeeded */
+
+#define PTHREAD_MUTEX_INITIALIZER {(void*)-1,-1,0,0,0,0} /* NOTE(review): CRITICAL_SECTION-style list, not a valid initializer for the HANDLE-based pthread_mutex_t; use pthread_mutex_init */
+#define pthread_mutex_lock(pobject) WaitForSingleObject(*(pobject), INFINITE) /* WAIT_OBJECT_0 (0) once the mutex is owned */
+#define pthread_mutex_unlock(pobject) (ReleaseMutex(*(pobject)) ? 0 : -1) /* pthread convention: 0 on success */
+#define pthread_mutex_init(pobject,pattr) (((*(pobject) = CreateMutex(NULL, FALSE, NULL)) == NULL) ? -1 : 0) /* pattr is ignored; 0 on success */
+#define pthread_mutex_destroy(pobject) (CloseHandle(*(pobject)) ? 0 : -1) /* pthread convention: 0 on success */
+
+/* Windows doesn't have this, so declare it ourselves. */
+// vs2022 has this struct
+//typedef struct timespec {
+//	/* long long in windows is the same as long in unix for 64bit */
+//	long long tv_sec;
+//	long long tv_nsec;
+//} timespec;
+
+typedef struct {
+	int waiters_count_;
+	// Number of waiting threads.
+
+	CRITICAL_SECTION waiters_count_lock_;
+	// Serialize access to <waiters_count_>.
+
+	HANDLE sema_;
+	// Semaphore used to queue up threads waiting for the condition to
+	// become signaled. 
+
+	HANDLE waiters_done_;
+	// An auto-reset event used by the broadcast/signal thread to wait
+	// for all the waiting thread(s) to wake up and be released from the
+	// semaphore. 
+
+	size_t was_broadcast_;
+	// Keeps track of whether we were broadcasting or signaling.  This
+	// allows us to optimize the code if we're just signaling.
+} pthread_cond_t;
+
+static unsigned long long _pthread_time_in_ms(void)
+{
+	struct __timeb64 tb;
+
+	_ftime64(&tb);
+
+	return tb.time * 1000 + tb.millitm;
+}
+
+static unsigned long long _pthread_time_in_ms_from_timespec(const struct timespec *ts)
+{
+	unsigned long long t = ts->tv_sec * 1000;
+	t += ts->tv_nsec / 1000000;
+
+	return t;
+}
+
+static unsigned long long _pthread_rel_time_in_ms(const struct timespec *ts)
+{
+	unsigned long long t1 = _pthread_time_in_ms_from_timespec(ts);
+	unsigned long long t2 = _pthread_time_in_ms();
+
+	/* Prevent underflow */
+	if (t1 < t2) return 0;
+	return t1 - t2;
+}
+
+static int pthread_cond_init(pthread_cond_t *cv, const pthread_condattr_t * ignore)
+{
+	// Waiters queue on a counting semaphore; an auto-reset event lets the last
+	// woken waiter report back to a broadcaster. <ignore> is not consulted.
+	// Returns 0 on success, otherwise the GetLastError() code of the failed call.
+	cv->waiters_count_ = 0;
+	cv->was_broadcast_ = 0;
+	cv->sema_ = CreateSemaphore(NULL, 0, 0x7fffffff, NULL); // unnamed, initial count 0
+	if (cv->sema_ == NULL)
+		return GetLastError();
+	cv->waiters_done_ = CreateEvent(NULL, FALSE, FALSE, NULL); // unnamed, auto-reset, non-signaled
+	if (cv->waiters_done_ == NULL) {
+		const DWORD err = GetLastError(); // capture before CloseHandle can overwrite it
+		CloseHandle(cv->sema_);           // do not leak the semaphore on partial failure
+		return err;
+	}
+	InitializeCriticalSection(&cv->waiters_count_lock_); // last, so no failure path leaves it initialised
+	return 0;
+}
+
+static int pthread_cond_destroy(pthread_cond_t *cond)
+{
+	CloseHandle(cond->sema_);
+	DeleteCriticalSection(&cond->waiters_count_lock_);
+	return (CloseHandle(cond->waiters_done_) == 0) ? GetLastError() : 0;
+}
+
+static int pthread_cond_signal(pthread_cond_t *cv)
+{
+	int have_waiters;
+	EnterCriticalSection(&(cv->waiters_count_lock_));
+	have_waiters = cv->waiters_count_ > 0;
+	LeaveCriticalSection(&cv->waiters_count_lock_);
+
+	// If there aren't any waiters, then this is a no-op.  
+	if (have_waiters){
+		return (ReleaseSemaphore(cv->sema_, 1, 0) == 0) ? GetLastError() : 0;
+	}
+	else
+		return 0;
+}
+
+static int pthread_cond_broadcast(pthread_cond_t *cv)
+{
+	// This is needed to ensure that <waiters_count_> and <was_broadcast_> are
+	// consistent relative to each other.
+	int have_waiters = 0;
+	EnterCriticalSection(&cv->waiters_count_lock_);
+
+	if (cv->waiters_count_ > 0) {
+		// We are broadcasting, even if there is just one waiter...
+		// Record that we are broadcasting, which helps optimize
+		// <pthread_cond_wait> for the non-broadcast case.
+		cv->was_broadcast_ = 1;
+		have_waiters = 1;
+	}
+
+	if (have_waiters) {
+		// Wake up all the waiters atomically.
+		ReleaseSemaphore(cv->sema_, cv->waiters_count_, 0);
+
+		LeaveCriticalSection(&cv->waiters_count_lock_);
+
+		// Wait for all the awakened threads to acquire the counting
+		// semaphore. 
+		WaitForSingleObject(cv->waiters_done_, INFINITE);
+		// This assignment is okay, even without the <waiters_count_lock_> held 
+		// because no other waiter threads can wake up to access it.
+		cv->was_broadcast_ = 0;
+	}
+	else
+		LeaveCriticalSection(&cv->waiters_count_lock_);
+
+	return 0;
+}
+
+static int pthread_cond_wait(pthread_cond_t *cv, pthread_mutex_t *external_mutex)
+{
+	int last_waiter;
+	// Avoid race conditions.
+	EnterCriticalSection(&cv->waiters_count_lock_);
+	cv->waiters_count_++;
+	LeaveCriticalSection(&cv->waiters_count_lock_);
+
+	// This call atomically releases the mutex and waits on the
+	// semaphore until <pthread_cond_signal> or <pthread_cond_broadcast>
+	// are called by another thread.
+	SignalObjectAndWait(*external_mutex, cv->sema_, INFINITE, FALSE);
+
+	// Reacquire lock to avoid race conditions.
+	EnterCriticalSection(&cv->waiters_count_lock_);
+
+	// We're no longer waiting...
+	cv->waiters_count_--;
+
+	// Check to see if we're the last waiter after <pthread_cond_broadcast>.
+	last_waiter = cv->was_broadcast_ && cv->waiters_count_ == 0;
+
+	LeaveCriticalSection(&cv->waiters_count_lock_);
+
+	// If we're the last waiter thread during this particular broadcast
+	// then let all the other threads proceed.
+	if (last_waiter)
+		// This call atomically signals the <waiters_done_> event and waits until
+		// it can acquire the <external_mutex>.  This is required to ensure fairness. 
+		SignalObjectAndWait(cv->waiters_done_, *external_mutex, INFINITE, FALSE);
+	else
+		// Always regain the external mutex since that's the guarantee we
+		// give to our callers. 
+		WaitForSingleObject(*external_mutex, INFINITE);
+
+	return 0;
+}
+
+#else // linux
+
+#include <pthread.h>
+
+#endif
\ No newline at end of file