diff --git a/autotest/la_tests.py b/autotest/la_tests.py
index f8538c5b..42d7cea2 100644
--- a/autotest/la_tests.py
+++ b/autotest/la_tests.py
@@ -607,7 +607,8 @@ def ends_freyberg_test(tmp_path):
     ends = pyemu.EnDS(pst=pst, sim_ensemble=oe, obscov=os.path.join(test_d, "obs.unc"),predictions=predictions)
 
-def ends_freyberg_dsi_test(tmp_path):
+
+def ends_run_freyberg_dsi(tmp_path,nst=False,nst_extrap=None,ztz=False,energy=1.0):
     import pyemu
     import os
     test_d = "ends_master"
@@ -621,58 +622,44 @@ def ends_freyberg_dsi_test(tmp_path):
     oe_name = pst_name.replace(".pst", ".0.obs.csv")
     oe = pyemu.ObservationEnsemble.from_csv(pst=pst, filename=oe_name).iloc[:100, :]
-
     ends = pyemu.EnDS(pst=pst, sim_ensemble=oe,verbose=True)
     t_d = os.path.join(tmp_path,"dsi_template")
-    ends.prep_for_dsi(t_d=t_d)
+
+    ends.prep_for_dsi(t_d=t_d,
+                      use_ztz=ztz,
+                      apply_normal_score_transform=nst,
+                      nst_extrap=nst_extrap,
+                      energy=energy)
     # copy exe to dsi_template
     #shutil.copy2(os.path.join(test_d,"pestpp-ies.exe"),os.path.join(t_d,"pestpp-ies.exe"))
-
+    filename = os.path.join(t_d,"dsi.0.obs.csv")
+    if os.path.exists(filename):
+        os.remove(filename)
     pst = pyemu.Pst(os.path.join(t_d,"dsi.pst"))
-    pst.control_data.noptmax = 0
+    pst.control_data.noptmax = -1
     pst.write(os.path.join(t_d,"dsi.pst"),version=2)
-    #pyemu.os_utils.run("pestpp-ies dsi.pst",cwd="dsi_template")
-    m_d = os.path.join(tmp_path,"master_dsi")
-    pyemu.os_utils.start_workers(t_d,"pestpp-ies","dsi.pst",num_workers=15,worker_root=tmp_path,
-                                 master_dir=m_d)
+    pyemu.os_utils.run("pestpp-ies dsi.pst",cwd=t_d)
 
-    # run test wtih truncated svd
-    ends.prep_for_dsi(t_d=t_d,truncated_svd=True)
-    # copy exe to dsi_template
-    #shutil.copy2(os.path.join(test_d,"pestpp-ies.exe"),os.path.join(t_d,"pestpp-ies.exe"))
+
+    #read in the results
+    oe = pyemu.ObservationEnsemble.from_csv(pst=pst, filename=os.path.join(t_d,"dsi.0.obs.csv"))
+    assert oe.shape[0]==50, f"{50-oe.shape[0]} failed runs"
+    phi_vector = oe.phi_vector.sort_values().values
+    assert phi_vector[0] != phi_vector[1],phi_vector
 
-    pst = pyemu.Pst(os.path.join(t_d,"dsi.pst"))
-    pst.control_data.noptmax = 0
-    pst.write(os.path.join(t_d,"dsi.pst"),version=2)
-    pyemu.os_utils.start_workers(t_d,"pestpp-ies","dsi.pst",num_workers=15,worker_root=tmp_path,
-                                 master_dir=m_d)
+def ends_freyberg_dsi_test(tmp_path):
+    ends_run_freyberg_dsi(tmp_path)
 
+def ends_freyberg_dsi_nst_test(tmp_path):
+    ends_run_freyberg_dsi(tmp_path,nst=True,nst_extrap=None)
 
-    # run test wtih normal score transform
-    ends.prep_for_dsi(t_d=t_d,apply_normal_score_transform=True)
-    # copy exe to dsi_template
-    #shutil.copy2(os.path.join(test_d,"pestpp-ies.exe"),os.path.join(t_d,"pestpp-ies.exe"))
-
-    pst = pyemu.Pst(os.path.join(t_d,"dsi.pst"))
-    pst.control_data.noptmax = 0
-    pst.write(os.path.join(t_d,"dsi.pst"),version=2)
-    pyemu.os_utils.start_workers(t_d,"pestpp-ies","dsi.pst",num_workers=15,worker_root=tmp_path,
-                                 master_dir=m_d)
-
-    # run test with log-transform
-    pst = pyemu.Pst(pst_name)
-    pst.pestpp_options["predictions"] = predictions
-    pst.observation_data["obstransform"] = "log"
-    ends = pyemu.EnDS(pst=pst, sim_ensemble=oe,verbose=True)
-    ends.prep_for_dsi(t_d=t_d,apply_normal_score_transform=False)
-    # copy exe to dsi_template
-    #shutil.copy2(os.path.join(test_d,"pestpp-ies.exe"),os.path.join(t_d,"pestpp-ies.exe"))
-
-    pst = pyemu.Pst(os.path.join(t_d,"dsi.pst"))
-    pst.control_data.noptmax = 0
-    pst.write(os.path.join(t_d,"dsi.pst"),version=2)
-    pyemu.os_utils.start_workers(t_d,"pestpp-ies","dsi.pst",num_workers=15,worker_root=tmp_path,
-                                 master_dir=m_d)
+def ends_freyberg_dsi_extrap_test(tmp_path):
+    ends_run_freyberg_dsi(tmp_path,nst=True,nst_extrap='quadratic')
+
+def ends_freyberg_dsi_ztz_test(tmp_path):
+    ends_run_freyberg_dsi(tmp_path,ztz=True)
+
+def ends_freyberg_dsi_svd_test(tmp_path):
+    ends_run_freyberg_dsi(tmp_path,ztz=True,energy=0.999)
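The five thin wrappers above keep each DSI option combination discoverable (and re-runnable) as its own test. A hypothetical pytest-parametrized equivalent, not part of this change, would collapse them into one test function:

```python
# Hypothetical alternative to the named wrappers above; assumes pytest and
# that ends_run_freyberg_dsi is importable from the test module.
import pytest

@pytest.mark.parametrize(
    "kwargs",
    [
        dict(),                                  # plain DSI
        dict(nst=True, nst_extrap=None),         # normal score transform
        dict(nst=True, nst_extrap="quadratic"),  # NST with extrapolation
        dict(ztz=True),                          # condensed ZtZ SVD
        dict(ztz=True, energy=0.999),            # energy-based truncation
    ],
)
def test_ends_freyberg_dsi_variants(tmp_path, kwargs):
    ends_run_freyberg_dsi(tmp_path, **kwargs)
```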
 
 
 def plot_freyberg_dsi():
@@ -738,15 +725,28 @@ def dsi_normscoretransform_test():
     oe = pyemu.ObservationEnsemble.from_csv(pst=pst, filename=oe_name).iloc[:100, :]
 
     nstval = randrealgen_optimized(oe.shape[0], 1e-7, 1e4)
+    window_size=3
+    if oe.shape[0]>40:
+        window_size=5
+    if oe.shape[0]>90:
+        window_size=7
+    if oe.shape[0]>200:
+        window_size=9
     for name in oe.columns:
         print("transforming:",name)
         sorted_values = oe._df.loc[:,name].sort_values().copy()
+        #if all values are the same, skip
+        if sorted_values.iloc[0] == sorted_values.iloc[-1]:
+            print("all values are the same, skipping")
+            continue
+        sorted_values.loc[:] = pyemu.eds.moving_average_with_endpoints(sorted_values.values, window_size)
         transformed_values = np.asarray([normal_score_transform(nstval, sorted_values, value)[0] for value in sorted_values])
         backtransformed_values = np.asarray([inverse_normal_score_transform(nstval, sorted_values, value)[0] for value in transformed_values])
         diff = backtransformed_values-sorted_values
         assert max(abs(diff))<1e-7, backtransformed_values
 
+
 if __name__ == "__main__":
     #dsi_normscoretransform_test()
     #ends_freyberg_dev()
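For reference, a minimal sketch of the round trip this test exercises, assuming the helpers behave as patched here (a strictly increasing value table, invertible on table points):

```python
# Minimal NST round-trip sketch; a sorted continuous sample is strictly
# increasing almost surely, so no smoothing is needed for this toy case.
import numpy as np
from pyemu.utils.helpers import (randrealgen_optimized,
                                 normal_score_transform,
                                 inverse_normal_score_transform)

vals = np.sort(np.random.default_rng(0).lognormal(sigma=0.5, size=100))
nstval = randrealgen_optimized(vals.shape[0], 1e-7, 10000)  # symmetric normal scores
fwd = np.array([normal_score_transform(nstval, vals, v)[0] for v in vals])
back = np.array([inverse_normal_score_transform(nstval, vals, v)[0] for v in fwd])
assert np.abs(back - vals).max() < 1e-7  # invertible on the table points
```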
diff --git a/pyemu/eds.py b/pyemu/eds.py
index 49ed06e3..298c813c 100644
--- a/pyemu/eds.py
+++ b/pyemu/eds.py
@@ -11,7 +11,7 @@
 from pyemu.mat.mat_handler import Matrix, Jco, Cov
 from pyemu.pst.pst_handler import Pst
 from pyemu.utils.os_utils import _istextfile,run
-from pyemu.utils.helpers import normal_score_transform,randrealgen_optimized,inverse_normal_score_transform
+from pyemu.utils.helpers import normal_score_transform,randrealgen_optimized
 from .logger import Logger
 
@@ -496,7 +496,9 @@ def get_posterior_prediction_moments(self, obslist_dict=None,sim_ensemble=None,i
 
         return mean_dfs,dfstd,dfper
 
-    def prep_for_dsi(self,sim_ensemble=None,t_d="dsi_template",apply_normal_score_transform=False,truncated_svd=False):
+    def prep_for_dsi(self,sim_ensemble=None,t_d="dsi_template",
+                     apply_normal_score_transform=False,nst_extrap=None,
+                     use_ztz=False,energy=1.0):
         """Setup a new PEST interface for the data-space inversion process. If the observation data
         in the Pst object has a "obstransform" column, then observations for which "log" is specified
         will be subject to log-transformation. If the `apply_normal_score_transform` flag is set to `True`,
         then the observations and predictions will be subject to a normal score transform.
@@ -508,8 +510,11 @@ def prep_for_dsi(self,sim_ensemble=None,t_d="dsi_template",apply_normal_score_tr
             t_d (`str`): template directory to setup the DSI model + pest files in. Default is `dsi_template`
             apply_normal_score_transform (`bool`): flag to apply a normal score transform to the observations
                 and predictions. Default is `False`
-            truncated_svd (`bool`): flag to use a truncated SVD for the pseudo-inverse of the deviations matrix.
+            nst_extrap (`str`): flag to apply extrapolation to the normal score transform. Can be None,
+                'linear' or 'quadratic'. Default is None.
+            use_ztz (`bool`): flag to use the condensed ZtZ matrix for SVD. The ZtZ matrix has dimensions
+                nreal*nreal, instead of the nreal*nobs dimensions of Z. This makes the SVD computation
+                faster and more memory efficient when nobs >> nreal. Default is `False`
+            energy (`float`): energy threshold for truncating the sqrt(C) matrix. Default is `1.0`,
+                which applies no truncation.
 
         Example::
 
@@ -525,12 +530,15 @@
         if sim_ensemble is None:
             sim_ensemble = self.sim_ensemble.copy()
 
+        if nst_extrap is not None:
+            assert nst_extrap in ["linear","quadratic"], "nst_extrap must be None, 'linear' or 'quadratic'"
+
         if os.path.exists(t_d):
             self.logger.warn("EnDS.prep_for_dsi(): t_d '{0}' exists, removing...".format(t_d))
             shutil.rmtree(t_d)
         os.makedirs(t_d)
 
-        self.logger.log("getting deviations")
+
         nz_names = self.pst.nnz_obs_names
         snz_names = set(nz_names)
         z_names = [n for n in self.pst.obs_names if n not in snz_names]
@@ -538,39 +546,68 @@ def prep_for_dsi(self,sim_ensemble=None,t_d="dsi_template",apply_normal_score_tr
         names.extend(nz_names)
         names.sort()
+        # make sure names are sorted
+        sim_ensemble = sim_ensemble.loc[:,names]
+
+        self.logger.log("applying transformations")
         # implement log-transform/offset and normal score transform
         transf_names = nz_names.copy()
         transf_names.extend(self.predictions)
         if "obstransform" in self.pst.observation_data.columns:
-            obs = self.pst.observation_data.loc[transf_names].copy()
+            obs = self.pst.observation_data.copy()
+            #make sure names are ordered
+            obs = obs.loc[names,:]
+            #TODO: deal with "scale" and user-specified "offset"
             obs["offset"] = 0.0
             #TODO: more elegant? incase all 'none' are passed...
             obsnmes = obs.loc[obs.obstransform=='log'].obsnme.values
             if len(obsnmes) > 0:
                 for name in obsnmes: #TODO: make more efficient
                     self.logger.log("applying obs log-transform to:"+name)
-                    values = sim_ensemble.loc[:,name].values
+                    values = sim_ensemble.loc[:,name].astype(float).values
                     offset = abs(min(values))+1.0 #arbitrary; enforce positive values
                     values+=offset
                     assert min(values)>0, "values must be positive. min value is "+str(min(values))
                     sim_ensemble.loc[:,name] = np.log10(values)
                     obs.loc[obs.obsnme==name,'offset'] = offset
             obs[['obsnme','obsval','obstransform','offset']].to_csv(os.path.join(t_d,"dsi_obs_transform.csv"),index=False)
-
+            #numpy binary for i/o speed
+            np.save(os.path.join(t_d,"dsi_obs_offset.npy"),
+                    obs.offset.values,
+                    allow_pickle=False, fix_imports=True)
+            obs['flag'] = 0
+            obs.loc[obs.obstransform=='log', "flag"] = 1
+            np.save(os.path.join(t_d,"dsi_obs_log.npy"),
+                    obs.flag.values,
+                    allow_pickle=False, fix_imports=True)
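The offset bookkeeping persisted to `dsi_obs_offset.npy` and `dsi_obs_log.npy` above is exactly reversible. A minimal sketch, with arbitrary stand-in values:

```python
# Shift a log-flagged series positive before log10, then reverse it the same
# way dsi_forward_run() does; values and offset here are illustrative only.
import numpy as np

values = np.array([-3.2, 0.0, 4.7])   # ensemble values for one log-flagged obs
offset = abs(values.min()) + 1.0      # same arbitrary positive shift as above
forward = np.log10(values + offset)   # what gets stored in the ensemble
recovered = 10.0**forward - offset    # the run-time reversal
assert np.allclose(recovered, values)
```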
+
         if apply_normal_score_transform:
             # prepare for normal score transform
-            nstval = randrealgen_optimized(sim_ensemble.shape[0], 1e-7, 1e4)
-
+            nstval = randrealgen_optimized(sim_ensemble.shape[0])
             back_transform_df = pd.DataFrame()
             self.logger.log("applying normal score transform to non-zero obs and predictions")
             #TODO: make more efficient
-            for name in transf_names:
+            for name in transf_names:
                 print("transforming:",name)
                 values = sim_ensemble._df.loc[:,name].copy()
                 values.sort_values(inplace=True)
-                transformed_values = [normal_score_transform(nstval, values.values, v)[0] for v in values.values]
-                #transformed_values, sorted_values, sorted_idxs = normal_score_transform(values) #transformed data retains the same order as the original data
+                if values.iloc[0] != values.iloc[-1]:
+                    # apply smoothing as per DSI2; window sizes are arbitrary...
+                    window_size=3
+                    if values.shape[0]>40:
+                        window_size=5
+                    if values.shape[0]>90:
+                        window_size=7
+                    if values.shape[0]>200:
+                        window_size=9
+                    #print("window size:",window_size,values.shape[0])
+                    values.loc[:] = moving_average_with_endpoints(values.values, window_size)
+                    transformed_values = [normal_score_transform(nstval, values.values, v)[0] for v in values.values]
+                    #transformed_values, sorted_values, sorted_idxs = normal_score_transform(values) #transformed data retains the same order as the original data
+                elif values.iloc[0] == values.iloc[-1]:
+                    print("all values are the same, skipping nst")
+                    transformed_values = values.values
                 sim_ensemble.loc[values.index,name] = transformed_values
                 df = pd.DataFrame()
                 df['real'] = values.index
@@ -579,27 +616,31 @@ def prep_for_dsi(self,sim_ensemble=None,t_d="dsi_template",apply_normal_score_tr
                 df['nstval'] = nstval
                 df['obsnme'] = name
                 back_transform_df=pd.concat([back_transform_df,df],ignore_index=True)
-            back_transform_df.to_csv(os.path.join(t_d,"dsi_obs_backtransform.csv"),index=False)
-
-        Z = sim_ensemble.get_deviations() / np.sqrt(float(sim_ensemble.shape[0] - 1))
-        Z = Z.loc[:,names]
-
-        self.logger.log("getting deviations")
-
-        self.logger.log("pseudo inv of deviations matrix")
+            #back_transform_df.to_csv(os.path.join(t_d,"dsi_obs_backtransform.csv"),index=False)
+            #numpy binary for speed
+            np.save(os.path.join(t_d,"dsi_obs_backtransformvals.npy"),
+                    back_transform_df[['sorted_values',"nstval"]].values,
+                    allow_pickle=False, fix_imports=True)
+            np.save(os.path.join(t_d,"dsi_obs_backtransformobsnmes.npy"),
+                    back_transform_df['obsnme'].values,
+                    allow_pickle=True, fix_imports=True)
+
+        self.logger.log("applying transformations")
 
-        if truncated_svd:
-            deltad = Matrix.from_dataframe(Z)
-            U, S, V = deltad.pseudo_inv_components(maxsing=Z.shape[0],
-                                                   eigthresh=1e-5)
-            S = S.x
-            V = V.x
+        self.logger.log("computing projection matrix")
+        if use_ztz:
+            self.logger.log("using ztz approach...")
+            pmat, s = compute_using_ztz(sim_ensemble)
+            self.logger.log("using ztz approach...")
         else:
-            U, S, V = np.linalg.svd(Z.values, full_matrices=False)
-            V = V.transpose()
-            S = np.diag(S)
-        self.logger.log("pseudo inv of deviations matrix")
+            self.logger.log("using z approach...")
+            pmat, s = compute_using_z(sim_ensemble)
+            self.logger.log("using z approach...")
+        self.logger.log("computing projection matrix")
 
+        self.logger.log("applying truncation...")
+        pmat = apply_energy_based_truncation(energy,s,pmat)
+        self.logger.log("applying truncation...")
 
         self.logger.log("creating tpl files")
         dsi_in_file = os.path.join(t_d, "dsi_pars.csv")
@@ -609,7 +650,7 @@ def prep_for_dsi(self,sim_ensemble=None,t_d="dsi_template",apply_normal_score_tr
             ftpl.write("ptf ~\n")
             fin.write("parnme,parval1\n")
             ftpl.write("parnme,parval1\n")
-            npar = S.shape[0]
+            npar = s.shape[0]
             dsi_pnames = []
             for i in range(npar):
                 pname = "dsi_par{0:04d}".format(i)
@@ -620,6 +661,8 @@ def prep_for_dsi(self,sim_ensemble=None,t_d="dsi_template",apply_normal_score_tr
         ftpl.close()
 
         mn_vec = sim_ensemble.mean(axis=0)
+        # check that sim_ensemble has names ordered
+        assert (mn_vec.index.values == names).all(), "sim_ensemble names are not ordered"
         mn_in_file = os.path.join(t_d, "dsi_pr_mean.csv")
         mn_tpl_file = mn_in_file + ".tpl"
         fin = open(mn_in_file, 'w')
@@ -638,15 +681,16 @@ def prep_for_dsi(self,sim_ensemble=None,t_d="dsi_template",apply_normal_score_tr
         self.logger.log("creating tpl files")
 
         self.logger.log("saving proj mat")
-
-        #pmat = U * S
-        pmat = np.dot(V, S)
         #row_names = ["sing_vec_{0}".format(i) for i in range(pmat.shape[0])]
         pmat = Matrix(x=pmat,col_names=dsi_pnames,row_names=names)
         pmat.col_names = dsi_pnames
-        proj_name = "dsi_proj_mat.jcb" # dont change this name!!!
+        #proj_name = "dsi_proj_mat.jcb" # dont change this name!!!
+        proj_name = "dsi_proj_mat.npy" # dont change this name!!!
         proj_path = os.path.join(t_d,proj_name)
-        pmat.to_coo(proj_path)
+        #pmat.to_coo(proj_path)
+        # use numpy for speed
+        np.save(os.path.join(t_d,proj_name), pmat.x, allow_pickle=False, fix_imports=True)
+
         self.logger.statement("projection matrix dimensions:"+str(pmat.shape))
         self.logger.statement("projection matrix saved to "+proj_path)
         self.logger.log("saving proj mat")
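A toy numpy sketch of what the saved projection matrix encodes: with Z = deviations / sqrt(nreal-1) and Z = U S V^T, pmat = V S, so a vector of standard-normal "dsi_par" values maps to a simulated observation vector via mean + pmat @ p. All names and sizes below are illustrative:

```python
# Stand-in ensemble; cov(pmat @ p) with p ~ N(0, I) equals Z^T Z by construction.
import numpy as np

rng = np.random.default_rng(0)
ens = rng.normal(size=(100, 8))                  # nreal x nobs stand-in ensemble
mean = ens.mean(axis=0)
z = (ens - mean) / np.sqrt(ens.shape[0] - 1.0)
u, s, vt = np.linalg.svd(z, full_matrices=False)
pmat = vt.T @ np.diag(s)                         # nobs x nsing projection matrix
p = rng.normal(size=s.shape[0])                  # one realization of dsi_pars
sim = mean + pmat @ p                            # reconstructed observation vector
```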
@@ -657,35 +701,37 @@ def dsi_forward_run():
             import os
             import numpy as np
             import pandas as pd
-            from pyemu.mat.mat_handler import Matrix
             from pyemu.utils.helpers import inverse_normal_score_transform
-            pmat = Matrix.from_binary("dsi_proj_mat.jcb")
+            pmat = np.load("dsi_proj_mat.npy")
             pvals = pd.read_csv("dsi_pars.csv",index_col=0)
-            pvals = pvals.loc[pmat.col_names,:]
             ovals = pd.read_csv("dsi_pr_mean.csv",index_col=0)
-            ovals = ovals.loc[pmat.row_names,:]
-            sim_vals = ovals + np.dot(pmat.x,pvals.values)
-            filename = "dsi_obs_backtransform.csv"
+            sim_vals = ovals + np.dot(pmat,pvals.values)
+            filename = "dsi_obs_backtransformvals.npy"
             if os.path.exists(filename):
-                back_transform_df = pd.read_csv(filename)
                 print("applying back-transform")
-                obsnmes = back_transform_df.obsnme.unique()
-                back_vals = [inverse_normal_score_transform(
-                                back_transform_df.loc[back_transform_df['obsnme']==o,'nstval'].values,
-                                back_transform_df.loc[back_transform_df['obsnme']==o,'sorted_values'].values,
-                                sim_vals.loc[o].mn
+                backtransformvals = np.load("dsi_obs_backtransformvals.npy")
+                backtransformobsnmes = np.load("dsi_obs_backtransformobsnmes.npy",allow_pickle=True)
+                obsnmes = np.unique(backtransformobsnmes)
+                back_vals = [
+                    inverse_normal_score_transform(
+                        backtransformvals[np.where(backtransformobsnmes==o)][:,1],
+                        backtransformvals[np.where(backtransformobsnmes==o)][:,0],
+                        sim_vals.loc[o].mn,
+                        extrap=None
                     )[0]
-                    for o in obsnmes]
+                    for o in obsnmes
+                ]
                 sim_vals.loc[obsnmes,'mn'] = back_vals
             if os.path.exists("dsi_obs_transform.csv"):
                 print("reversing log-transform")
-                obs = pd.read_csv("dsi_obs_transform.csv")
-                obsnmes = obs.loc[obs.obstransform=='log'].obsnme.unique()
-                back_vals = [
-                    (10**sim_vals.loc[x].mn) - obs.loc[obs.obsnme==x,'offset'].values[0]
-                    for x in obsnmes
-                ]
-                sim_vals.loc[obsnmes,'mn'] = back_vals
+                offset = np.load("dsi_obs_offset.npy")
+                log_trans = np.load("dsi_obs_log.npy")
+                assert log_trans.shape[0] == sim_vals.mn.values.shape[0], f"log transform shape mismatch: {log_trans.shape[0]},{sim_vals.mn.values.shape[0]}"
+                assert offset.shape[0] == sim_vals.mn.values.shape[0], f"offset transform shape mismatch: {offset.shape[0]},{sim_vals.mn.values.shape[0]}"
+                vals = sim_vals.mn.values
+                vals[np.where(log_trans==1)] = 10**vals[np.where(log_trans==1)]
+                vals -= offset
+                sim_vals.loc[:,'mn'] = vals
             print(sim_vals)
             sim_vals.to_csv("dsi_sim_vals.csv")
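The vectorized log/offset reversal above relies on the flag and offset arrays being aligned with `sim_vals`; only flagged entries get exponentiated, and the offset is zero for non-log observations. A minimal sketch with made-up values:

```python
# Illustrative analogs of dsi_obs_log.npy and dsi_obs_offset.npy.
import numpy as np

vals = np.array([1.2, -0.3, 2.0])   # the "mn" column after projection
log_trans = np.array([1, 0, 1])     # 1 where obstransform == 'log'
offset = np.array([5.0, 0.0, 2.5])  # 0.0 for non-log observations
vals[np.where(log_trans == 1)] = 10 ** vals[np.where(log_trans == 1)]
vals -= offset
print(vals)                          # back in native observation space
```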
@@ -713,9 +759,16 @@
         par = pst.parameter_data
         dsi_pars = par.loc[par.parnme.str.startswith("dsi_par"),"parnme"]
         par.loc[dsi_pars,"parval1"] = 0
-        par.loc[dsi_pars,"parubnd"] = 2.5
-        par.loc[dsi_pars,"parlbnd"] = -2.5
+        par.loc[dsi_pars,"parubnd"] = 10.0
+        par.loc[dsi_pars,"parlbnd"] = -10.0
         par.loc[dsi_pars,"partrans"] = "none"
+        with open(os.path.join(t_d,"dsi.unc"),'w') as f:
+            f.write("START STANDARD_DEVIATION\n")
+            for p in dsi_pars:
+                f.write("{0} 1.0\n".format(p))
+            f.write("END STANDARD_DEVIATION")
+        pst.pestpp_options['parcov'] = "dsi.unc"
+
         mn_pars = par.loc[par.parnme.str.startswith("dsi_prmn"),"parnme"]
         par.loc[mn_pars,"partrans"] = "fixed"
         for pname,pval in mn_dict.items():
@@ -731,11 +784,13 @@
         pst.model_command = "python forward_run.py"
         self.logger.log("creating Pst")
 
         import inspect
+        #print([l for l in inspect.getsource(dsi_forward_run).split("\n")])
        lines = [line[12:] for line in inspect.getsource(dsi_forward_run).split("\n")][1:]
-
-
         with open(os.path.join(t_d,"forward_run.py"),'w') as f:
             for line in lines:
+                if nst_extrap is not None:
+                    if "extrap=None" in line:
+                        line = line.replace("None",f"'{nst_extrap}'")
                 f.write(line+"\n")
 
         pst.write(os.path.join(t_d,"dsi.pst"),version=2)
         self.logger.statement("saved pst to {0}".format(os.path.join(t_d,"dsi.pst")))
@@ -747,14 +802,107 @@
 
         return pst
 
+
+def compute_using_z(sim_ensemble):
+    z = sim_ensemble.get_deviations() / np.sqrt(float(sim_ensemble._df.shape[0] - 1))
+    z = z.values
+    u, s, v = np.linalg.svd(z, full_matrices=False)
+    us = np.dot(v.T, np.diag(s))
+    return us,s
+
+def compute_using_ztz(sim_ensemble):
+    # rval are the transformed obs values
+    rval = sim_ensemble._df.copy()
+    #mu2 is the mean of the transformed obs values
+    mu2 = rval.mean()
+    #adjust rval by subtracting mu2
+    rval -= mu2
+    #divide rval by the sqrt of nreal-1
+    nreal = rval.shape[0]
+    rval = rval*np.sqrt(1/(nreal-1))
+    # rval.T to match pest utils implementation
+    z = rval.T.values
+    # Compute the ZtZ matrix
+    ztz = np.dot(z.T,z)
+    assert ztz.shape[0] == z.shape[1], "ZtZ matrix is not square"
+    assert ztz.shape[0] == sim_ensemble.shape[0], "ZtZ matrix is not nreal*nreal"
+
+    #We now do SVD on ZtZ.
+    print("doing SVD on ZtZ")
+    u, s2, v = np.linalg.svd(ztz, full_matrices=False)
+    s = np.sqrt(s2)
+    s[z.shape[0]:] = 0 #truncation to match compute_using_z()
+
+    # formulate the sqrt of the covariance matrix
+    us = np.dot(z,u)
+    return us, s
+
+def apply_energy_based_truncation(energy,s,us):
+    if energy >= 1.0:
+        print("Warning: energy>=1.0, no truncation applied")
+        return us
+    # Determine where to truncate
+    # Determine nn
+    if us.shape[0]==us.shape[1]:
+        nn = us.shape[0] - 1
+    else:
+        nobs = us.shape[0]
+        nreal = us.shape[1]
+        nn = min(nobs, nreal) - 1
+    # Compute total_energy
+    total_energy = np.sum((np.sqrt(s))[:nn])
+    # Find energy truncation point
+    ntrunc = np.where((np.sqrt(s)).cumsum()/total_energy<=energy)[0].shape[0]
+    # Initialize threshold
+    #s1 = s[0]
+    #thresh = 1.0e-7 * s1 #NOTE: JDoh's implementation uses an additional level of truncation
+    #ntrunc = min(np.where(s>=thresh)[0][0], ntrunc)+1
+    ntrunc=ntrunc+1
+    if ntrunc>=us.shape[1]:
+        print("ntrunc>=us.shape[1], no truncation applied")
+    else:
+        print("truncating to {0} singular values".format(ntrunc))
+        # Apply threshold logic
+        us = us[:,:ntrunc]
+    return us
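A hedged toy check of the energy criterion above, assuming the module-level helper added by this patch is importable from `pyemu.eds`: with a fast-decaying spectrum, `energy=0.9` should retain only the leading columns.

```python
# Synthetic singular values: sqrt(s).cumsum()/total = [0.571, 0.857, 1.0, ...],
# so energy=0.9 keeps 2 values below threshold and truncates to 3 columns.
import numpy as np
from pyemu.eds import apply_energy_based_truncation

s = np.array([4.0, 1.0, 0.25, 0.0625])
us = np.random.default_rng(1).normal(size=(20, 4))
us_trunc = apply_energy_based_truncation(0.9, s, us)
assert us_trunc.shape == (20, 3)  # fewer columns retained
```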
+def moving_average_with_endpoints(y_values, window_size):
+    # Ensure the window size is odd
+    if window_size % 2 == 0:
+        raise ValueError("window_size must be odd")
+    # Calculate half-window size
+    half_window = window_size // 2
+    # Initialize the output array
+    smoothed_y = np.zeros_like(y_values)
+    # Handle the endpoints
+    for i in range(0,half_window):
+        # Start
+        smoothed_y[i] = np.mean(y_values[:i + half_window])
+    for i in range(1,half_window+1):
+        # End
+        smoothed_y[-i] = np.mean(y_values[::-1][:i + half_window + 1])
+    # Handle the middle part with the full (odd) window, inclusive of both ends
+    for i in range(half_window, len(y_values) - half_window):
+        smoothed_y[i] = np.mean(y_values[i - half_window:i + half_window + 1])
+    # Enforce endpoints
+    smoothed_y[0] = y_values[0]
+    smoothed_y[-1] = y_values[-1]
+    # Ensure uniqueness by adding small increments if values are duplicated
+    # NOTE: this is a hack to ensure uniqueness in the normal score transform
+    smoothed_y = make_unique(smoothed_y, delta=1e-10)
+    return smoothed_y
+
+
+def make_unique(arr, delta=1e-10):
+    """
+    Modifies a sorted numpy array in-place to ensure all elements are unique.
+
+    Parameters:
+        arr (np.ndarray): The sorted numpy array.
+        delta (float): The minimum increment to apply to duplicate elements.
+            Default is a very small value (1e-10).
+    """
+    for i in range(1, len(arr)):
+        if arr[i] <= arr[i - 1]:
+            arr[i] = arr[i - 1] + delta
-
-
-
-
-
-
-
-
-
-
+    return arr
diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py
index 07ff93b1..1b73b6b8 100644
--- a/pyemu/utils/helpers.py
+++ b/pyemu/utils/helpers.py
@@ -4017,8 +4017,11 @@ def randrealgen_optimized(nreal, tol=1e-7, max_samples=1000000):
     """
     rval = np.zeros(nreal)
    nsamp = 0
-    numsort = (nreal + 1) // 2
-
+    # for odd nreal, leave the middle slot out of numsort so it stays zero (the median score)
+    if nreal % 2 == 0:
+        numsort = (nreal + 1) // 2
+    else:
+        numsort = nreal // 2
     while nsamp < max_samples:
         nsamp += 1
         work1 = np.random.normal(size=nreal)
@@ -4036,7 +4039,10 @@ def randrealgen_optimized(nreal, tol=1e-7, max_samples=1000000):
 
     rval[:numsort] = work1[:numsort]
     rval[:numsort] /= nsamp
-    rval[numsort:] = -rval[:numsort][::-1]
+    if nreal % 2 == 0:
+        rval[numsort:] = -rval[:numsort][::-1]
+    else:
+        rval[numsort+1:] = -rval[:numsort][::-1]
     return rval
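A quick property check of the odd/even handling above, assuming the helper converges within `max_samples`: the scores come out antisymmetric, with an exact zero in the median slot when `nreal` is odd.

```python
import numpy as np
from pyemu.utils.helpers import randrealgen_optimized

for nreal in (10, 11):
    scores = randrealgen_optimized(nreal, 1e-7, 10000)
    assert np.allclose(scores, -scores[::-1])  # mirror symmetry
    if nreal % 2 == 1:
        assert scores[nreal // 2] == 0.0       # middle slot stays zero
```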
@@ -4064,10 +4070,10 @@ def normal_score_transform(nstval, val, value):
 
     nstval = np.asarray(nstval)
 
-    # if the value is outside the range of the table, return the first or last value
-    if value <= val[0]:
-        return nstval[0], 0
-    elif value >= val[-1]:
-        return nstval[-1], len(val)
+    # the value must lie within the range of the table
+    assert value >= val[0], "Value is below the minimum value in the table."
+    assert value <= val[-1], "Value is greater than the maximum value in the table."
+    # ensure that val is sorted
+    assert np.all(np.diff(val) > 0), f"Values in the table must be sorted in ascending order:{list(zip(np.diff(val)>0,val))}"
 
     # find the rank of the value in the table
     rank = np.searchsorted(val, value, side='right') - 1
@@ -4087,23 +4093,31 @@ def normal_score_transform(nstval, val, value):
 
 def inverse_normal_score_transform(nstval, val, value, extrap='quadratic'):
     nreal = len(val)
+    # check that nstval is sorted
+    assert np.all(np.diff(nstval) > 0), "Values in the table must be sorted in ascending order"
+    # check that val is sorted
+    assert np.all(np.diff(val) > 0), "Values in the table must be sorted in ascending order"
 
     def linear_extrapolate(x0, y0, x1, y1, x):
         if x1 != x0:
             return y0 + (y1 - y0) / (x1 - x0) * (x - x0)
         return y0
 
-    def quadratic_extrapolate(x0, y0, x1, y1, x2, y2, x):
-        denom = (x0 - x) * (x1 - x) * (x2 - x)
-        if denom == 0:
-            print(x, x0, x1, x2)
+    def quadratic_extrapolate(x1, y1, x2, y2, x3, y3, x4):
+        y12=y1-y2
+        x23=x2-x3
+        y23=y2-y3
+        x12=x1-x2
+        x13=x1-x3
+        if x12==0 or x23==0 or x13==0:
             raise ValueError("Input x values must be distinct")
-
-        a = ((x - x1) * (y2 - y1) - (x - x2) * (y1 - y0)) / denom
-        b = ((x - x2) * (y1 - y0) - (x - x0) * (y2 - y1)) / denom
-        c = y0 - a * x0**2 - b * x0
-        y = a * x**2 + b * x + c
-        return y
+        a = (y12*x23-y23*x12)
+        den = x12*x23*x13
+        a = a/den
+        b = y23/x23 - a*(x2+x3)
+        c = y1-x1*(a*x1+b)
+        y4 = a*x4**2 + b*x4 + c
+        return y4
 
     ilim = 0
     if value in nstval:
@@ -4116,7 +4130,7 @@ def quadratic_extrapolate(x0, y0, x1, y1, x2, y2, x):
             value = val[0]
         elif extrap == 'linear':
             value = linear_extrapolate(nstval[0], val[0], nstval[1], val[1], value)
-            value = min(value, val[0])
+            #value = min(value, val[0])
         elif extrap == 'quadratic' and nreal >= 3:
             y_vals = np.unique(val)[:3]
             idxs = np.searchsorted(val,y_vals)
@@ -4132,7 +4146,7 @@ def quadratic_extrapolate(x0, y0, x1, y1, x2, y2, x):
             value = val[-1]
         elif extrap == 'linear':
             value = linear_extrapolate(nstval[-2], val[-2], nstval[-1], val[-1], value)
-            value = max(value, val[-1])
+            #value = max(value, val[-1])
         elif extrap == 'quadratic' and nreal >= 3:
             y_vals = np.unique(val)[-3:]
             idxs = np.searchsorted(val,y_vals)
@@ -4144,13 +4158,11 @@ def quadratic_extrapolate(x0, y0, x1, y1, x2, y2, x):
 
     else:
         rank = np.searchsorted(nstval, value) - 1
-        nstdiff = nstval[rank + 1] - nstval[rank]
-        diff = val[rank + 1] - val[rank]
-        if nstdiff <= 0.0 or diff <= 0.0:
-            value = val[rank]
-        else:
-            nstdist = value - nstval[rank]
-            value = val[rank] + (nstdist / nstdiff) * diff
+        # Get the bounding x and y values
+        x0, x1 = nstval[rank], nstval[rank + 1]
+        y0, y1 = val[rank], val[rank + 1]
+        # Perform linear interpolation
+        value = y0 + (y1 - y0) * (value - x0) / (x1 - x0)
 
     return value, ilim
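A sanity check of the rewritten `quadratic_extrapolate` coefficients: fitting three points on a known parabola should reproduce it exactly outside the fitted range. The helper is re-stated standalone here purely for illustration.

```python
# Standalone restatement of the nested helper above, checked against an
# arbitrary known parabola f(x) = 2x^2 - 3x + 1.
def quadratic_extrapolate(x1, y1, x2, y2, x3, y3, x4):
    y12, y23 = y1 - y2, y2 - y3
    x12, x23, x13 = x1 - x2, x2 - x3, x1 - x3
    a = (y12 * x23 - y23 * x12) / (x12 * x23 * x13)
    b = y23 / x23 - a * (x2 + x3)
    c = y1 - x1 * (a * x1 + b)
    return a * x4**2 + b * x4 + c

f = lambda x: 2.0 * x**2 - 3.0 * x + 1.0
assert abs(quadratic_extrapolate(0, f(0), 1, f(1), 2, f(2), 5.0) - f(5.0)) < 1e-9
```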