diff --git a/Project2/Project2.sdf b/Project2/Project2.sdf
new file mode 100644
index 0000000..e6dd31d
Binary files /dev/null and b/Project2/Project2.sdf differ
diff --git a/Project2/Project2.sln b/Project2/Project2.sln
new file mode 100644
index 0000000..899c5bc
--- /dev/null
+++ b/Project2/Project2.sln
@@ -0,0 +1,20 @@
+
+Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual Studio 2010
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Project2", "Project2\Project2.vcxproj", "{78336453-2589-4F51-9CB0-FF46B00878A5}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Release|Win32 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {78336453-2589-4F51-9CB0-FF46B00878A5}.Debug|Win32.ActiveCfg = Debug|Win32
+ {78336453-2589-4F51-9CB0-FF46B00878A5}.Debug|Win32.Build.0 = Debug|Win32
+ {78336453-2589-4F51-9CB0-FF46B00878A5}.Release|Win32.ActiveCfg = Release|Win32
+ {78336453-2589-4F51-9CB0-FF46B00878A5}.Release|Win32.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/Project2/Project2.suo b/Project2/Project2.suo
new file mode 100644
index 0000000..1bcc0c1
Binary files /dev/null and b/Project2/Project2.suo differ
diff --git a/Project2/Project2/Debug/Project2.Build.CppClean.log b/Project2/Project2/Debug/Project2.Build.CppClean.log
new file mode 100644
index 0000000..d05165b
--- /dev/null
+++ b/Project2/Project2/Debug/Project2.Build.CppClean.log
@@ -0,0 +1,20 @@
+C:\COURSE\CIS565\PROJECT2-STREAMCOMPACTION\PROJECT2\DEBUG\PROJECT2.EXE
+C:\COURSE\CIS565\PROJECT2-STREAMCOMPACTION\PROJECT2\DEBUG\PROJECT2.ILK
+C:\COURSE\CIS565\PROJECT2-STREAMCOMPACTION\PROJECT2\DEBUG\PROJECT2.PDB
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\cl.command.1.tlog
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\CL.read.1.tlog
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\CL.write.1.tlog
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\link.command.1.tlog
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\link.read.1.tlog
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\link.write.1.tlog
+C:\COURSE\CIS565\PROJECT2-STREAMCOMPACTION\PROJECT2\PROJECT2\DEBUG\MAIN.OBJ
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\mt.command.1.tlog
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\mt.read.1.tlog
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\mt.write.1.tlog
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\prefixSum.cu.cache
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\prefixSum.cu.obj
+C:\COURSE\CIS565\PROJECT2-STREAMCOMPACTION\PROJECT2\PROJECT2\DEBUG\PROJECT2.EXE.INTERMEDIATE.MANIFEST
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\Project2.vcxprojResolveAssemblyReference.cache
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\Project2.write.1.tlog
+C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\vc100.idb
+C:\COURSE\CIS565\PROJECT2-STREAMCOMPACTION\PROJECT2\PROJECT2\DEBUG\VC100.PDB
diff --git a/Project2/Project2/Debug/Project2.log b/Project2/Project2/Debug/Project2.log
new file mode 100644
index 0000000..222b5f5
--- /dev/null
+++ b/Project2/Project2/Debug/Project2.log
@@ -0,0 +1,16 @@
+Build started 9/29/2014 6:21:42 PM.
+ 1>Project "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Project2.vcxproj" on node 2 (clean target(s)).
+ 1>_PrepareForClean:
+ Deleting file "Debug\Project2.lastbuildstate".
+ CudaClean:
+ cmd.exe /C "C:\Users\xinjie\AppData\Local\Temp\tmpe807d7cf708c46feb7b418f14f0a1b53.cmd"
+ "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin\nvcc.exe" -ccbin "c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -G --keep-dir Debug -maxrregcount=0 --machine 32 --compile -g -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Zi /RTC1 /MDd " -o Debug\prefixSum.cu.obj "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\prefixSum.cu" -clean
+
+ C:\course\CIS565\Project2-StreamCompaction\Project2\Project2>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin\nvcc.exe" -ccbin "c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -G --keep-dir Debug -maxrregcount=0 --machine 32 --compile -g -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Zi /RTC1 /MDd " -o Debug\prefixSum.cu.obj "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\prefixSum.cu" -clean
+ prefixSum.cu
+ Deleting file "Debug\prefixSum.cu.deps".
+ 1>Done Building Project "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Project2.vcxproj" (clean target(s)).
+
+Build succeeded.
+
+Time Elapsed 00:00:00.39
diff --git a/Project2/Project2/PrefixSumCPU.cpp b/Project2/Project2/PrefixSumCPU.cpp
new file mode 100644
index 0000000..cb30149
--- /dev/null
+++ b/Project2/Project2/PrefixSumCPU.cpp
@@ -0,0 +1,11 @@
+#include "PrefixSumCPU.h"
+
+
+PrefixSumCPU::PrefixSumCPU(void)
+{
+}
+
+
+PrefixSumCPU::~PrefixSumCPU(void)
+{
+}
diff --git a/Project2/Project2/PrefixSumCPU.h b/Project2/Project2/PrefixSumCPU.h
new file mode 100644
index 0000000..10ac130
--- /dev/null
+++ b/Project2/Project2/PrefixSumCPU.h
@@ -0,0 +1,8 @@
+#pragma once
+class PrefixSumCPU
+{
+public:
+ PrefixSumCPU(void);
+ ~PrefixSumCPU(void);
+};
+
diff --git a/Project2/Project2/Project2.vcxproj b/Project2/Project2/Project2.vcxproj
new file mode 100644
index 0000000..a61aa4d
--- /dev/null
+++ b/Project2/Project2/Project2.vcxproj
@@ -0,0 +1,81 @@
+
+
+
+
+ Debug
+ Win32
+
+
+ Release
+ Win32
+
+
+
+ {78336453-2589-4F51-9CB0-FF46B00878A5}
+ Project2
+
+
+
+ Application
+ true
+ MultiByte
+
+
+ Application
+ false
+ true
+ MultiByte
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Level3
+ Disabled
+
+
+ true
+ kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;cudart.lib;%(AdditionalDependencies)
+ Console
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+
+
+ true
+ true
+ true
+ kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;cudart.lib;%(AdditionalDependencies)
+
+
+
+
+
+
+
+ Document
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Project2/Project2/Project2.vcxproj.filters b/Project2/Project2/Project2.vcxproj.filters
new file mode 100644
index 0000000..d55dc0b
--- /dev/null
+++ b/Project2/Project2/Project2.vcxproj.filters
@@ -0,0 +1,32 @@
+
+
+
+
+ {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
+ cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
+
+
+ {93995380-89BD-4b04-88EB-625FBE52EBFB}
+ h;hpp;hxx;hm;inl;inc;xsd
+
+
+ {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
+
\ No newline at end of file
diff --git a/Project2/Project2/Project2.vcxproj.user b/Project2/Project2/Project2.vcxproj.user
new file mode 100644
index 0000000..ace9a86
--- /dev/null
+++ b/Project2/Project2/Project2.vcxproj.user
@@ -0,0 +1,3 @@
+
+
+
\ No newline at end of file
diff --git a/Project2/Project2/Release/Project2.lastbuildstate b/Project2/Project2/Release/Project2.lastbuildstate
new file mode 100644
index 0000000..e711bb6
--- /dev/null
+++ b/Project2/Project2/Release/Project2.lastbuildstate
@@ -0,0 +1,2 @@
+#v4.0:v100
+Release|Win32|C:\course\CIS565\Project2-StreamCompaction\Project2\|
diff --git a/Project2/Project2/Release/Project2.log b/Project2/Project2/Release/Project2.log
new file mode 100644
index 0000000..c1eaf39
--- /dev/null
+++ b/Project2/Project2/Release/Project2.log
@@ -0,0 +1,26 @@
+Build started 9/28/2014 5:07:29 PM.
+ 1>Project "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Project2.vcxproj" on node 2 (build target(s)).
+ 1>PrepareForBuild:
+ Creating directory "C:\course\CIS565\Project2-StreamCompaction\Project2\Release\".
+ InitializeBuildStatus:
+ Creating "Release\Project2.unsuccessfulbuild" because "AlwaysCreate" was specified.
+ AddCudaCompileDeps:
+ c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin\cl.exe /E /nologo /showIncludes /TP /D__CUDACC__ /D_MBCS /I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" /I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin" /I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" /I. /FIcuda_runtime.h /c C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\prefixSum.cu
+ CudaBuild:
+ Compiling CUDA source file prefixSum.cu...
+ cmd.exe /C "C:\Users\xinjie\AppData\Local\Temp\tmpc1851bb45bf54524907b74801438c7c6.cmd"
+ "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin\nvcc.exe" -gencode=arch=compute_20,code=\"sm_20,compute_20\" --use-local-env --cl-version 2010 -ccbin "c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" --keep-dir Release -maxrregcount=0 --machine 32 --compile -cudart static -D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MD " -o Release\prefixSum.cu.obj "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\prefixSum.cu"
+
+ C:\course\CIS565\Project2-StreamCompaction\Project2\Project2>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin\nvcc.exe" -gencode=arch=compute_20,code=\"sm_20,compute_20\" --use-local-env --cl-version 2010 -ccbin "c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" --keep-dir Release -maxrregcount=0 --machine 32 --compile -cudart static -D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MD " -o Release\prefixSum.cu.obj "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\prefixSum.cu"
+ 1>C:/course/CIS565/Project2-StreamCompaction/Project2/Project2/prefixSum.cu(8): error : redefinition of default argument
+
+ 1>C:/course/CIS565/Project2-StreamCompaction/Project2/Project2/prefixSum.cu(24): warning : expression has no effect
+
+ 1 error detected in the compilation of "C:/Users/xinjie/AppData/Local/Temp/tmpxft_00001adc_00000000-5_prefixSum.cpp1.ii".
+ prefixSum.cu
+ 1>C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\BuildCustomizations\CUDA 6.5.targets(593,9): error MSB3721: The command ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin\nvcc.exe" -gencode=arch=compute_20,code=\"sm_20,compute_20\" --use-local-env --cl-version 2010 -ccbin "c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" --keep-dir Release -maxrregcount=0 --machine 32 --compile -cudart static -D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MD " -o Release\prefixSum.cu.obj "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\prefixSum.cu"" exited with code 2.
+ 1>Done Building Project "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Project2.vcxproj" (build target(s)) -- FAILED.
+
+Build FAILED.
+
+Time Elapsed 00:00:01.67
diff --git a/Project2/Project2/Release/Project2.unsuccessfulbuild b/Project2/Project2/Release/Project2.unsuccessfulbuild
new file mode 100644
index 0000000..e69de29
diff --git a/Project2/Project2/Release/prefixSum.cu.cache b/Project2/Project2/Release/prefixSum.cu.cache
new file mode 100644
index 0000000..ed0f3f2
--- /dev/null
+++ b/Project2/Project2/Release/prefixSum.cu.cache
@@ -0,0 +1,49 @@
+Identity=prefixSum.cu
+AdditionalCompilerOptions=
+AdditionalCompilerOptions=
+AdditionalDependencies=
+AdditionalDeps=
+AdditionalLibraryDirectories=
+AdditionalOptions=
+AdditionalOptions=
+CInterleavedPTX=false
+CodeGeneration=compute_20,sm_20
+CodeGeneration=compute_20,sm_20
+CompileOut=Release\prefixSum.cu.obj
+CudaRuntime=Static
+CudaToolkitCustomDir=
+Defines=;_MBCS;
+Emulation=false
+FastMath=false
+GenerateLineInfo=false
+GenerateRelocatableDeviceCode=false
+GPUDebugInfo=false
+GPUDebugInfo=false
+HostDebugInfo=false
+Include=;;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include
+Inputs=
+Keep=false
+KeepDir=Release
+LinkOut=
+MaxRegCount=0
+NvccCompilation=compile
+NvccPath=
+Optimization=O2
+Optimization=O2
+PerformDeviceLink=
+PtxAsOptionV=false
+RequiredIncludes=
+Runtime=MD
+Runtime=MD
+RuntimeChecks=Default
+RuntimeChecks=Default
+TargetMachinePlatform=32
+TargetMachinePlatform=32
+TypeInfo=
+TypeInfo=
+UseHostDefines=true
+UseHostInclude=true
+UseHostLibraryDependencies=
+UseHostLibraryDirectories=
+Warning=W3
+Warning=W3
diff --git a/Project2/Project2/Release/prefixSum.cu.deps b/Project2/Project2/Release/prefixSum.cu.deps
new file mode 100644
index 0000000..11f09f1
--- /dev/null
+++ b/Project2/Project2/Release/prefixSum.cu.deps
@@ -0,0 +1,386 @@
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\cuda_runtime.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_config.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\sal.h
+c:\program files (x86)\microsoft visual studio 10.0\vc\include\codeanalysis\sourceannotations.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\vadefs.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\limits.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stddef.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\channel_descriptor.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_runtime_api.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_device_runtime_api.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_runtime_api.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\common_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\string.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\time.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\wtime.inl
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\time.inl
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\math_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\math.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdlib.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cmath
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\yvals.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\use_ansi.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\math.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstdlib
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\channel_descriptor.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\channel_descriptor.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_11_atomic_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_12_atomic_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_13_double_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_20_atomic_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_32_atomic_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_35_atomic_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_32_atomic_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_20_intrinsics.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_30_intrinsics.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_32_intrinsics.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_35_intrinsics.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_32_intrinsics.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_fetch_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_indirect_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_indirect_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_functions.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_launch_parameters.h
+c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\cuda.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\cuda_runtime.h
+c:\course\cis565\project2-streamcompaction\project2\project2\prefixSum.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\swprintf.inl
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/config.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/simple_defines.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/compiler.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\cuda.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/host_system.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/device_system.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/host_device.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\host_defines.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/debug.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/compiler_fence.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\intrin.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\setjmp.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\immintrin.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\wmmintrin.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\nmmintrin.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\smmintrin.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\tmmintrin.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\pmmintrin.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\emmintrin.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xmmintrin.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\mmintrin.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\malloc.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\mm3dnow.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\mmintrin.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/forceinline.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/hd_warning_disable.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/global_workarounds.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/cstdint.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/discard_block_engine.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\iostream
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\istream
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ostream
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ios
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocnum
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\climits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstdio
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\streambuf
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xiosbase
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocale
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstring
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdexcept
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\exception
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xstddef
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstddef
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\eh.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xstring
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xmemory
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\new
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xutility
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\utility
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\iosfwd
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cwchar
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\wchar.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdbg.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\type_traits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\limits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ymath.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cfloat
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\float.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtwrn.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xtr1common
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\typeinfo
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocinfo
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocinfo.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ctype.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\locale.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xdebug
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\system_error
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cerrno
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\errno.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h
+c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\share.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/random_core_access.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/discard_block_engine.inl
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/linear_congruential_engine.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/linear_congruential_engine_discard.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/mod.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/linear_congruential_engine.inl
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/linear_feedback_shift_engine.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/linear_feedback_shift_engine_wordmask.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/linear_feedback_shift_engine.inl
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/subtract_with_carry_engine.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/subtract_with_carry_engine.inl
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/xor_combine_engine.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/type_traits.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/type_traits/has_trivial_assign.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/xor_combine_engine_max.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/mpl/math.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/xor_combine_engine.inl
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/uniform_int_distribution.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/pair.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/pair.inl
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/swap.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/integer_traits.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/uniform_int_distribution.inl
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/uniform_real_distribution.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/uniform_real_distribution.inl
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/normal_distribution.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/normal_distribution_base.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/normal_distribution.inl
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\math_constants.h
+C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\cuda.h
diff --git a/Project2/Project2/main.cpp b/Project2/Project2/main.cpp
new file mode 100644
index 0000000..e35db34
--- /dev/null
+++ b/Project2/Project2/main.cpp
@@ -0,0 +1,114 @@
+#include
+#include
+#include
+#include
+#include
+#include "PrefixSum.h"
+using namespace std;
+
+
+int n = 5000; // number of elements in the test arrays allocated by main()
+#define GLOBAL 1 // scanGPU `type` value selecting the global-memory scan
+#define SHARED 2 // scanGPU `type` value selecting the shared-memory scan
+
+// Sequential exclusive prefix sum: result[0] = 0 and
+// result[i] = arr[0] + ... + arr[i-1] for 1 <= i < n.
+//   arr    - input values, at least n elements
+//   result - output buffer, at least n elements; expected to be a different
+//            buffer than arr
+//   n      - number of elements; nothing is written when n <= 0
+void prefixSumCPU(float* arr, float *result, int n){
+    if(n <= 0){
+        return;
+    }
+    // Seed the scan here rather than relying on the caller to have zeroed
+    // result[0]; a reused output buffer would otherwise offset every sum.
+    result[0] = 0;
+    for(int i = 1; i < n; i++){
+        result[i] = arr[i-1] + result[i-1];
+    }
+}
+// CPU stream compaction: copies the non-zero elements of arr to the front of
+// result, keeping their relative order, using the same flag -> scan -> scatter
+// steps as the GPU version.
+//   arr    - input values, at least n elements (not modified)
+//   result - output buffer, at least n elements; only the first `length`
+//            entries are written
+//   n      - number of input elements
+//   length - receives the number of non-zero elements copied
+void scatterCPU(float* arr, float* result, int n, int &length){
+    if(n <= 0){
+        // Nothing to compact; also avoids touching element 0 of an empty buffer.
+        length = 0;
+        return;
+    }
+
+    // 1/0 flag per element marking the values that are kept.
+    float* flags = new float[n];
+    int kept = 0;
+    for(int i = 0; i < n; i++){
+        if(arr[i] != 0){
+            flags[i] = 1;
+            kept++;
+        }
+        else{
+            flags[i] = 0;
+        }
+    }
+
+    // Exclusive scan of the flags gives each kept element its output index.
+    // The indices are held as floats, so they are exact only below 2^24.
+    float* offsets = new float[n];
+    offsets[0] = 0;
+    prefixSumCPU(flags, offsets, n);
+
+    for(int i = 0; i < n; i++){
+        if(flags[i] == 1){
+            result[int(offsets[i])] = arr[i];
+        }
+    }
+
+    delete[] flags;
+    delete[] offsets;
+    length = kept;
+}
+
+
+void printArr(int n, float* arr){ // prints the first n elements of arr (loop body is incomplete in this copy, see note below)
+ for(int i = 0; i < n; i++){
+ std::cout< h_input(n); // NOTE(review): text appears to be missing on this line — the rest of printArr and the header of a following thrust-based function seem to have been lost; restore from the repository before building
+ thrust::host_vector h_input_bool(n); // presumably the 0/1 "keep" flags (element type is missing in this copy) — TODO confirm
+ thrust::host_vector h_map(n); // receives the scanned flags, used below as output indices
+ thrust::host_vector h_output(n);
+ for(int i = 0; i < n ; ++i){
+ h_input[i] = arr[i];
+ }
+ for(int i = 0; i < n; i++){
+ if(h_input[i] != 0){
+ h_input_bool[i] = 1; // flag non-zero inputs; other entries keep their initial value
+ }
+ }
+ thrust::exclusive_scan(h_input_bool.begin(), h_input_bool.end(), h_map.begin()); // exclusive scan of the flags -> h_map
+ thrust::scatter(h_input.begin(), h_input.end(), h_map.begin(), h_output.begin()); // NOTE(review): scatters every input element (zeros included) to h_map[i]; h_output is not returned or printed here — confirm intent
+}
+// Benchmark driver: builds a test array, times the CPU stream compaction,
+// then runs the GPU stream compaction and the CPU/GPU prefix sums.
+// Each printed time covers `itertimes` repetitions of the measured step.
+int main(){
+    //-----------------test case setup-----------------------
+    float *in_arr = new float[n];
+    float *out_arr = new float[n];
+    float *out_arr2 = new float[n];
+    // Repeating pattern 0,1,...,9 so that every tenth element is zero.
+    for(int i = 0; i < n; i++){
+        in_arr[i] = i % 10;
+    }
+    out_arr[0] = 0;
+    out_arr2[0] = 0;
+    //printArr(n, in_arr);
+    int length = 0;
+    //--------------scatter---------------
+    clock_t begin = clock();
+    for(int i = 0; i < itertimes; i++){
+        scatterCPU(in_arr, out_arr, n, length);
+    }
+    clock_t end = clock();
+    double time = (end - begin)/(CLOCKS_PER_SEC / 1000.0);
+    printf(" %.4f ms \n", time);
+    //printArr(length, out_arr);
+    scatterGPU(n, in_arr, out_arr2);
+    //printArr(length, out_arr2);
+    //-------------scan----------------
+    //clock_t begin = clock();
+    //for(int i = 0; i < itertimes; i++){
+    // scatterCPU left compacted data in out_arr, so re-seed element 0 before
+    // the scan, which builds every sum on top of result[0].
+    out_arr[0] = 0;
+    prefixSumCPU(in_arr, out_arr, n);
+    ////printArr(n, out_arr);
+    //}
+    //clock_t end = clock();
+    //double time = (end - begin)/(CLOCKS_PER_SEC / 10000.0);
+    //printf(" %.4f ms \n", time);
+
+    scanGPU(n, in_arr, out_arr2, SHARED);
+    scanGPU(n, in_arr, out_arr2, GLOBAL);
+    //printArr(n, out_arr);
+    //printArr(n, out_arr2);
+
+    delete[] in_arr;
+    delete[] out_arr;
+    delete[] out_arr2;
+    return 0;
+}
\ No newline at end of file
diff --git a/Project2/Project2/prefixSum.cu b/Project2/Project2/prefixSum.cu
new file mode 100644
index 0000000..1aa4889
--- /dev/null
+++ b/Project2/Project2/prefixSum.cu
@@ -0,0 +1,360 @@
+#include
+#include
+#include "prefixSum.h"
+#include
+#include
+float a, b ,c; // NOTE(review): not referenced anywhere in the visible part of this file — presumably leftover; confirm before removing
+#define blocksize 128 // threads per block (threadsPerBlock) and length of the static __shared__ arrays in the kernels below
+// Checks (and clears) the most recent CUDA runtime error. When one is pending,
+// prints it to stderr and terminates the process with EXIT_FAILURE.
+//   msg  - caller-supplied context included in the message
+//   line - source line to report; a negative value omits the "Line N: " prefix
+void checkCUDAError(const char *msg, int line)
+{
+    const cudaError_t status = cudaGetLastError();
+    if( status == cudaSuccess )
+    {
+        return;
+    }
+
+    if( line >= 0 )
+    {
+        fprintf(stderr, "Line %d: ", line);
+    }
+    fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( status) );
+    exit(EXIT_FAILURE);
+}
+
+// Naive inclusive scan in global memory using doubling offsets (1, 2, 4, ...),
+// ping-ponging between the two buffers on every pass.
+//   arr    - input values; also used as scratch, so its contents are overwritten
+//   result - receives the inclusive prefix sums
+//   n      - number of elements; both buffers must hold at least n floats
+// Layout: 1D grid, one thread per element.
+// NOTE: __syncthreads() only orders the threads of a single block. With more
+// than one block, reads of elements owned by another block are not ordered
+// against that block's writes, so the result is only reliable for a
+// single-block launch.
+__global__ void scan(float *arr, float *result, int n){
+    const int index = threadIdx.x + (blockIdx.x * blockDim.x);
+    const bool active = index < n;
+
+    float *src = arr;
+    float *dst = result;
+    // Integer doubling replaces the float log/pow arithmetic; the loop bound is
+    // uniform across the block so every thread reaches the barrier.
+    for (int offset = 1; offset < n; offset *= 2){
+        if (active){
+            if (index >= offset){
+                dst[index] = src[index - offset] + src[index];
+            }
+            else{
+                dst[index] = src[index];
+            }
+        }
+        __syncthreads();
+
+        float *swapped = src;
+        src = dst;
+        dst = swapped;
+    }
+
+    // After the last swap `src` holds the newest values. If that is the input
+    // buffer (even number of passes, or no pass at all), copy this thread's
+    // own element so the answer always ends up in `result`.
+    if (active && src != result){
+        result[index] = src[index];
+    }
+}
+
+
+// Inclusive scan of up to one block's worth of elements in shared memory.
+//   arr    - input values, at least n elements
+//   result - receives the inclusive prefix sums
+//   n      - number of elements; only threadIdx.x < n take part, so n must not
+//            exceed the block size
+// Layout: a single block. Requires 2 * n floats of dynamic shared memory
+// (two halves of n floats used as ping-pong buffers).
+__global__ void scanSharedSingleBlock(float *arr, float *result, int n){
+    extern __shared__ float temp[];
+    const int index = threadIdx.x;
+    const bool active = index < n;
+
+    int pout = 0, pin = 1;
+    if (active){
+        temp[pout*n + index] = arr[index];
+    }
+    // Barriers stay outside the `active` test so every thread of the block
+    // reaches them.
+    __syncthreads();
+
+    for (int offset = 1; offset < n; offset *= 2){
+        pout = 1 - pout;
+        pin = 1 - pout;
+        if (active){
+            if (index >= offset)
+                temp[pout*n + index] = temp[pin*n + index - offset] + temp[pin*n + index];
+            else
+                temp[pout*n + index] = temp[pin*n + index];
+        }
+        __syncthreads();
+    }
+
+    if (active){
+        result[index] = temp[pout*n + index];
+    }
+}
+
+// Per-block inclusive scan in shared memory; the first stage of a multi-block scan.
+//   arr    - input values, at least n elements
+//   result - receives, for every element, the inclusive sum of the elements of
+//            its own block up to and including it
+//   n      - number of elements
+//   sums   - one entry per block; receives the total of that block
+// Layout: 1D grid, one thread per element. Requires at least
+// 2 * blockDim.x floats of dynamic shared memory (two ping-pong halves).
+__global__ void scanSharedArbitraryLength(float *arr, float *result, int n, float* sums){
+    extern __shared__ float temp[];
+
+    const int index = threadIdx.x;
+    const int globalIndex = threadIdx.x + (blockIdx.x * blockDim.x);
+    const int width = (int)blockDim.x;
+    const bool active = globalIndex < n;
+
+    // The halves are one block wide: the scan never looks outside the block,
+    // so the total length n is not needed as the stride.
+    float *tempIn = &temp[0];
+    float *tempOut = &temp[width];
+
+    if (active){
+        tempOut[index] = arr[globalIndex];
+    }
+    // Barriers are kept outside the `active` test so the whole block reaches them.
+    __syncthreads();
+
+    for (int offset = 1; offset < width; offset *= 2){
+        float *swapped = tempIn;
+        tempIn = tempOut;
+        tempOut = swapped;
+
+        if (active){
+            if (index >= offset){
+                tempOut[index] = tempIn[index - offset] + tempIn[index];
+            }
+            else{
+                tempOut[index] = tempIn[index];
+            }
+        }
+        __syncthreads();
+    }
+
+    if (active){
+        result[globalIndex] = tempOut[index];
+        // The last thread that owns an element publishes the block total; this
+        // also covers a final block that is only partly filled.
+        if (index == width - 1 || globalIndex == n - 1){
+            sums[blockIdx.x] = tempOut[index];
+        }
+    }
+}
+// One pass of a doubling-offset inclusive scan, meant to be launched once per
+// pass from the host: result[i] = arr[i - 2^(d-1)] + arr[i] when i >= 2^(d-1),
+// otherwise result[i] = arr[i].
+//   arr    - values from the previous pass, at least n elements
+//   result - values after this pass, at least n elements (distinct from arr)
+//   n      - number of elements
+//   d      - 1-based pass number; the offset for the pass is 2^(d-1)
+// Layout: 1D grid, one thread per element.
+__global__ void getIncr(float* arr, float* result, int n, int d){
+    const int index = threadIdx.x + (blockIdx.x * blockDim.x);
+    if(index >= n){
+        return;
+    }
+    // Integer shift instead of double-precision pow(); the neighbour that is
+    // read sits `offset` elements back, not `d` elements back.
+    const int offset = (d > 0) ? (1 << (d - 1)) : 0;
+    if(index >= offset){
+        result[index] = arr[index - offset] + arr[index];
+    }
+    else{
+        result[index] = arr[index];
+    }
+}
+// Adds Incr[blockIdx.x - 1] to every in-range element handled by a block
+// other than block 0; elements of block 0 are left unchanged.
+//   Incr - per-block increments, indexed by block number
+//   arr  - array updated in place
+//   n    - number of elements in arr
+// Layout: 1D grid, one thread per element.
+__global__ void addIncr(float *Incr, float *arr, int n){
+    const int gid = threadIdx.x + (blockIdx.x * blockDim.x);
+    const bool hasPreviousBlock = blockIdx.x >= 1;
+    if(gid < n && hasPreviousBlock){
+        arr[gid] += Incr[blockIdx.x-1];
+    }
+}
+// First stage of stream compaction: turns every element into a 1/0 "keep"
+// flag (1 when non-zero) and computes the inclusive scan of those flags
+// within each block.
+//   arr    - input values, at least n elements
+//   result - receives the per-block inclusive flag counts
+//   n      - number of elements
+// Layout: 1D grid, one thread per element, blockDim.x <= blocksize
+// (the static shared buffer holds `blocksize` floats).
+__global__ void scatterSetup(float *arr, float *result, int n){
+    __shared__ float flags[blocksize];
+    const int globalIndex = threadIdx.x + (blockIdx.x * blockDim.x);
+    const int index = threadIdx.x;
+    const bool active = globalIndex < n;
+
+    // Out-of-range threads contribute 0 and never read global memory.
+    float flag = 0.0f;
+    if(active && arr[globalIndex] != 0){
+        flag = 1.0f;
+    }
+    flags[index] = flag;
+    __syncthreads();
+
+    for(int offset = 1; offset < blocksize; offset *= 2){
+        float sum = flags[index];
+        if(index >= offset){
+            sum += flags[index - offset];
+        }
+        // Every thread must finish reading before any slot is overwritten.
+        __syncthreads();
+        flags[index] = sum;
+        __syncthreads();
+    }
+
+    if(active){
+        result[globalIndex] = flags[index];
+    }
+}
+// Turns per-block inclusive scans into one scan over the whole array: every
+// element gets the last (total) element of each preceding block added to it.
+//   arr  - per-block inclusive scan, at least `size` elements
+//   b    - receives the combined scan, at least `size` elements
+//   size - number of elements
+// Layout: 1D grid, one thread per element, launched with the same block size
+// that produced the per-block scan in arr.
+__global__ void ScanAdd (float *arr, float *b, int size){
+    const int index = (blockIdx.x * blockDim.x) + threadIdx.x;
+    if(index >= size){
+        return;
+    }
+
+    // Accumulate in a float register; each thread only ever touches its own
+    // value, so no shared memory or barrier is needed.
+    float total = arr[index];
+    const int blocksBefore = (int)blockIdx.x;
+    const int width = (int)blockDim.x;
+    for(int blk = 0; blk < blocksBefore; ++blk){
+        total += arr[(blk + 1) * width - 1];
+    }
+    b[index] = total;
+}
+// Shifts arr right by one element into result, writing 0 into result[0]
+// (converts an inclusive scan into an exclusive one).
+//   arr    - input, at least n elements
+//   result - output, at least n elements (distinct from arr)
+//   n      - number of elements
+// Layout: 1D grid, one thread per element.
+__global__ void scatterShift(float *arr, float *result, int n){
+    const int index = threadIdx.x + (blockIdx.x * blockDim.x);
+    // Threads of the last block that fall past the end must not write.
+    if(index >= n){
+        return;
+    }
+
+    if(index == 0){
+        result[index] = 0;
+    }
+    else{
+        result[index] = arr[index - 1];
+    }
+}
+// Final stage of stream compaction: every non-zero arr[i] is written to
+// result[(int)arr_scan[i]].
+//   arr      - input values, at least n elements
+//   arr_scan - output index for every element (stored as float), at least n elements
+//   result   - compacted output
+//   n        - number of input elements
+// Layout: 1D grid, one thread per element.
+__global__ void scatter(float *arr, float *arr_scan, float *result, int n){
+    const int index = threadIdx.x + (blockIdx.x * blockDim.x);
+    // Threads of the last block that fall past the end must not read or write.
+    if(index >= n){
+        return;
+    }
+
+    // Each thread only needs its own value, so a register is enough.
+    const float value = arr[index];
+    if(value != 0){
+        const int newindex = (int)arr_scan[index];
+        result[newindex] = value;
+    }
+}
+// Rounds n up to the next multiple of blocksize; the result is never smaller
+// than one full block, even for n <= 0.
+int padn(int n){
+    const int blocks = (n > blocksize) ? ((n - 1) / blocksize + 1) : 1;
+    return blocks * blocksize;
+}
+// Shifts the first n elements of arr one position to the right in place and
+// sets arr[0] to 0; the previous last element is dropped.
+//   arr - host array with at least n elements
+//   n   - number of elements; the array is left untouched when n <= 0
+void shiftRight(float * arr, int n){
+    if(n <= 0){
+        return;  // an empty array has no element 0 to overwrite
+    }
+    // Walk backwards so each value is copied before it is overwritten.
+    for(int i = n-1; i > 0; i--){
+        arr[i] = arr[i-1];
+    }
+    arr[0] = 0;
+}
+void scanGPU(int n, float *in_arr, float *out_arr, int type){ // GPU prefix sum of in_arr[0..n) into out_arr (shifted right by one, out_arr[0] = 0); type 1 = global-memory kernel, type 2 = shared-memory kernels; prints the elapsed time of the timed launches
+
+ //dim3 dimBlock(1, 1);//how to decide?
+ //dim3 dimGrid(n, 1);
+
+ dim3 fullBlocksPerGrid((int)ceil(float(n)/float(blocksize))); // enough blocks of `blocksize` threads to cover n elements
+ dim3 threadsPerBlock(blocksize);
+
+ int size =n*sizeof(float); // byte size of the n-element host and device buffers
+ float *in_arr_d, *out_arr_d;
+ cudaMalloc((void**)&in_arr_d, size);
+ cudaMemcpy(in_arr_d, in_arr, size, cudaMemcpyHostToDevice);
+ checkCUDAErrorWithLine("Kernel failed!");
+ cudaMalloc((void**)&out_arr_d, size);
+ checkCUDAErrorWithLine("Kernel failed!");
+ int n_round = padn(n); // n rounded up to a multiple of blocksize; NOTE(review): kernels below receive n_round as their length while the device buffers hold only n floats — confirm they cannot index past the allocations
+ //-----------------naive global-----------------------------------------
+ if(type == 1){
+ cudaEvent_t start, stop;
+ cudaEventCreate(&start);
+ cudaEventCreate(&stop);
+ cudaEventRecord( start, 0);
+ for(int i = 0; i < itertimes; i++) // repeated itertimes so the printed time covers all repetitions
+ scan<<>>(in_arr_d, out_arr_d, n_round); // NOTE(review): the launch configuration between <<< and >>> is missing in this copy — restore from the repository; scan also writes into in_arr_d, so later repetitions do not see the original input
+ cudaEventRecord( stop, 0);
+ cudaEventSynchronize( stop );
+ float time = 0.0f;
+ cudaEventElapsedTime( &time, start, stop);
+ cudaEventDestroy(start);
+ cudaEventDestroy(stop);
+
+ printf(" %.4f ms \n", time);
+ }
+ //----------------shared single block-----------------------------------
+ if(type == 2){
+
+
+
+ int sumNum = (int)ceil(float(n)/float(blocksize)); // number of blocks, i.e. number of per-block sums
+ if(sumNum < 1){ // NOTE(review): sumNum is a ceiling, so this branch is only taken when n <= 0 — was `<= 1` intended for the single-block case?
+ cudaEvent_t start, stop;
+ cudaEventCreate(&start);
+ cudaEventCreate(&stop);
+ cudaEventRecord( start, 0);
+ for(int i = 0; i < itertimes; i++){
+ scanSharedSingleBlock<<>>(in_arr_d, out_arr_d, n); // NOTE(review): launch configuration (including the dynamic shared-memory size) is missing in this copy
+ }
+ cudaEventRecord( stop, 0);
+ cudaEventSynchronize( stop );
+ float time = 0.0f;
+ cudaEventElapsedTime( &time, start, stop);
+ cudaEventDestroy(start);
+ cudaEventDestroy(stop);
+
+ printf(" %.4f ms \n", time);
+ checkCUDAErrorWithLine("Kernel failed!");
+ }
+ //----------------shared arbitrary length-------------------------------
+ else{
+ float *sums_d, *incr_d; // sums_d: one total per block; incr_d: scan of those totals
+ int sumsize = sumNum * sizeof(float);
+ cudaMalloc((void**)&sums_d, sumsize);
+ cudaMalloc((void**)&incr_d, sumsize);
+
+ cudaEvent_t start, stop;
+ cudaEventCreate(&start);
+ cudaEventCreate(&stop);
+ cudaEventRecord( start, 0);
+ for(int i = 0; i < itertimes; i++){
+ scanSharedArbitraryLength<<>>(in_arr_d, out_arr_d, n_round, sums_d); // step 1: per-block scan plus block totals; NOTE(review): launch configuration is missing in this copy
+ }
+ cudaEventRecord( stop, 0);
+ cudaEventSynchronize( stop );
+ float time = 0.0f;
+ cudaEventElapsedTime( &time, start, stop);
+ cudaEventDestroy(start);
+ cudaEventDestroy(stop);
+
+ printf(" %.4f ms \n", time); // covers only the per-block scan launches above, not the two kernels that follow
+ checkCUDAErrorWithLine("Kernel failed!");
+ int sumNum_round = padn(sumNum);
+ scan<<>>(sums_d, incr_d, sumNum_round); // step 2: scan the block totals; NOTE(review): sums_d/incr_d hold sumNum floats but sumNum_round is passed as the length — confirm
+
+ /*dim3 sumBlocksPerGrid((int)ceil(sumNum/(float)blocksize));
+ for(int d = 1; (int)pow(2.0,d-1) <= sumNum ;d++){
+ getIncr<<>>(sums_d, incr_d, sumNum, d);
+ cudaThreadSynchronize();
+ float *temp = sums_d;
+ sums_d = incr_d;
+ incr_d = temp;
+ }*/
+ checkCUDAErrorWithLine("Kernel failed!");
+ addIncr<<>>(incr_d, out_arr_d, n); // step 3: add the scanned total of the preceding blocks to every element of block >= 1
+ checkCUDAErrorWithLine("Kernel failed!");
+ cudaDeviceSynchronize();
+ cudaFree(sums_d);
+ cudaFree(incr_d);
+ }
+
+
+ }
+
+
+
+ //----------------copy to host and shift------------------------------------
+ cudaMemcpy(out_arr, out_arr_d, size, cudaMemcpyDeviceToHost);
+ checkCUDAErrorWithLine("Kernel failed!");
+ shiftRight(out_arr, n); // shift right by one and zero out_arr[0] on the host
+ checkCUDAErrorWithLine("Kernel failed!");
+
+ cudaFree(in_arr_d);
+ cudaFree(out_arr_d);
+
+}
+
+void scatterGPU(int n, float *in_arr, float *out_arr){ // GPU stream compaction: moves the non-zero elements of in_arr[0..n) to the front of out_arr and prints the elapsed time of the timed loop
+ dim3 fullBlocksPerGrid((int)ceil(float(n)/float(blocksize))); // enough blocks of `blocksize` threads to cover n elements
+ dim3 threadsPerBlock(blocksize);
+
+ int size =n*sizeof(float); // byte size of every n-element buffer below
+ float *in_arr_d, *out_arr_d, *arr_preScan, *arr_postScan, *arr_scan; // device copies of input/output plus three intermediate scan buffers
+ cudaMalloc((void**)&in_arr_d, size);
+ cudaMemcpy(in_arr_d, in_arr, size, cudaMemcpyHostToDevice);
+ checkCUDAErrorWithLine("Kernel failed!");
+ cudaMalloc((void**)&out_arr_d, size);
+ checkCUDAErrorWithLine("Kernel failed!");
+ cudaMalloc((void**)&arr_preScan, size);
+ cudaMalloc((void**)&arr_postScan, size);
+ cudaMalloc((void**)&arr_scan, size);
+
+ cudaEvent_t start, stop;
+ cudaEventCreate(&start);
+ cudaEventCreate(&stop);
+ cudaEventRecord( start, 0);
+ for(int i = 0; i < itertimes; i++){ // the whole four-kernel pipeline is repeated itertimes for timing
+ scatterSetup<<>>(in_arr_d, arr_preScan, n); // 1) 1/0 flags and their scan within each block; NOTE(review): launch configurations in this loop are missing in this copy — restore from the repository
+ ScanAdd<<>>(arr_preScan, arr_postScan, n); // 2) add the totals of all preceding blocks
+ scatterShift<<>>(arr_postScan, arr_scan, n); // 3) shift right by one so each entry is an output index
+ scatter<<>>(in_arr_d, arr_scan, out_arr_d, n); // 4) write every non-zero input to out_arr_d at its index
+ }
+ cudaEventRecord( stop, 0);
+ cudaEventSynchronize( stop );
+ float time = 0.0f;
+ cudaEventElapsedTime( &time, start, stop);
+ cudaEventDestroy(start);
+ cudaEventDestroy(stop);
+
+ printf(" %.4f ms \n", time);
+
+ checkCUDAErrorWithLine("Kernel failed!");
+ //cudaDeviceSynchronize();
+ cudaMemcpy(out_arr, out_arr_d, size, cudaMemcpyDeviceToHost); // copies all n floats; NOTE(review): out_arr_d is never initialized here, so entries past the compacted prefix are presumably undefined — confirm callers only read the kept count
+
+ cudaFree(in_arr_d);
+ cudaFree(out_arr_d);
+ cudaFree(arr_preScan);
+ cudaFree(arr_postScan);
+ cudaFree(arr_scan);
+
+}
diff --git a/Project2/Project2/prefixSum.h b/Project2/Project2/prefixSum.h
new file mode 100644
index 0000000..fb4efbe
--- /dev/null
+++ b/Project2/Project2/prefixSum.h
@@ -0,0 +1,19 @@
+#ifndef KERNEL_H
+#define KERNEL_H
+
+#include
+#include
+#include
+#include
+#include
+
+
+
+#define checkCUDAErrorWithLine(msg) checkCUDAError(msg, __LINE__) // calls checkCUDAError with the line number of the call site
+#define itertimes 1000 // number of repetitions used by the timing loops
+void scanGPU(int n, float *in_arr, float *out_arr, int type); // GPU prefix sum of in_arr into out_arr; type 1 = global-memory scan, type 2 = shared-memory scan
+void checkCUDAError(const char *msg, int line = -1); // if a CUDA error is pending, prints it to stderr and exits; line < 0 omits the "Line N: " prefix
+void shiftRight(float * arr, int n); // shifts arr right by one element on the host and sets arr[0] to 0
+
+void scatterGPU(int n, float *in_arr, float *out_arr); // GPU stream compaction of the non-zero elements of in_arr into out_arr
+#endif
\ No newline at end of file
diff --git a/Project2/Project2/vc100.pdb b/Project2/Project2/vc100.pdb
new file mode 100644
index 0000000..e920d38
Binary files /dev/null and b/Project2/Project2/vc100.pdb differ
diff --git a/Project2/ipch/project2-66cb7659/project2-706ddc75.ipch b/Project2/ipch/project2-66cb7659/project2-706ddc75.ipch
new file mode 100644
index 0000000..8f40925
Binary files /dev/null and b/Project2/ipch/project2-66cb7659/project2-706ddc75.ipch differ
diff --git a/questions.pdf b/questions.pdf
new file mode 100644
index 0000000..a6dcff4
Binary files /dev/null and b/questions.pdf differ