diff --git a/Project2/Project2.sdf b/Project2/Project2.sdf new file mode 100644 index 0000000..e6dd31d Binary files /dev/null and b/Project2/Project2.sdf differ diff --git a/Project2/Project2.sln b/Project2/Project2.sln new file mode 100644 index 0000000..899c5bc --- /dev/null +++ b/Project2/Project2.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual Studio 2010 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Project2", "Project2\Project2.vcxproj", "{78336453-2589-4F51-9CB0-FF46B00878A5}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {78336453-2589-4F51-9CB0-FF46B00878A5}.Debug|Win32.ActiveCfg = Debug|Win32 + {78336453-2589-4F51-9CB0-FF46B00878A5}.Debug|Win32.Build.0 = Debug|Win32 + {78336453-2589-4F51-9CB0-FF46B00878A5}.Release|Win32.ActiveCfg = Release|Win32 + {78336453-2589-4F51-9CB0-FF46B00878A5}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Project2/Project2.suo b/Project2/Project2.suo new file mode 100644 index 0000000..1bcc0c1 Binary files /dev/null and b/Project2/Project2.suo differ diff --git a/Project2/Project2/Debug/Project2.Build.CppClean.log b/Project2/Project2/Debug/Project2.Build.CppClean.log new file mode 100644 index 0000000..d05165b --- /dev/null +++ b/Project2/Project2/Debug/Project2.Build.CppClean.log @@ -0,0 +1,20 @@ +C:\COURSE\CIS565\PROJECT2-STREAMCOMPACTION\PROJECT2\DEBUG\PROJECT2.EXE +C:\COURSE\CIS565\PROJECT2-STREAMCOMPACTION\PROJECT2\DEBUG\PROJECT2.ILK +C:\COURSE\CIS565\PROJECT2-STREAMCOMPACTION\PROJECT2\DEBUG\PROJECT2.PDB +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\cl.command.1.tlog +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\CL.read.1.tlog +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\CL.write.1.tlog +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\link.command.1.tlog +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\link.read.1.tlog +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\link.write.1.tlog +C:\COURSE\CIS565\PROJECT2-STREAMCOMPACTION\PROJECT2\PROJECT2\DEBUG\MAIN.OBJ +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\mt.command.1.tlog +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\mt.read.1.tlog +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\mt.write.1.tlog +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\prefixSum.cu.cache +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\prefixSum.cu.obj +C:\COURSE\CIS565\PROJECT2-STREAMCOMPACTION\PROJECT2\PROJECT2\DEBUG\PROJECT2.EXE.INTERMEDIATE.MANIFEST +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\Project2.vcxprojResolveAssemblyReference.cache +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\Project2.write.1.tlog +C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Debug\vc100.idb +C:\COURSE\CIS565\PROJECT2-STREAMCOMPACTION\PROJECT2\PROJECT2\DEBUG\VC100.PDB diff --git a/Project2/Project2/Debug/Project2.log b/Project2/Project2/Debug/Project2.log new file mode 100644 index 0000000..222b5f5 --- /dev/null +++ b/Project2/Project2/Debug/Project2.log @@ -0,0 +1,16 @@ +Build started 9/29/2014 6:21:42 PM. + 1>Project "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Project2.vcxproj" on node 2 (clean target(s)). + 1>_PrepareForClean: + Deleting file "Debug\Project2.lastbuildstate". + CudaClean: + cmd.exe /C "C:\Users\xinjie\AppData\Local\Temp\tmpe807d7cf708c46feb7b418f14f0a1b53.cmd" + "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin\nvcc.exe" -ccbin "c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -G --keep-dir Debug -maxrregcount=0 --machine 32 --compile -g -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Zi /RTC1 /MDd " -o Debug\prefixSum.cu.obj "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\prefixSum.cu" -clean + + C:\course\CIS565\Project2-StreamCompaction\Project2\Project2>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin\nvcc.exe" -ccbin "c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -G --keep-dir Debug -maxrregcount=0 --machine 32 --compile -g -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Zi /RTC1 /MDd " -o Debug\prefixSum.cu.obj "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\prefixSum.cu" -clean + prefixSum.cu + Deleting file "Debug\prefixSum.cu.deps". + 1>Done Building Project "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Project2.vcxproj" (clean target(s)). + +Build succeeded. + +Time Elapsed 00:00:00.39 diff --git a/Project2/Project2/PrefixSumCPU.cpp b/Project2/Project2/PrefixSumCPU.cpp new file mode 100644 index 0000000..cb30149 --- /dev/null +++ b/Project2/Project2/PrefixSumCPU.cpp @@ -0,0 +1,11 @@ +#include "PrefixSumCPU.h" + + +PrefixSumCPU::PrefixSumCPU(void) +{ +} + + +PrefixSumCPU::~PrefixSumCPU(void) +{ +} diff --git a/Project2/Project2/PrefixSumCPU.h b/Project2/Project2/PrefixSumCPU.h new file mode 100644 index 0000000..10ac130 --- /dev/null +++ b/Project2/Project2/PrefixSumCPU.h @@ -0,0 +1,8 @@ +#pragma once +class PrefixSumCPU +{ +public: + PrefixSumCPU(void); + ~PrefixSumCPU(void); +}; + diff --git a/Project2/Project2/Project2.vcxproj b/Project2/Project2/Project2.vcxproj new file mode 100644 index 0000000..a61aa4d --- /dev/null +++ b/Project2/Project2/Project2.vcxproj @@ -0,0 +1,81 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + {78336453-2589-4F51-9CB0-FF46B00878A5} + Project2 + + + + Application + true + MultiByte + + + Application + false + true + MultiByte + + + + + + + + + + + + + + + + Level3 + Disabled + + + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;cudart.lib;%(AdditionalDependencies) + Console + + + + + Level3 + MaxSpeed + true + true + + + true + true + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;cudart.lib;%(AdditionalDependencies) + + + + + + + + Document + + + + + + + + + + \ No newline at end of file diff --git a/Project2/Project2/Project2.vcxproj.filters b/Project2/Project2/Project2.vcxproj.filters new file mode 100644 index 0000000..d55dc0b --- /dev/null +++ b/Project2/Project2/Project2.vcxproj.filters @@ -0,0 +1,32 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/Project2/Project2/Project2.vcxproj.user b/Project2/Project2/Project2.vcxproj.user new file mode 100644 index 0000000..ace9a86 --- /dev/null +++ b/Project2/Project2/Project2.vcxproj.user @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/Project2/Project2/Release/Project2.lastbuildstate b/Project2/Project2/Release/Project2.lastbuildstate new file mode 100644 index 0000000..e711bb6 --- /dev/null +++ b/Project2/Project2/Release/Project2.lastbuildstate @@ -0,0 +1,2 @@ +#v4.0:v100 +Release|Win32|C:\course\CIS565\Project2-StreamCompaction\Project2\| diff --git a/Project2/Project2/Release/Project2.log b/Project2/Project2/Release/Project2.log new file mode 100644 index 0000000..c1eaf39 --- /dev/null +++ b/Project2/Project2/Release/Project2.log @@ -0,0 +1,26 @@ +Build started 9/28/2014 5:07:29 PM. + 1>Project "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Project2.vcxproj" on node 2 (build target(s)). + 1>PrepareForBuild: + Creating directory "C:\course\CIS565\Project2-StreamCompaction\Project2\Release\". + InitializeBuildStatus: + Creating "Release\Project2.unsuccessfulbuild" because "AlwaysCreate" was specified. + AddCudaCompileDeps: + c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin\cl.exe /E /nologo /showIncludes /TP /D__CUDACC__ /D_MBCS /I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" /I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin" /I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" /I. /FIcuda_runtime.h /c C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\prefixSum.cu + CudaBuild: + Compiling CUDA source file prefixSum.cu... + cmd.exe /C "C:\Users\xinjie\AppData\Local\Temp\tmpc1851bb45bf54524907b74801438c7c6.cmd" + "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin\nvcc.exe" -gencode=arch=compute_20,code=\"sm_20,compute_20\" --use-local-env --cl-version 2010 -ccbin "c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" --keep-dir Release -maxrregcount=0 --machine 32 --compile -cudart static -D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MD " -o Release\prefixSum.cu.obj "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\prefixSum.cu" + + C:\course\CIS565\Project2-StreamCompaction\Project2\Project2>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin\nvcc.exe" -gencode=arch=compute_20,code=\"sm_20,compute_20\" --use-local-env --cl-version 2010 -ccbin "c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" --keep-dir Release -maxrregcount=0 --machine 32 --compile -cudart static -D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MD " -o Release\prefixSum.cu.obj "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\prefixSum.cu" + 1>C:/course/CIS565/Project2-StreamCompaction/Project2/Project2/prefixSum.cu(8): error : redefinition of default argument + + 1>C:/course/CIS565/Project2-StreamCompaction/Project2/Project2/prefixSum.cu(24): warning : expression has no effect + + 1 error detected in the compilation of "C:/Users/xinjie/AppData/Local/Temp/tmpxft_00001adc_00000000-5_prefixSum.cpp1.ii". + prefixSum.cu + 1>C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\BuildCustomizations\CUDA 6.5.targets(593,9): error MSB3721: The command ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\bin\nvcc.exe" -gencode=arch=compute_20,code=\"sm_20,compute_20\" --use-local-env --cl-version 2010 -ccbin "c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include" --keep-dir Release -maxrregcount=0 --machine 32 --compile -cudart static -D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MD " -o Release\prefixSum.cu.obj "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\prefixSum.cu"" exited with code 2. + 1>Done Building Project "C:\course\CIS565\Project2-StreamCompaction\Project2\Project2\Project2.vcxproj" (build target(s)) -- FAILED. + +Build FAILED. + +Time Elapsed 00:00:01.67 diff --git a/Project2/Project2/Release/Project2.unsuccessfulbuild b/Project2/Project2/Release/Project2.unsuccessfulbuild new file mode 100644 index 0000000..e69de29 diff --git a/Project2/Project2/Release/prefixSum.cu.cache b/Project2/Project2/Release/prefixSum.cu.cache new file mode 100644 index 0000000..ed0f3f2 --- /dev/null +++ b/Project2/Project2/Release/prefixSum.cu.cache @@ -0,0 +1,49 @@ +Identity=prefixSum.cu +AdditionalCompilerOptions= +AdditionalCompilerOptions= +AdditionalDependencies= +AdditionalDeps= +AdditionalLibraryDirectories= +AdditionalOptions= +AdditionalOptions= +CInterleavedPTX=false +CodeGeneration=compute_20,sm_20 +CodeGeneration=compute_20,sm_20 +CompileOut=Release\prefixSum.cu.obj +CudaRuntime=Static +CudaToolkitCustomDir= +Defines=;_MBCS; +Emulation=false +FastMath=false +GenerateLineInfo=false +GenerateRelocatableDeviceCode=false +GPUDebugInfo=false +GPUDebugInfo=false +HostDebugInfo=false +Include=;;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include +Inputs= +Keep=false +KeepDir=Release +LinkOut= +MaxRegCount=0 +NvccCompilation=compile +NvccPath= +Optimization=O2 +Optimization=O2 +PerformDeviceLink= +PtxAsOptionV=false +RequiredIncludes= +Runtime=MD +Runtime=MD +RuntimeChecks=Default +RuntimeChecks=Default +TargetMachinePlatform=32 +TargetMachinePlatform=32 +TypeInfo= +TypeInfo= +UseHostDefines=true +UseHostInclude=true +UseHostLibraryDependencies= +UseHostLibraryDirectories= +Warning=W3 +Warning=W3 diff --git a/Project2/Project2/Release/prefixSum.cu.deps b/Project2/Project2/Release/prefixSum.cu.deps new file mode 100644 index 0000000..11f09f1 --- /dev/null +++ b/Project2/Project2/Release/prefixSum.cu.deps @@ -0,0 +1,386 @@ +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\cuda_runtime.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_config.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\sal.h +c:\program files (x86)\microsoft visual studio 10.0\vc\include\codeanalysis\sourceannotations.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\vadefs.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\limits.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stddef.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\channel_descriptor.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_runtime_api.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_device_runtime_api.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_runtime_api.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\common_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\string.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\time.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\wtime.inl +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\time.inl +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\math_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\math.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdlib.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cmath +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\yvals.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\use_ansi.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\math.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstdlib +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\channel_descriptor.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\channel_descriptor.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_11_atomic_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_12_atomic_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_13_double_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_20_atomic_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_32_atomic_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_35_atomic_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_32_atomic_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_20_intrinsics.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_30_intrinsics.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_32_intrinsics.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_35_intrinsics.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\sm_32_intrinsics.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_fetch_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\cuda_texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_indirect_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_indirect_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\builtin_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\driver_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\surface_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\texture_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\host_defines.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_functions.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\device_launch_parameters.h +c:\program files\nvidia gpu computing toolkit\cuda\v6.5\include\vector_types.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\cuda.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\cuda_runtime.h +c:\course\cis565\project2-streamcompaction\project2\project2\prefixSum.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\swprintf.inl +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/config.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/simple_defines.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/compiler.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\cuda.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/host_system.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/device_system.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/host_device.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\host_defines.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/debug.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/compiler_fence.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\intrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\setjmp.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\immintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\wmmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\nmmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\smmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\tmmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\pmmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\emmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xmmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\mmintrin.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\malloc.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\mm3dnow.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\mmintrin.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/forceinline.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/hd_warning_disable.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/config/global_workarounds.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/cstdint.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/discard_block_engine.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\iostream +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\istream +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ostream +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ios +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocnum +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\climits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstdio +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\streambuf +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xiosbase +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocale +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstring +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdexcept +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\exception +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xstddef +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cstddef +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\eh.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xstring +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xmemory +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\new +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xutility +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\utility +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\iosfwd +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cwchar +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\wchar.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdbg.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\type_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\limits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ymath.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cfloat +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\float.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtwrn.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xtr1common +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xfwrap1 +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xxtype_traits +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\typeinfo +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocinfo +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xlocinfo.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\ctype.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\locale.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\xdebug +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\system_error +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\cerrno +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\errno.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\crtdefs.h +c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\share.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/random_core_access.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/discard_block_engine.inl +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/linear_congruential_engine.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/linear_congruential_engine_discard.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/mod.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/linear_congruential_engine.inl +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/linear_feedback_shift_engine.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/linear_feedback_shift_engine_wordmask.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/linear_feedback_shift_engine.inl +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/subtract_with_carry_engine.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/subtract_with_carry_engine.inl +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/xor_combine_engine.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/type_traits.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/type_traits/has_trivial_assign.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/xor_combine_engine_max.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/mpl/math.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/xor_combine_engine.inl +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/uniform_int_distribution.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/pair.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/pair.inl +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/swap.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/detail/integer_traits.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/uniform_int_distribution.inl +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/uniform_real_distribution.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/uniform_real_distribution.inl +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/normal_distribution.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/normal_distribution_base.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\thrust/random/detail/normal_distribution.inl +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\math_constants.h +C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\include\cuda.h diff --git a/Project2/Project2/main.cpp b/Project2/Project2/main.cpp new file mode 100644 index 0000000..e35db34 --- /dev/null +++ b/Project2/Project2/main.cpp @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include "PrefixSum.h" +using namespace std; + + +int n = 5000; +#define GLOBAL 1 +#define SHARED 2 + +void prefixSumCPU(float* arr, float *result, int n){ + for(int i = 1; i < n; i++){ + result[i] = arr[i-1] + result[i-1]; + } +} +void scatterCPU(float* arr, float* result, int n, int &length){ + float* arrcopy = new float[n]; + for(int i = 0; i < n; i++){ + arrcopy[i] = arr[i]; + } + int len = 0; + for(int i = 0; i < n; i++){ + if(arrcopy[i] != 0){ + arrcopy[i] = 1; + len++; + } + } + float* arrAfterScan = new float[n]; + arrAfterScan[0] = 0; + prefixSumCPU(arrcopy, arrAfterScan, n); + + for(int i = 0; i < n; i++){ + if(arrcopy[i] == 1){ + int newindex = int(arrAfterScan[i]); + result[newindex] = arr[i]; + } + } + delete[] arrcopy; + delete[] arrAfterScan; + length = len; +} + + +void printArr(int n, float* arr){ + for(int i = 0; i < n; i++){ + std::cout< h_input(n); + thrust::host_vector h_input_bool(n); + thrust::host_vector h_map(n); + thrust::host_vector h_output(n); + for(int i = 0; i < n ; ++i){ + h_input[i] = arr[i]; + } + for(int i = 0; i < n; i++){ + if(h_input[i] != 0){ + h_input_bool[i] = 1; + } + } + thrust::exclusive_scan(h_input_bool.begin(), h_input_bool.end(), h_map.begin()); + thrust::scatter(h_input.begin(), h_input.end(), h_map.begin(), h_output.begin()); +} +void main(){ + //-----------------test case setup----------------------- + float *in_arr = new float[n]; + float *out_arr = new float[n]; + float *out_arr2 = new float[n]; + for(int i = 0; i < n; i++){ + if(i < 10) + in_arr[i] = i; + else in_arr[i] = i%10; + } + out_arr[0] = 0; + out_arr2[0] = 0; + //printArr(n, in_arr); + int length = 0; + //--------------scatter--------------- + clock_t begin = clock(); + for(int i = 0; i < itertimes; i++){ + scatterCPU(in_arr, out_arr, n, length); + } + clock_t end = clock(); + double time = (end - begin)/(CLOCKS_PER_SEC / 1000.0); + printf(" %.4f ms \n", time); + //printArr(length, out_arr); + scatterGPU(n, in_arr, out_arr2); + //printArr(length, out_arr2); + //-------------scan---------------- + //clock_t begin = clock(); + //for(int i = 0; i < itertimes; i++){ + prefixSumCPU(in_arr, out_arr, n); + ////printArr(n, out_arr); + //} + //clock_t end = clock(); + //double time = (end - begin)/(CLOCKS_PER_SEC / 10000.0); + //printf(" %.4f ms \n", time); + + + scanGPU(n, in_arr, out_arr2, SHARED); + scanGPU(n, in_arr, out_arr2, GLOBAL); + //printArr(n, out_arr); + + + //printArr(n, out_arr2); + + + +} \ No newline at end of file diff --git a/Project2/Project2/prefixSum.cu b/Project2/Project2/prefixSum.cu new file mode 100644 index 0000000..1aa4889 --- /dev/null +++ b/Project2/Project2/prefixSum.cu @@ -0,0 +1,360 @@ +#include +#include +#include "prefixSum.h" +#include +#include +float a, b ,c; +#define blocksize 128 +void checkCUDAError(const char *msg, int line) +{ + cudaError_t err = cudaGetLastError(); + if( cudaSuccess != err) + { + if( line >= 0 ) + { + fprintf(stderr, "Line %d: ", line); + } + fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + exit(EXIT_FAILURE); + } +} + +__global__ void scan(float *arr, float *result, int n){ + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if(index < n){ + /*for(int offset = 1; offset < n-1; offset*2){ + if(index >= offset){ + result[index] = arr[index - offset] + arr[index]; + } + else { + result[index] = arr[index]; + } + __syncthreads(); + + float *temp = arr; + arr = result; + result = temp; + }*/ + int logn = ceil(log(float(n))/log(2.0f)); + for (int d=1; d<=logn; d++){ + int offset = powf(2.0f, d-1); + if (index >= offset){ + result[index] = arr[index-offset] + arr[index]; + } + else{ + result[index] = arr[index]; + } + __syncthreads(); + + float* temp = arr; + arr = result; + result = temp; + } + } +} + + +__global__ void scanSharedSingleBlock(float *arr, float *result, int n){ + extern __shared__ float temp[]; + int index = threadIdx.x; + if(index < n){ + int pout = 0, pin = 1; + temp[pout*n + index] = arr[index]; + __syncthreads(); + for(int offset = 1; offset < n; offset *= 2){ + pout = 1 - pout; + pin = 1- pout; + if(index >= offset) + temp[pout*n + index] = temp[pin*n + index - offset] + temp[pin*n +index]; + else + temp[pout*n + index] = temp[pin*n + index]; + __syncthreads(); + } + result[index] = temp[pout*n + index]; + } +} + +__global__ void scanSharedArbitraryLength(float *arr, float *result, int n, float* sums){ + extern __shared__ float temp[]; + + int index = threadIdx.x; + int globalIndex = threadIdx.x + (blockIdx.x * blockDim.x); + if(globalIndex < n){ + float *tempIn = &temp[0]; + float *tempOut = &temp[n]; + + tempOut[index] = arr[globalIndex]; + __syncthreads(); + for(int offset = 1; offset < n; offset *= 2){ + float* temp = tempIn; + tempIn = tempOut; + tempOut = temp; + //__syncthreads(); + if(index >= offset){ + tempOut[index] = tempIn[index - offset] + tempIn[index]; + } + else{ + tempOut[index] = tempIn[index]; + } + + __syncthreads(); + } + result[globalIndex] = tempOut[index]; + if(index == blocksize -1) + sums[blockIdx.x] = tempOut[index];//last element in this block + } +} +__global__ void getIncr(float* arr, float* result, int n, int d){ + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if(index < n){ + if(index >= (int)pow(2.0,d-1)){ + result[index] = arr[index - d] + arr[index]; + } + else{ + result[index] = arr[index]; + } + + } +} +__global__ void addIncr(float *Incr, float *arr, int n){ + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if(index < n){ + if(blockIdx.x >= 1){ + arr[index] += Incr[blockIdx.x-1]; + } + } +} +__global__ void scatterSetup(float *arr, float *result, int n){ + __shared__ float temp[blocksize]; + __shared__ float temp2[blocksize]; + int globalIndex = threadIdx.x + (blockIdx.x * blockDim.x); + int index = threadIdx.x; + temp[index] = arr[globalIndex]; + __syncthreads(); + if(globalIndex < n){ + if(temp[index] == 0) + temp2[index] = 0; + else + temp2[index] = 1; + __syncthreads(); + + for(int offset = 1; offset <= blocksize; offset*=2){ + if(index >= offset){ + temp[index] = temp2[index - offset] + temp2[index]; + } + else{ + temp[index] = temp2[index]; + } + temp2[index] = temp[index]; + __syncthreads(); + } + result[globalIndex] = temp2[index]; + } +} +__global__ void ScanAdd (float *arr, float *b, int size){ + __shared__ int temp[blocksize]; + + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + + temp[threadIdx.x] = arr[index]; + __syncthreads(); + + for(int b = 0; b < blockIdx.x ; ++b){ + temp[threadIdx.x] += arr[ (b + 1) * blocksize - 1]; + } + b[index] = temp[threadIdx.x]; +} +__global__ void scatterShift(float *arr, float *result, int n){ + + //__shared__ int temp[blocksize]; + int index = threadIdx.x + (blockIdx.x * blockDim.x); + + if(index == 0){ + result[index] = 0; + } + else + result[index] = arr[index - 1]; + +} +__global__ void scatter(float *arr, float *arr_scan, float *result, int n){ + __shared__ float temp[blocksize]; + int index = threadIdx.x + (blockIdx.x * blockDim.x); + temp[threadIdx.x] = arr[index]; + __syncthreads(); + + if(temp[threadIdx.x] != 0){ + int newindex = (int)arr_scan[index]; + result[newindex] = temp[threadIdx.x]; + } +} +int padn(int n){ + int i; + for(i = 1; n > i*blocksize; i++){} + return i*blocksize; +} +void shiftRight(float * arr, int n){ + for(int i = n-1; i > 0; i--){ + arr[i] = arr[i-1]; + } + arr[0] = 0; +} +void scanGPU(int n, float *in_arr, float *out_arr, int type){ + + //dim3 dimBlock(1, 1);//how to decide? + //dim3 dimGrid(n, 1); + + dim3 fullBlocksPerGrid((int)ceil(float(n)/float(blocksize))); + dim3 threadsPerBlock(blocksize); + + int size =n*sizeof(float); + float *in_arr_d, *out_arr_d; + cudaMalloc((void**)&in_arr_d, size); + cudaMemcpy(in_arr_d, in_arr, size, cudaMemcpyHostToDevice); + checkCUDAErrorWithLine("Kernel failed!"); + cudaMalloc((void**)&out_arr_d, size); + checkCUDAErrorWithLine("Kernel failed!"); + int n_round = padn(n); + //-----------------naive global----------------------------------------- + if(type == 1){ + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord( start, 0); + for(int i = 0; i < itertimes; i++) + scan<<>>(in_arr_d, out_arr_d, n_round); + cudaEventRecord( stop, 0); + cudaEventSynchronize( stop ); + float time = 0.0f; + cudaEventElapsedTime( &time, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + + printf(" %.4f ms \n", time); + } + //----------------shared single block----------------------------------- + if(type == 2){ + + + + int sumNum = (int)ceil(float(n)/float(blocksize)); + if(sumNum < 1){ + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord( start, 0); + for(int i = 0; i < itertimes; i++){ + scanSharedSingleBlock<<>>(in_arr_d, out_arr_d, n); + } + cudaEventRecord( stop, 0); + cudaEventSynchronize( stop ); + float time = 0.0f; + cudaEventElapsedTime( &time, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + + printf(" %.4f ms \n", time); + checkCUDAErrorWithLine("Kernel failed!"); + } + //----------------shared arbitrary length------------------------------- + else{ + float *sums_d, *incr_d; + int sumsize = sumNum * sizeof(float); + cudaMalloc((void**)&sums_d, sumsize); + cudaMalloc((void**)&incr_d, sumsize); + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord( start, 0); + for(int i = 0; i < itertimes; i++){ + scanSharedArbitraryLength<<>>(in_arr_d, out_arr_d, n_round, sums_d); + } + cudaEventRecord( stop, 0); + cudaEventSynchronize( stop ); + float time = 0.0f; + cudaEventElapsedTime( &time, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + + printf(" %.4f ms \n", time); + checkCUDAErrorWithLine("Kernel failed!"); + int sumNum_round = padn(sumNum); + scan<<>>(sums_d, incr_d, sumNum_round); + + /*dim3 sumBlocksPerGrid((int)ceil(sumNum/(float)blocksize)); + for(int d = 1; (int)pow(2.0,d-1) <= sumNum ;d++){ + getIncr<<>>(sums_d, incr_d, sumNum, d); + cudaThreadSynchronize(); + float *temp = sums_d; + sums_d = incr_d; + incr_d = temp; + }*/ + checkCUDAErrorWithLine("Kernel failed!"); + addIncr<<>>(incr_d, out_arr_d, n); + checkCUDAErrorWithLine("Kernel failed!"); + cudaDeviceSynchronize(); + cudaFree(sums_d); + cudaFree(incr_d); + } + + + } + + + + //----------------copy to host and shift------------------------------------ + cudaMemcpy(out_arr, out_arr_d, size, cudaMemcpyDeviceToHost); + checkCUDAErrorWithLine("Kernel failed!"); + shiftRight(out_arr, n); + checkCUDAErrorWithLine("Kernel failed!"); + + cudaFree(in_arr_d); + cudaFree(out_arr_d); + +} + +void scatterGPU(int n, float *in_arr, float *out_arr){ + dim3 fullBlocksPerGrid((int)ceil(float(n)/float(blocksize))); + dim3 threadsPerBlock(blocksize); + + int size =n*sizeof(float); + float *in_arr_d, *out_arr_d, *arr_preScan, *arr_postScan, *arr_scan; + cudaMalloc((void**)&in_arr_d, size); + cudaMemcpy(in_arr_d, in_arr, size, cudaMemcpyHostToDevice); + checkCUDAErrorWithLine("Kernel failed!"); + cudaMalloc((void**)&out_arr_d, size); + checkCUDAErrorWithLine("Kernel failed!"); + cudaMalloc((void**)&arr_preScan, size); + cudaMalloc((void**)&arr_postScan, size); + cudaMalloc((void**)&arr_scan, size); + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord( start, 0); + for(int i = 0; i < itertimes; i++){ + scatterSetup<<>>(in_arr_d, arr_preScan, n); + ScanAdd<<>>(arr_preScan, arr_postScan, n); + scatterShift<<>>(arr_postScan, arr_scan, n); + scatter<<>>(in_arr_d, arr_scan, out_arr_d, n); + } + cudaEventRecord( stop, 0); + cudaEventSynchronize( stop ); + float time = 0.0f; + cudaEventElapsedTime( &time, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + + printf(" %.4f ms \n", time); + + checkCUDAErrorWithLine("Kernel failed!"); + //cudaDeviceSynchronize(); + cudaMemcpy(out_arr, out_arr_d, size, cudaMemcpyDeviceToHost); + + cudaFree(in_arr_d); + cudaFree(out_arr_d); + cudaFree(arr_preScan); + cudaFree(arr_postScan); + cudaFree(arr_scan); + +} diff --git a/Project2/Project2/prefixSum.h b/Project2/Project2/prefixSum.h new file mode 100644 index 0000000..fb4efbe --- /dev/null +++ b/Project2/Project2/prefixSum.h @@ -0,0 +1,19 @@ +#ifndef KERNEL_H +#define KERNEL_H + +#include +#include +#include +#include +#include + + + +#define checkCUDAErrorWithLine(msg) checkCUDAError(msg, __LINE__) +#define itertimes 1000 +void scanGPU(int n, float *in_arr, float *out_arr, int type); +void checkCUDAError(const char *msg, int line = -1); +void shiftRight(float * arr, int n); + +void scatterGPU(int n, float *in_arr, float *out_arr); +#endif \ No newline at end of file diff --git a/Project2/Project2/vc100.pdb b/Project2/Project2/vc100.pdb new file mode 100644 index 0000000..e920d38 Binary files /dev/null and b/Project2/Project2/vc100.pdb differ diff --git a/Project2/ipch/project2-66cb7659/project2-706ddc75.ipch b/Project2/ipch/project2-66cb7659/project2-706ddc75.ipch new file mode 100644 index 0000000..8f40925 Binary files /dev/null and b/Project2/ipch/project2-66cb7659/project2-706ddc75.ipch differ diff --git a/questions.pdf b/questions.pdf new file mode 100644 index 0000000..a6dcff4 Binary files /dev/null and b/questions.pdf differ