From 34561ef307820c1ce41ad2fa57c61804d6c68e09 Mon Sep 17 00:00:00 2001 From: Michelle McDaniel Date: Thu, 5 Apr 2018 11:20:05 -0700 Subject: [PATCH] Add Word2Vec Benchmark Harness (#17350) * Add Word2Vec Benchmark Harness This change adds an additional scenario benchmark, the Word2Vec benchmark. The harness pulls down Word2Vec.Net from eabdullin, applies a patch of changes that we made to work with netcoreapp2.1, harnesses the word training and search, and then runs the benchmark. It also updates the timeout for running benchmarks, since the training scenario on a 100M file takes about 7 minutes locally. --- .gitattributes | 1 + tests/scripts/run-xunit-perf.py | 4 +- .../JitBench/Benchmarks/MLBenchmark.cs | 256 ++++++++ .../Scenario/JitBench/JitBench.csproj | 3 + .../JitBench/Resources/word2vecnet.patch | 605 ++++++++++++++++++ .../Scenario/JitBench/Runner/Benchmark.cs | 3 +- .../Scenario/JitBench/Utilities/FileTasks.cs | 46 ++ .../JitBench/Utilities/ProcessRunner.cs | 2 +- .../unofficial_dotnet/JitBench.csproj | 6 + 9 files changed, 922 insertions(+), 4 deletions(-) create mode 100644 tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs create mode 100644 tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch diff --git a/.gitattributes b/.gitattributes index 27e670318539..4777297037cf 100644 --- a/.gitattributes +++ b/.gitattributes @@ -75,4 +75,5 @@ tests/src/JIT/Performance/CodeQuality/BenchmarksGame/reverse-complement/revcomp- tests/src/JIT/Performance/CodeQuality/BenchmarksGame/reverse-complement/revcomp-input25000.txt text eol=lf tests/src/JIT/Performance/CodeQuality/BenchmarksGame/k-nucleotide/knucleotide-input.txt text eol=lf tests/src/JIT/Performance/CodeQuality/BenchmarksGame/k-nucleotide/knucleotide-input-big.txt text eol=lf +tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch text eol=lf diff --git a/tests/scripts/run-xunit-perf.py b/tests/scripts/run-xunit-perf.py index 3c1cb89dca37..18a0b8e446f8 100755 --- 
a/tests/scripts/run-xunit-perf.py +++ b/tests/scripts/run-xunit-perf.py @@ -182,9 +182,9 @@ def run_benchmark(benchname, benchdir, env, sandboxDir, benchmarkOutputDir, test myEnv = dict(env) benchnameWithExt = benchname + '.' + testFileExt fullPath = os.path.join(benchdir, benchnameWithExt) - shutil.copy2(fullPath, sandboxDir) - files = glob.iglob(os.path.join(benchdir, "*.txt")) + # Copy all files in the benchmark directory to the sandbox + files = glob.iglob(os.path.join(benchdir, "*.*")) for filename in files: if os.path.isfile(filename): shutil.copy2(filename, sandboxDir) diff --git a/tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs b/tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs new file mode 100644 index 000000000000..ce4a45d83fcc --- /dev/null +++ b/tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs @@ -0,0 +1,256 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text.RegularExpressions; +using System.Threading.Tasks; +using System.Reflection; +using Microsoft.Xunit.Performance.Api; + +namespace JitBench +{ + class Word2VecBenchmark : MLBenchmark + { + public Word2VecBenchmark() : base("Word2Vec") { } + + protected override string ExecutableName => "Word2VecScenario.dll"; + + protected override string GetWord2VecNetSrcDirectory(string outputDir) + { + return Path.Combine(GetWord2VecNetRepoRootDir(outputDir), "Word2VecScenario"); + } + } + + abstract class MLBenchmark : Benchmark + { + private static readonly HashSet DefaultExitCodes = new HashSet(new[] { 0 }); + + public MLBenchmark(string name) : base(name) + { + ExePath = ExecutableName; + } + + protected abstract string ExecutableName { get; } + + public override async Task Setup(DotNetInstallation dotNetInstall, string outputDir, bool useExistingSetup, ITestOutputHelper output) + { + if(!useExistingSetup) + { + using (var setupSection = new IndentedTestOutputHelper("Setup " + Name, output)) + { + await 
CloneWord2VecNetRepo(outputDir, setupSection); + await Publish(dotNetInstall, outputDir, setupSection); + await DownloadAndExtractTextCorpus(dotNetInstall, outputDir, setupSection); + } + } + string tfm = DotNetSetup.GetTargetFrameworkMonikerForFrameworkVersion(dotNetInstall.FrameworkVersion); + WorkingDirPath = GetWord2VecNetPublishDirectory(dotNetInstall, outputDir, tfm); + } + + async Task CloneWord2VecNetRepo(string outputDir, ITestOutputHelper output) + { + // If the repo already exists, we delete it and clone it again. Paths handed to git are quoted because they may contain spaces. + string word2VecNetRepoRootDir = GetWord2VecNetRepoRootDir(outputDir); + FileTasks.DeleteDirectory(word2VecNetRepoRootDir, output); + + string word2VecPatchFullPath = Path.Combine(Path.GetDirectoryName(Assembly.GetEntryAssembly().Location), Word2VecNetPatch); + + await ExecuteGitCommand($"clone {Word2VecNetRepoUrl} \"{word2VecNetRepoRootDir}\"", output); + await ExecuteGitCommand($"checkout {Word2VecNetCommitSha1Id}", output, workingDirectory: word2VecNetRepoRootDir); + await ExecuteGitCommand($"apply \"{word2VecPatchFullPath}\"", output, workingDirectory: word2VecNetRepoRootDir); + } + + async Task ExecuteGitCommand(string arguments, ITestOutputHelper output, string workingDirectory = null) + { + int exitCode = await new ProcessRunner("git", arguments).WithLog(output).WithWorkingDirectory(workingDirectory).Run(); + + if (!DefaultExitCodes.Contains(exitCode)) + throw new Exception($"git {arguments} has failed, the exit code was {exitCode}"); + } + + async Task DownloadAndExtractTextCorpus(DotNetInstallation dotNetInstall, string outputDir, ITestOutputHelper output) + { + // Resolve the directories used below: the download is staged next to the repo root and the corpus ends up in the publish directory + string word2VecNetRepoRootDir = GetWord2VecNetRepoRootDir(outputDir); + string tfm = DotNetSetup.GetTargetFrameworkMonikerForFrameworkVersion(dotNetInstall.FrameworkVersion); + string word2VecNetPublishDir = GetWord2VecNetPublishDirectory(dotNetInstall, outputDir, tfm); + + // Download the corpus of text. 
This is a zip file that contains a text file of 100M of text from Wikipedia + var url = "http://mattmahoney.net/dc/text8.zip"; + await FileTasks.DownloadAndUnzip(url, word2VecNetRepoRootDir + "_temp", output); + + FileTasks.MoveFile(Path.Combine(word2VecNetRepoRootDir + "_temp", "text8"), + Path.Combine(word2VecNetPublishDir, "Corpus.txt"), output); + } + + private async Task Publish(DotNetInstallation dotNetInstall, string outputDir, ITestOutputHelper output) + { + string tfm = DotNetSetup.GetTargetFrameworkMonikerForFrameworkVersion(dotNetInstall.FrameworkVersion); + string publishDir = GetWord2VecNetPublishDirectory(dotNetInstall, outputDir, tfm); + if (publishDir != null) + { + FileTasks.DeleteDirectory(publishDir, output); + } + string dotNetExePath = dotNetInstall.DotNetExe; + await new ProcessRunner(dotNetExePath, $"publish -c Release -f {tfm}") + .WithWorkingDirectory(GetWord2VecNetSrcDirectory(outputDir)) + .WithEnvironmentVariable("DOTNET_MULTILEVEL_LOOKUP", "0") + .WithEnvironmentVariable("WORD2VEC_FRAMEWORK_VERSION", dotNetInstall.FrameworkVersion) + .WithEnvironmentVariable("UseSharedCompilation", "false") + .WithLog(output) + .Run(); + + publishDir = GetWord2VecNetPublishDirectory(dotNetInstall, outputDir, tfm); + if (publishDir == null) + { + throw new DirectoryNotFoundException("Could not find 'publish' directory"); + } + return publishDir; + } + + public override Metric[] GetDefaultDisplayMetrics() + { + return new Metric[] + { + TrainingMetric, + FirstSearchMetric, + MedianSearchMetric + }; + } + + protected override IterationResult RecordIterationMetrics(ScenarioExecutionResult scenarioIteration, string stdout, string stderr, ITestOutputHelper output) + { + IterationResult result = base.RecordIterationMetrics(scenarioIteration, stdout, stderr, output); + AddConsoleMetrics(result, stdout, output); + return result; + } + + void AddConsoleMetrics(IterationResult result, string stdout, ITestOutputHelper output) + { + output.WriteLine("Processing 
iteration results."); + + double? trainingTime = null; + double? firstSearchTime = null; + double? steadyStateMedianTime = null; + + using (var reader = new StringReader(stdout)) + { + string line; + while ((line = reader.ReadLine()) != null) + { + Match match = Regex.Match(line, @"^Training took \s*(\d+)ms$"); + if (match.Success && match.Groups.Count == 2) + { + trainingTime = Convert.ToDouble(match.Groups[1].Value, System.Globalization.CultureInfo.InvariantCulture); + continue; + } + + match = Regex.Match(line, @"^Search took \s*(\d+)ms$"); + if (match.Success && match.Groups.Count == 2) + { + firstSearchTime = Convert.ToDouble(match.Groups[1].Value, System.Globalization.CultureInfo.InvariantCulture); + continue; + } + + match = Regex.Match(line, @"^Steadystate median search time: \s*(\d+(?:\.\d+)?)ms$"); + if (match.Success && match.Groups.Count == 2) + { + //many lines will match, but the final values of these variables will be from the last batch which is presumably the + //best measurement of steady state performance; the fraction is optional because the scenario prints whole ms without a high resolution timer + steadyStateMedianTime = Convert.ToDouble(match.Groups[1].Value, System.Globalization.CultureInfo.InvariantCulture); + continue; + } + } + } + + if (!trainingTime.HasValue) + throw new FormatException("Training time was not found."); + if (!firstSearchTime.HasValue) + throw new FormatException("First Search time was not found."); + if (!steadyStateMedianTime.HasValue) + throw new FormatException("Steady state median response time not found."); + + + result.Measurements.Add(TrainingMetric, trainingTime.Value); + result.Measurements.Add(FirstSearchMetric, firstSearchTime.Value); + result.Measurements.Add(MedianSearchMetric, steadyStateMedianTime.Value); + + output.WriteLine($"Training took {trainingTime}ms"); + output.WriteLine($"Search took {firstSearchTime}ms"); + output.WriteLine($"Median steady state search {steadyStateMedianTime.Value}ms"); + } + + /// + /// When serializing the result data to benchview this is called to determine if any of the metrics should be reported differently + /// than they were collected. 
This benchmark uses it to collect several measurements (training, first search and median search times) in each iteration, then present those measurements + /// to benchview as if each was the Duration metric of a distinct scenario test with its own set of iterations. + /// + public override bool TryGetBenchviewCustomMetricReporting(Metric originalMetric, out Metric newMetric, out string newScenarioModelName) + { + if(originalMetric.Equals(TrainingMetric)) + { + newScenarioModelName = "Training"; + } + else if (originalMetric.Equals(FirstSearchMetric)) + { + newScenarioModelName = "First Search"; + } + else if (originalMetric.Equals(MedianSearchMetric)) + { + newScenarioModelName = "Median Search"; + } + else + { + return base.TryGetBenchviewCustomMetricReporting(originalMetric, out newMetric, out newScenarioModelName); + } + newMetric = Metric.ElapsedTimeMilliseconds; + return true; + } + + protected static string GetWord2VecNetRepoRootDir(string outputDir) + { + return Path.Combine(outputDir, "W"); + } + + protected abstract string GetWord2VecNetSrcDirectory(string outputDir); + + string GetWord2VecNetPublishDirectory(DotNetInstallation dotNetInstall, string outputDir, string tfm) + { + string dir = Path.Combine(GetWord2VecNetSrcDirectory(outputDir), "bin", dotNetInstall.Architecture, "Release", tfm, "publish"); + if (Directory.Exists(dir)) + { + return dir; + } + + dir = Path.Combine(GetWord2VecNetSrcDirectory(outputDir), "bin", "Release", tfm, "publish"); + if (Directory.Exists(dir)) + { + return dir; + } + + return null; + } + + string GetCoreClrRoot() + { + string currentDirectory = Directory.GetCurrentDirectory(); + string workspace = Environment.GetEnvironmentVariable("CORECLR_REPO"); + if (workspace == null) + { + workspace = currentDirectory; + } + + return workspace; + } + + private const string Word2VecNetRepoUrl = "https://github.com/eabdullin/Word2Vec.Net"; + private const string Word2VecNetCommitSha1Id = "6012a2b5b886926918d51b1b56387d785115f448"; + private const string Word2VecNetPatch = 
"word2vecnet.patch"; + private const string EnvironmentFileName = "Word2VecNetEnvironment.txt"; + private const string StoreDirName = ".store"; + private readonly Metric TrainingMetric = new Metric("Training", "ms"); + private readonly Metric FirstSearchMetric = new Metric("First Search", "ms"); + private readonly Metric MedianSearchMetric = new Metric("Median Search", "ms"); + private readonly Metric MeanSearchMetric = new Metric("Mean Search", "ms"); + } +} + diff --git a/tests/src/performance/Scenario/JitBench/JitBench.csproj b/tests/src/performance/Scenario/JitBench/JitBench.csproj index 2e384d183ecc..0d1f4fb0dfcd 100644 --- a/tests/src/performance/Scenario/JitBench/JitBench.csproj +++ b/tests/src/performance/Scenario/JitBench/JitBench.csproj @@ -54,5 +54,8 @@ Overwrite="true" Encoding="Unicode"/> + + + diff --git a/tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch b/tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch new file mode 100644 index 000000000000..dbad57ba5f95 --- /dev/null +++ b/tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch @@ -0,0 +1,605 @@ +diff --git a/.gitignore b/.gitignore +index 8098fe2..7c82f99 100644 +--- a/.gitignore ++++ b/.gitignore +@@ -17,7 +17,6 @@ + [Rr]eleases/ + x64/ + x86/ +-build/ + bld/ + [Bb]in/ + [Oo]bj/ +diff --git a/NuGet.config b/NuGet.config +new file mode 100644 +index 0000000..bd3a6f8 +--- /dev/null ++++ b/NuGet.config +@@ -0,0 +1,8 @@ ++ ++ ++ ++ ++ ++ ++ ++ +\ No newline at end of file +diff --git a/Word2LibConsole/Word2LibConsole.vcxproj b/Word2LibConsole/Word2LibConsole.vcxproj +index 2caa5a0..03b8ada 100644 +--- a/Word2LibConsole/Word2LibConsole.vcxproj ++++ b/Word2LibConsole/Word2LibConsole.vcxproj +@@ -1,5 +1,5 @@ +  +- ++ + + + Debug +@@ -14,19 +14,19 @@ + {9C719670-3571-4B68-A3DA-053B18C654A0} + Win32Proj + Word2LibConsole +- 8.1 ++ 10.0.16299.0 + + + + Application + true +- v140 ++ v141 + Unicode + + + Application + false +- v140 ++ v141 + true + Unicode + 
+diff --git a/Word2Vec.Net/Distance.cs b/Word2Vec.Net/Distance.cs +index f2c3cdc..32929cd 100644 +--- a/Word2Vec.Net/Distance.cs ++++ b/Word2Vec.Net/Distance.cs +@@ -46,7 +46,7 @@ namespace Word2Vec.Net + } + if (b == Words) b = -1; + bi[a] = b; +- Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]); ++ //Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]); + if (b == -1) + { + Console.Write("Out of dictionary word!\n"); +@@ -99,4 +99,4 @@ namespace Word2Vec.Net + public string Word { get; set; } + public float Distance { get; set; } + } +-} +\ No newline at end of file ++} +diff --git a/Word2Vec.Net/Properties/AssemblyInfo.cs b/Word2Vec.Net/Properties/AssemblyInfo.cs +deleted file mode 100644 +index 89452bf..0000000 +--- a/Word2Vec.Net/Properties/AssemblyInfo.cs ++++ /dev/null +@@ -1,36 +0,0 @@ +-using System.Reflection; +-using System.Runtime.CompilerServices; +-using System.Runtime.InteropServices; +- +-// General Information about an assembly is controlled through the following +-// set of attributes. Change these attribute values to modify the information +-// associated with an assembly. +-[assembly: AssemblyTitle("Word2Vec.Net")] +-[assembly: AssemblyDescription("")] +-[assembly: AssemblyConfiguration("")] +-[assembly: AssemblyCompany("")] +-[assembly: AssemblyProduct("Word2Vec.Net")] +-[assembly: AssemblyCopyright("Copyright © 2015")] +-[assembly: AssemblyTrademark("")] +-[assembly: AssemblyCulture("")] +- +-// Setting ComVisible to false makes the types in this assembly not visible +-// to COM components. If you need to access a type in this assembly from +-// COM, set the ComVisible attribute to true on that type. 
+-[assembly: ComVisible(false)] +- +-// The following GUID is for the ID of the typelib if this project is exposed to COM +-[assembly: Guid("b2bcc46d-a28b-40a4-a873-f0b1ffe65181")] +- +-// Version information for an assembly consists of the following four values: +-// +-// Major Version +-// Minor Version +-// Build Number +-// Revision +-// +-// You can specify all the values or you can default the Build and Revision Numbers +-// by using the '*' as shown below: +-// [assembly: AssemblyVersion("1.0.*")] +-[assembly: AssemblyVersion("1.0.0.0")] +-[assembly: AssemblyFileVersion("1.0.0.0")] +diff --git a/Word2Vec.Net/Word2Vec.Net.csproj b/Word2Vec.Net/Word2Vec.Net.csproj +index ee3ddb9..52cc678 100644 +--- a/Word2Vec.Net/Word2Vec.Net.csproj ++++ b/Word2Vec.Net/Word2Vec.Net.csproj +@@ -1,62 +1,26 @@ +- +- +- ++ ++ + +- Debug +- AnyCPU +- {FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F} +- Library +- Properties +- Word2Vec.Net ++ Word2Vec.Net ++ netcoreapp2.1 ++ $(DefineConstants);DEMO ++ true ++ true + Word2Vec.Net +- v4.5 +- 512 ++ library ++ ++ ++ false ++ ++ ++ $(WORD2VEC_FRAMEWORK_VERSION) + +- +- true +- full +- false +- bin\Debug\ +- DEBUG;TRACE +- prompt +- 4 +- bin\Debug\Word2Vec.Net.XML +- AnyCPU ++ ++ ++ 2.1.0-* + +- +- pdbonly +- true +- bin\Release\ +- TRACE +- prompt +- 4 ++ ++ ++ $(DefineConstants);RELEASE + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +\ No newline at end of file ++ +diff --git a/Word2Vec.Net/Word2Vec.cs b/Word2Vec.Net/Word2Vec.cs +index 968bf88..4142c7b 100644 +--- a/Word2Vec.Net/Word2Vec.cs ++++ b/Word2Vec.Net/Word2Vec.cs +@@ -57,7 +57,7 @@ namespace Word2Vec.Net + private const int TableSize = (int) 1e8; + private int[] _table; + +- internal Word2Vec( ++ public Word2Vec( + string trainFileName, + string outPutfileName, + string saveVocabFileName, +@@ -186,7 +186,7 @@ namespace Word2Vec.Net + for (var a = 0; a < VocabHashSize; a++) _vocabHash[a] = -1; + int size = _vocabSize; + _trainWords = 0; +- for (var a = 0; a < 
size; a++) ++ /*for (var a = 0; a < size; a++) + { + // Words occuring less than min_count times will be discarded from the vocab + if (_vocab[a].Cn < _minCount && (a != 0)) +@@ -203,7 +203,7 @@ namespace Word2Vec.Net + _trainWords += _vocab[a].Cn; + } + } +- Array.Resize(ref _vocab, _vocabSize + 1); ++ Array.Resize(ref _vocab, _vocabSize + 1);*/ + + // Allocate memory for the binary tree construction + for (var a = 0; a < _vocabSize; a++) +@@ -331,56 +331,48 @@ namespace Word2Vec.Net + + private void LearnVocabFromTrainFile() + { +- int i; +- for (var a = 0; a < VocabHashSize; a++) _vocabHash[a] = -1; +- using (var fin = File.OpenText(_trainFile)) ++ int i; ++ for (var a = 0; a < VocabHashSize; a++) _vocabHash[a] = -1; ++ string[] fin = System.IO.File.ReadAllLines(_trainFile); ++ _vocabSize = 0; ++ ++ Regex regex = new Regex("\\s"); ++ AddWordToVocab(""); ++ foreach (string line in fin) ++ { ++ string[] words = regex.Split(line); ++ ++ foreach (var word in words) + { +- if (fin == StreamReader.Null) ++ if(string.IsNullOrWhiteSpace(word)) continue; ++ _trainWords++; ++ if ((_debugMode > 1) && (_trainWords%100000 == 0)) + { +- throw new InvalidOperationException("ERROR: training data file not found!\n"); ++ Console.Write("{0}K \r", _trainWords/1000); ++ //printf("%lldK%c", train_words / 1000, 13); ++ //fflush(stdout); + } +- _vocabSize = 0; +- +- string line; +- Regex regex = new Regex("\\s"); +- AddWordToVocab(""); +- while ((line = fin.ReadLine()) != null) +- { +- if (fin.EndOfStream) break; +- string[] words = regex.Split(line); +- +- foreach (var word in words) +- { +- if(string.IsNullOrWhiteSpace(word)) continue; +- _trainWords++; +- if ((_debugMode > 1) && (_trainWords%100000 == 0)) +- { +- Console.Write("{0}K \r", _trainWords/1000); +- //printf("%lldK%c", train_words / 1000, 13); +- //fflush(stdout); +- } +- i = SearchVocab(word); +- if (i == -1) ++ i = SearchVocab(word); ++ if (i == -1) + { + var a = AddWordToVocab(word); + _vocab[a].Cn = 1; + } +- else +- 
_vocab[i].Cn++; +- if (_vocabSize > VocabHashSize*0.7) +- ReduceVocab(); +- } +- } +- SortVocab(); +- if (_debugMode > 0) +- { +- Console.WriteLine("Vocab size: {0}", _vocabSize); +- Console.WriteLine("Words in train file: {0}", _trainWords); +- } +- //file_size = ftell(fin); +- _fileSize = new FileInfo(_trainFile).Length; ++ else ++ _vocab[i].Cn++; ++ if (_vocabSize > VocabHashSize*0.7) ++ ReduceVocab(); + } + } ++ SortVocab(); ++ if (_debugMode > 0) ++ { ++ Console.WriteLine("Vocab size: {0}", _vocabSize); ++ Console.WriteLine("Words in train file: {0}", _trainWords); ++ } ++ //file_size = ftell(fin); ++ _fileSize = new FileInfo(_trainFile).Length; ++ } + + private void SaveVocab() + { +diff --git a/Word2Vec.Net/WordAnalogy.cs b/Word2Vec.Net/WordAnalogy.cs +index eaa35bf..8347c0f 100644 +--- a/Word2Vec.Net/WordAnalogy.cs ++++ b/Word2Vec.Net/WordAnalogy.cs +@@ -24,7 +24,7 @@ namespace Word2Vec.Net + for (b = 0; b < Words; b++) if (!new string(Vocab, (int)(b * max_w), (int)max_w).Equals(st[a])) break; + if (b == Words) b = -1; + bi[a] = b; +- Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]); ++ //Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]); + if (b == -1) + { + Console.Write("Out of dictionary word!\n"); +diff --git a/Word2VecScenario/App.config b/Word2VecScenario/App.config +new file mode 100644 +index 0000000..e8482b1 +--- /dev/null ++++ b/Word2VecScenario/App.config +@@ -0,0 +1,12 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +\ No newline at end of file +diff --git a/Word2VecScenario/Corpus.txt.ReadMe.txt b/Word2VecScenario/Corpus.txt.ReadMe.txt +new file mode 100644 +index 0000000..82c04e5 +--- /dev/null ++++ b/Word2VecScenario/Corpus.txt.ReadMe.txt +@@ -0,0 +1,5 @@ ++Please download and rename the following file: ++ ++http://mattmahoney.net/dc/text8.zip ++ ++Renaming the file inside the zip to: Corpus.txt - In place of this File! 
+diff --git a/Word2VecScenario/Program.cs b/Word2VecScenario/Program.cs +new file mode 100644 +index 0000000..7e9ad31 +--- /dev/null ++++ b/Word2VecScenario/Program.cs +@@ -0,0 +1,161 @@ ++namespace Word2VecScenario ++{ ++ using System; ++ using System.Diagnostics; ++ using System.Linq; ++ using Word2Vec.Net; ++ ++ class Program ++ { ++ static string path = @"Word2VectorOutputFile.bin"; ++ static Distance distance = null; ++ static WordAnalogy wordAnalogy = null; ++ ++ static void Main(string[] args) ++ { ++ // -train Use text data from to train the model ++ string train = "Corpus.txt"; ++ ++ // -output Use to save the resulting word vectors / word clusters ++ string output = "Vectors.bin"; ++ ++ // -save-vocab The vocabulary will be saved to ++ string savevocab = ""; ++ ++ // -read-vocab The vocabulary will be read from , not constructed from the training data ++ string readvocab = ""; ++ ++ // -size Set size of word vectors; default is 100 ++ int size = 100; ++ ++ // -debug Set the debug mode (default = 2 = more info during training) ++ int debug = 1; ++ ++ // -binary Save the resulting vectors in binary moded; default is 0 (off) ++ int binary = 1; ++ ++ // -cbow Use the continuous bag of words model; default is 1 (use 0 for skip-gram model) ++ int cbow = 1; ++ ++ // -alpha Set the starting learning rate; default is 0.025 for skip-gram and 0.05 for CBOW ++ float alpha = 0.05f; ++ ++ // -sample Set threshold for occurrence of words. 
Those that appear with higher frequency in the training data ++ float sample = 1e-4f; ++ ++ // -hs Use Hierarchical Softmax; default is 0 (not used) ++ int hs = 0; ++ ++ // -negative Number of negative examples; default is 5, common values are 3 - 10 (0 = not used) ++ int negative = 5; ++ ++ // -threads Use threads (default 12) ++ int threads = 12; ++ ++ // -iter Run more training iterations (default 5) ++ long iter = 15; ++ ++ // -min-count This will discard words that appear less than times; default is 5 ++ int mincount = 5; ++ ++ // -classes Output word classes rather than word vectors; default number of classes is 0 (vectors are written) ++ long classes = 0; ++ ++ // -window Set max skip length between words; default is 5 ++ int window = 12; ++ ++ Word2Vec word2Vec = new Word2Vec(train, output, savevocab, readvocab, size, debug, binary, cbow, alpha, sample, hs, negative, threads, iter, mincount, classes, window); ++ ++ var totalTime = Stopwatch.StartNew(); ++ var highRes = Stopwatch.IsHighResolution; ++ ++ word2Vec.TrainModel(); ++ ++ totalTime.Stop(); ++ ++ var trainingTime = totalTime.ElapsedMilliseconds; ++ Console.WriteLine("Training took {0}ms", trainingTime); ++ ++ path = @"Vectors.bin"; ++ distance = new Distance(path); ++ wordAnalogy = new WordAnalogy(path); ++ ++ string[] wordList = new string[] {"paris france madrid" }; ++ ++ var searchTime = Stopwatch.StartNew(); ++ ++ foreach (string word in wordList) ++ { ++ distance.Search(word); ++ wordAnalogy.Search(word); ++ } ++ ++ searchTime.Stop(); ++ var firstSearchTime = searchTime.ElapsedMilliseconds; ++ Console.WriteLine("Search took {0}ms", firstSearchTime); ++ ++ int outerN = 5; ++ ++ for (int outer = 0; outer < outerN; outer++) ++ { ++ foreach (string word in wordList) ++ { ++ int N = 11; ++ var minSearchTime = long.MaxValue; ++ var maxSearchTime = long.MinValue; ++ long[] searchTimes = new long[N]; ++ ++ Console.WriteLine($"Batch {outer}, searching {word}: running {N} searches"); ++ ++ for (int inner 
= 0; inner < N; inner++) ++ { ++ searchTime.Restart(); ++ distance.Search(word); ++ BestWord[] result = wordAnalogy.Search(word); ++ searchTime.Stop(); ++ ++ /*foreach (var bestWord in result) ++ { ++ Console.WriteLine("{0}\t\t{1}", bestWord.Word, bestWord.Distance); ++ }*/ ++ ++ long interval = highRes ? searchTime.ElapsedTicks : searchTime.ElapsedMilliseconds; ++ searchTimes[inner] = interval; ++ ++ if (interval < minSearchTime) ++ { ++ minSearchTime = interval; ++ } ++ if (interval > maxSearchTime) ++ { ++ maxSearchTime = interval; ++ } ++ } ++ ++ if (highRes) ++ { ++ double averageSearch = 1000 * ((double)searchTimes.Sum() / N / Stopwatch.Frequency); ++ double medianSearch = 1000 * ((double)searchTimes.OrderBy(t => t).ElementAt(N / 2) / Stopwatch.Frequency); ++ Console.WriteLine("Steadystate min search time: {0:F2}ms", 1000.0 * minSearchTime / Stopwatch.Frequency); ++ Console.WriteLine("Steadystate max search time: {0:F2}ms", 1000.0 * maxSearchTime / Stopwatch.Frequency); ++ Console.WriteLine("Steadystate average search time: {0:F2}ms", averageSearch); ++ Console.WriteLine("Steadystate median search time: {0:F2}ms", medianSearch); ++ } ++ else ++ { ++ long averageSearch = searchTimes.Sum() / N; ++ long medianSearch = searchTimes.OrderBy(t => t).ElementAt(N / 2); ++ Console.WriteLine("Steadystate min search time: {0}ms", minSearchTime); ++ Console.WriteLine("Steadystate max search time: {0}ms", maxSearchTime); ++ Console.WriteLine("Steadystate average search time: {0}ms", (int)averageSearch); ++ Console.WriteLine("Steadystate median search time: {0}ms", (int)medianSearch); ++ } ++ ++ Console.WriteLine(""); ++ } ++ } ++ } ++ ++ } ++ ++} +diff --git a/Word2VecScenario/Word2VecScenario.csproj b/Word2VecScenario/Word2VecScenario.csproj +new file mode 100644 +index 0000000..cacd48a +--- /dev/null ++++ b/Word2VecScenario/Word2VecScenario.csproj +@@ -0,0 +1,34 @@ ++ ++ ++ ++ Test for Word2Vec ++ netcoreapp2.1 ++ $(DefineConstants);DEMO ++ true ++ true ++ 
Word2VecScenario ++ Exe ++ ++ ++ false ++ ++ ++ $(WORD2VEC_FRAMEWORK_VERSION) ++ ++ ++ ++ 2.1.0-* ++ ++ ++ ++ $(DefineConstants);RELEASE ++ ++ ++ ++ ++ Word2Vec.Net ++ ++ ++ ++ ++ +diff --git a/build/common.props b/build/common.props +new file mode 100644 +index 0000000..36d884c +--- /dev/null ++++ b/build/common.props +@@ -0,0 +1,5 @@ ++ ++ ++ ++ ++ +diff --git a/build/dependencies.props b/build/dependencies.props +new file mode 100644 +index 0000000..95d79b3 +--- /dev/null ++++ b/build/dependencies.props +@@ -0,0 +1,5 @@ ++ ++ ++ 2.0.0-* ++ ++ diff --git a/tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs b/tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs index 206cd556da79..d47d12785167 100644 --- a/tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs +++ b/tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs @@ -84,7 +84,7 @@ BenchmarkRunResult MeasureIterations(TestRun run, BenchmarkConfiguration config, BenchmarkRunResult result = new BenchmarkRunResult(this, config); StringBuilder stderr = new StringBuilder(); StringBuilder stdout = new StringBuilder(); - var scenarioConfiguration = new ScenarioTestConfiguration(TimeSpan.FromMinutes(1), startInfo) + var scenarioConfiguration = new ScenarioTestConfiguration(TimeSpan.FromMinutes(20), startInfo) { //XUnitPerformanceHarness writes files to disk starting with {runid}-{ScenarioBenchmarkName}-{TestName} TestName = (Name + "-" + config.Name).Replace(' ', '_'), @@ -143,6 +143,7 @@ protected static void AddEtwData( "dotnet.exe", "MusicStore.dll", "AllReady.dll", + "Word2VecScenario.dll", "ntoskrnl.exe", "System.Private.CoreLib.dll", "Unknown", diff --git a/tests/src/performance/Scenario/JitBench/Utilities/FileTasks.cs b/tests/src/performance/Scenario/JitBench/Utilities/FileTasks.cs index 5e9efa2ffbcf..e5391cafd0ad 100644 --- a/tests/src/performance/Scenario/JitBench/Utilities/FileTasks.cs +++ b/tests/src/performance/Scenario/JitBench/Utilities/FileTasks.cs @@ -178,6 +178,9 @@ public 
static void DeleteDirectory(string path, ITestOutputHelper output) } try { + // On some systems, directories/files created programmatically are created with attributes + // that prevent them from being deleted. Set those attributes to be normal + SetAttributesNormal(path); Directory.Delete(path, true); return; } @@ -194,6 +197,18 @@ public static void DeleteDirectory(string path, ITestOutputHelper output) } } + public static void SetAttributesNormal(string path) + { + foreach (var subDir in Directory.GetDirectories(path)) + { + SetAttributesNormal(subDir); + } + foreach (var file in Directory.GetFiles(path)) + { + File.SetAttributes(file, FileAttributes.Normal); + } + } + public static void MoveDirectory(string sourceDirName, string destDirName, ITestOutputHelper output) { if (output != null) @@ -225,6 +240,37 @@ public static void MoveDirectory(string sourceDirName, string destDirName, ITest } } + // Moves sourceFileName to destFileName, making up to 10 attempts with a growing pause between them. + // Returns without moving when the source is gone and the destination already exists; the last failure is rethrown. + // output may be null, in which case the progress and retry messages are simply not logged. + public static void MoveFile(string sourceFileName, string destFileName, ITestOutputHelper output) + { + output?.WriteLine("Moving " + sourceFileName + " -> " + destFileName); + int retries = 10; + for (int i = 0; i < retries; i++) + { + if (!File.Exists(sourceFileName) && File.Exists(destFileName)) + { + return; + } + try + { + File.Move(sourceFileName, destFileName); + return; + } + catch (IOException e) when (i < retries - 1) + { + output?.WriteLine($" Attempt #{i + 1} failed: {e.Message}"); + } + catch (UnauthorizedAccessException e) when (i < retries - 1) + { + output?.WriteLine($" Attempt #{i + 1} failed: {e.Message}"); + } + // if something has a transient lock on the file waiting may resolve the issue + Thread.Sleep((i + 1) * 10); + } + } + public static void CreateDirectory(string path, ITestOutputHelper output) { output.WriteLine("Creating " + path); diff --git a/tests/src/performance/Scenario/JitBench/Utilities/ProcessRunner.cs b/tests/src/performance/Scenario/JitBench/Utilities/ProcessRunner.cs index 60d30e1af6df..467ba7dbf771 100644 
--- a/tests/src/performance/Scenario/JitBench/Utilities/ProcessRunner.cs +++ b/tests/src/performance/Scenario/JitBench/Utilities/ProcessRunner.cs @@ -67,7 +67,7 @@ public ProcessRunner(string exePath, string arguments, string replayCommand = nu _p.StartInfo = psi; _p.EnableRaisingEvents = false; _loggers = new List(); - _timeout = TimeSpan.FromMinutes(10); + _timeout = TimeSpan.FromMinutes(60); _cancelSource = new CancellationTokenSource(); _killReason = null; _waitForProcessStartTaskSource = new TaskCompletionSource(); diff --git a/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj b/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj index 009949ab1b39..74e633a32719 100644 --- a/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj +++ b/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj @@ -55,4 +55,10 @@ + + + + + +