diff --git a/.gitattributes b/.gitattributes index 27e670318539..4777297037cf 100644 --- a/.gitattributes +++ b/.gitattributes @@ -75,4 +75,5 @@ tests/src/JIT/Performance/CodeQuality/BenchmarksGame/reverse-complement/revcomp- tests/src/JIT/Performance/CodeQuality/BenchmarksGame/reverse-complement/revcomp-input25000.txt text eol=lf tests/src/JIT/Performance/CodeQuality/BenchmarksGame/k-nucleotide/knucleotide-input.txt text eol=lf tests/src/JIT/Performance/CodeQuality/BenchmarksGame/k-nucleotide/knucleotide-input-big.txt text eol=lf +tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch text eol=lf diff --git a/tests/scripts/run-xunit-perf.py b/tests/scripts/run-xunit-perf.py index 3c1cb89dca37..18a0b8e446f8 100755 --- a/tests/scripts/run-xunit-perf.py +++ b/tests/scripts/run-xunit-perf.py @@ -182,9 +182,9 @@ def run_benchmark(benchname, benchdir, env, sandboxDir, benchmarkOutputDir, test myEnv = dict(env) benchnameWithExt = benchname + '.' + testFileExt fullPath = os.path.join(benchdir, benchnameWithExt) - shutil.copy2(fullPath, sandboxDir) - files = glob.iglob(os.path.join(benchdir, "*.txt")) + # Copy all files in the benchmark directory to the sandbox + files = glob.iglob(os.path.join(benchdir, "*.*")) for filename in files: if os.path.isfile(filename): shutil.copy2(filename, sandboxDir) diff --git a/tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs b/tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs new file mode 100644 index 000000000000..ce4a45d83fcc --- /dev/null +++ b/tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs @@ -0,0 +1,256 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text.RegularExpressions; +using System.Threading.Tasks; +using System.Reflection; +using Microsoft.Xunit.Performance.Api; + +namespace JitBench +{ + class Word2VecBenchmark : MLBenchmark + { + public Word2VecBenchmark() : base("Word2Vec") { } + + protected override string ExecutableName 
=> "Word2VecScenario.dll"; + + protected override string GetWord2VecNetSrcDirectory(string outputDir) + { + return Path.Combine(GetWord2VecNetRepoRootDir(outputDir), "Word2VecScenario"); + } + } + + abstract class MLBenchmark : Benchmark + { + private static readonly HashSet<int> DefaultExitCodes = new HashSet<int>(new[] { 0 }); + + public MLBenchmark(string name) : base(name) + { + ExePath = ExecutableName; + } + + protected abstract string ExecutableName { get; } + + public override async Task Setup(DotNetInstallation dotNetInstall, string outputDir, bool useExistingSetup, ITestOutputHelper output) + { + if(!useExistingSetup) + { + using (var setupSection = new IndentedTestOutputHelper("Setup " + Name, output)) + { + await CloneWord2VecNetRepo(outputDir, setupSection); + await Publish(dotNetInstall, outputDir, setupSection); + await DownloadAndExtractTextCorpus(dotNetInstall, outputDir, setupSection); + } + } + string tfm = DotNetSetup.GetTargetFrameworkMonikerForFrameworkVersion(dotNetInstall.FrameworkVersion); + WorkingDirPath = GetWord2VecNetPublishDirectory(dotNetInstall, outputDir, tfm); + } + + async Task CloneWord2VecNetRepo(string outputDir, ITestOutputHelper output) + { + // If the repo already exists, we delete it and clone it again. 
+ string word2VecNetRepoRootDir = GetWord2VecNetRepoRootDir(outputDir); + FileTasks.DeleteDirectory(word2VecNetRepoRootDir, output); + + string word2VecPatchFullPath = Path.Combine(Path.GetDirectoryName(Assembly.GetEntryAssembly().Location), Word2VecNetPatch); + + await ExecuteGitCommand($"clone {Word2VecNetRepoUrl} \"{word2VecNetRepoRootDir}\"", output); + await ExecuteGitCommand($"checkout {Word2VecNetCommitSha1Id}", output, workingDirectory: word2VecNetRepoRootDir); + await ExecuteGitCommand($"apply \"{word2VecPatchFullPath}\"", output, workingDirectory: word2VecNetRepoRootDir); + } + + async Task ExecuteGitCommand(string arguments, ITestOutputHelper output, string workingDirectory = null) + { + int exitCode = await new ProcessRunner("git", arguments).WithLog(output).WithWorkingDirectory(workingDirectory).Run(); + + if (!DefaultExitCodes.Contains(exitCode)) + throw new Exception($"git {arguments} has failed, the exit code was {exitCode}"); + } + + async Task DownloadAndExtractTextCorpus(DotNetInstallation dotNetInstall, string outputDir, ITestOutputHelper output) + { + // Resolve the repo root and the publish directory that will receive the corpus + string word2VecNetRepoRootDir = GetWord2VecNetRepoRootDir(outputDir); + string tfm = DotNetSetup.GetTargetFrameworkMonikerForFrameworkVersion(dotNetInstall.FrameworkVersion); + string word2VecNetPublishDir = GetWord2VecNetPublishDirectory(dotNetInstall, outputDir, tfm); + + // Download the corpus of text. 
This is a zip file that contains a text file of 100MB of text from Wikipedia + var url = "http://mattmahoney.net/dc/text8.zip"; + await FileTasks.DownloadAndUnzip(url, word2VecNetRepoRootDir + "_temp", output); + + FileTasks.MoveFile(Path.Combine(word2VecNetRepoRootDir + "_temp", "text8"), + Path.Combine(word2VecNetPublishDir, "Corpus.txt"), output); + } + + private async Task<string> Publish(DotNetInstallation dotNetInstall, string outputDir, ITestOutputHelper output) + { + string tfm = DotNetSetup.GetTargetFrameworkMonikerForFrameworkVersion(dotNetInstall.FrameworkVersion); + string publishDir = GetWord2VecNetPublishDirectory(dotNetInstall, outputDir, tfm); + if (publishDir != null) + { + FileTasks.DeleteDirectory(publishDir, output); + } + string dotNetExePath = dotNetInstall.DotNetExe; + await new ProcessRunner(dotNetExePath, $"publish -c Release -f {tfm}") + .WithWorkingDirectory(GetWord2VecNetSrcDirectory(outputDir)) + .WithEnvironmentVariable("DOTNET_MULTILEVEL_LOOKUP", "0") + .WithEnvironmentVariable("WORD2VEC_FRAMEWORK_VERSION", dotNetInstall.FrameworkVersion) + .WithEnvironmentVariable("UseSharedCompilation", "false") + .WithLog(output) + .Run(); + + publishDir = GetWord2VecNetPublishDirectory(dotNetInstall, outputDir, tfm); + if (publishDir == null) + { + throw new DirectoryNotFoundException("Could not find 'publish' directory"); + } + return publishDir; + } + + public override Metric[] GetDefaultDisplayMetrics() + { + return new Metric[] + { + TrainingMetric, + FirstSearchMetric, + MedianSearchMetric + }; + } + + protected override IterationResult RecordIterationMetrics(ScenarioExecutionResult scenarioIteration, string stdout, string stderr, ITestOutputHelper output) + { + IterationResult result = base.RecordIterationMetrics(scenarioIteration, stdout, stderr, output); + AddConsoleMetrics(result, stdout, output); + return result; + } + + void AddConsoleMetrics(IterationResult result, string stdout, ITestOutputHelper output) + { + output.WriteLine("Processing 
iteration results."); + + double? trainingTime = null; + double? firstSearchTime = null; + double? steadyStateMedianTime = null; + + using (var reader = new StringReader(stdout)) + { + string line; + while ((line = reader.ReadLine()) != null) + { + Match match = Regex.Match(line, @"^Training took \s*(\d+)ms$"); + if (match.Success && match.Groups.Count == 2) + { + trainingTime = Convert.ToDouble(match.Groups[1].Value); + continue; + } + + match = Regex.Match(line, @"^Search took \s*(\d+)ms$"); + if (match.Success && match.Groups.Count == 2) + { + firstSearchTime = Convert.ToDouble(match.Groups[1].Value); + continue; + } + + match = Regex.Match(line, @"^Steadystate median search time: \s*(\d+(?:\.\d+)?)ms$"); + if (match.Success && match.Groups.Count == 2) + { + //many lines will match (integer or fractional ms), but the final values of these variables will be from the last batch which is presumably the + //best measurement of steady state performance + steadyStateMedianTime = Convert.ToDouble(match.Groups[1].Value); + continue; + } + } + } + + if (!trainingTime.HasValue) + throw new FormatException("Training time was not found."); + if (!firstSearchTime.HasValue) + throw new FormatException("First Search time was not found."); + if (!steadyStateMedianTime.HasValue) + throw new FormatException("Steady state median response time not found."); + + + result.Measurements.Add(TrainingMetric, trainingTime.Value); + result.Measurements.Add(FirstSearchMetric, firstSearchTime.Value); + result.Measurements.Add(MedianSearchMetric, steadyStateMedianTime.Value); + + output.WriteLine($"Training took {trainingTime}ms"); + output.WriteLine($"Search took {firstSearchTime}ms"); + output.WriteLine($"Median steady state search {steadyStateMedianTime.Value}ms"); + } + + /// + /// When serializing the result data to benchview this is called to determine if any of the metrics should be reported differently + /// than they were collected. 
This benchmark uses this to collect several measurements in each iteration, then present those measurements + /// to benchview as if each was the elapsed-time metric of a distinct scenario test with its own set of iterations. + /// + public override bool TryGetBenchviewCustomMetricReporting(Metric originalMetric, out Metric newMetric, out string newScenarioModelName) + { + if(originalMetric.Equals(TrainingMetric)) + { + newScenarioModelName = "Training"; + } + else if (originalMetric.Equals(FirstSearchMetric)) + { + newScenarioModelName = "First Search"; + } + else if (originalMetric.Equals(MedianSearchMetric)) + { + newScenarioModelName = "Median Search"; + } + else + { + return base.TryGetBenchviewCustomMetricReporting(originalMetric, out newMetric, out newScenarioModelName); + } + newMetric = Metric.ElapsedTimeMilliseconds; + return true; + } + + protected static string GetWord2VecNetRepoRootDir(string outputDir) + { + return Path.Combine(outputDir, "W"); + } + + protected abstract string GetWord2VecNetSrcDirectory(string outputDir); + + string GetWord2VecNetPublishDirectory(DotNetInstallation dotNetInstall, string outputDir, string tfm) + { + string dir = Path.Combine(GetWord2VecNetSrcDirectory(outputDir), "bin", dotNetInstall.Architecture, "Release", tfm, "publish"); + if (Directory.Exists(dir)) + { + return dir; + } + + dir = Path.Combine(GetWord2VecNetSrcDirectory(outputDir), "bin", "Release", tfm, "publish"); + if (Directory.Exists(dir)) + { + return dir; + } + + return null; + } + + string GetCoreClrRoot() + { + string currentDirectory = Directory.GetCurrentDirectory(); + string workspace = Environment.GetEnvironmentVariable("CORECLR_REPO"); + if (workspace == null) + { + workspace = currentDirectory; + } + + return workspace; + } + + private const string Word2VecNetRepoUrl = "https://github.com/eabdullin/Word2Vec.Net"; + private const string Word2VecNetCommitSha1Id = "6012a2b5b886926918d51b1b56387d785115f448"; + private const string Word2VecNetPatch = 
"word2vecnet.patch"; + private const string EnvironmentFileName = "Word2VecNetEnvironment.txt"; + private const string StoreDirName = ".store"; + private readonly Metric TrainingMetric = new Metric("Training", "ms"); + private readonly Metric FirstSearchMetric = new Metric("First Search", "ms"); + private readonly Metric MedianSearchMetric = new Metric("Median Search", "ms"); + private readonly Metric MeanSearchMetric = new Metric("Mean Search", "ms"); + } +} + diff --git a/tests/src/performance/Scenario/JitBench/JitBench.csproj b/tests/src/performance/Scenario/JitBench/JitBench.csproj index 2e384d183ecc..0d1f4fb0dfcd 100644 --- a/tests/src/performance/Scenario/JitBench/JitBench.csproj +++ b/tests/src/performance/Scenario/JitBench/JitBench.csproj @@ -54,5 +54,8 @@ Overwrite="true" Encoding="Unicode"/> + + + diff --git a/tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch b/tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch new file mode 100644 index 000000000000..dbad57ba5f95 --- /dev/null +++ b/tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch @@ -0,0 +1,605 @@ +diff --git a/.gitignore b/.gitignore +index 8098fe2..7c82f99 100644 +--- a/.gitignore ++++ b/.gitignore +@@ -17,7 +17,6 @@ + [Rr]eleases/ + x64/ + x86/ +-build/ + bld/ + [Bb]in/ + [Oo]bj/ +diff --git a/NuGet.config b/NuGet.config +new file mode 100644 +index 0000000..bd3a6f8 +--- /dev/null ++++ b/NuGet.config +@@ -0,0 +1,8 @@ ++ ++ ++ ++ ++ ++ ++ ++ +\ No newline at end of file +diff --git a/Word2LibConsole/Word2LibConsole.vcxproj b/Word2LibConsole/Word2LibConsole.vcxproj +index 2caa5a0..03b8ada 100644 +--- a/Word2LibConsole/Word2LibConsole.vcxproj ++++ b/Word2LibConsole/Word2LibConsole.vcxproj +@@ -1,5 +1,5 @@ +  +- ++ + + + Debug +@@ -14,19 +14,19 @@ + {9C719670-3571-4B68-A3DA-053B18C654A0} + Win32Proj + Word2LibConsole +- 8.1 ++ 10.0.16299.0 + + + + Application + true +- v140 ++ v141 + Unicode + + + Application + false +- v140 ++ v141 + true + Unicode + 
+diff --git a/Word2Vec.Net/Distance.cs b/Word2Vec.Net/Distance.cs +index f2c3cdc..32929cd 100644 +--- a/Word2Vec.Net/Distance.cs ++++ b/Word2Vec.Net/Distance.cs +@@ -46,7 +46,7 @@ namespace Word2Vec.Net + } + if (b == Words) b = -1; + bi[a] = b; +- Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]); ++ //Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]); + if (b == -1) + { + Console.Write("Out of dictionary word!\n"); +@@ -99,4 +99,4 @@ namespace Word2Vec.Net + public string Word { get; set; } + public float Distance { get; set; } + } +-} +\ No newline at end of file ++} +diff --git a/Word2Vec.Net/Properties/AssemblyInfo.cs b/Word2Vec.Net/Properties/AssemblyInfo.cs +deleted file mode 100644 +index 89452bf..0000000 +--- a/Word2Vec.Net/Properties/AssemblyInfo.cs ++++ /dev/null +@@ -1,36 +0,0 @@ +-using System.Reflection; +-using System.Runtime.CompilerServices; +-using System.Runtime.InteropServices; +- +-// General Information about an assembly is controlled through the following +-// set of attributes. Change these attribute values to modify the information +-// associated with an assembly. +-[assembly: AssemblyTitle("Word2Vec.Net")] +-[assembly: AssemblyDescription("")] +-[assembly: AssemblyConfiguration("")] +-[assembly: AssemblyCompany("")] +-[assembly: AssemblyProduct("Word2Vec.Net")] +-[assembly: AssemblyCopyright("Copyright © 2015")] +-[assembly: AssemblyTrademark("")] +-[assembly: AssemblyCulture("")] +- +-// Setting ComVisible to false makes the types in this assembly not visible +-// to COM components. If you need to access a type in this assembly from +-// COM, set the ComVisible attribute to true on that type. 
+-[assembly: ComVisible(false)] +- +-// The following GUID is for the ID of the typelib if this project is exposed to COM +-[assembly: Guid("b2bcc46d-a28b-40a4-a873-f0b1ffe65181")] +- +-// Version information for an assembly consists of the following four values: +-// +-// Major Version +-// Minor Version +-// Build Number +-// Revision +-// +-// You can specify all the values or you can default the Build and Revision Numbers +-// by using the '*' as shown below: +-// [assembly: AssemblyVersion("1.0.*")] +-[assembly: AssemblyVersion("1.0.0.0")] +-[assembly: AssemblyFileVersion("1.0.0.0")] +diff --git a/Word2Vec.Net/Word2Vec.Net.csproj b/Word2Vec.Net/Word2Vec.Net.csproj +index ee3ddb9..52cc678 100644 +--- a/Word2Vec.Net/Word2Vec.Net.csproj ++++ b/Word2Vec.Net/Word2Vec.Net.csproj +@@ -1,62 +1,26 @@ +- +- +- ++ ++ + +- Debug +- AnyCPU +- {FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F} +- Library +- Properties +- Word2Vec.Net ++ Word2Vec.Net ++ netcoreapp2.1 ++ $(DefineConstants);DEMO ++ true ++ true + Word2Vec.Net +- v4.5 +- 512 ++ library ++ ++ ++ false ++ ++ ++ $(WORD2VEC_FRAMEWORK_VERSION) + +- +- true +- full +- false +- bin\Debug\ +- DEBUG;TRACE +- prompt +- 4 +- bin\Debug\Word2Vec.Net.XML +- AnyCPU ++ ++ ++ 2.1.0-* + +- +- pdbonly +- true +- bin\Release\ +- TRACE +- prompt +- 4 ++ ++ ++ $(DefineConstants);RELEASE + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +\ No newline at end of file ++ +diff --git a/Word2Vec.Net/Word2Vec.cs b/Word2Vec.Net/Word2Vec.cs +index 968bf88..4142c7b 100644 +--- a/Word2Vec.Net/Word2Vec.cs ++++ b/Word2Vec.Net/Word2Vec.cs +@@ -57,7 +57,7 @@ namespace Word2Vec.Net + private const int TableSize = (int) 1e8; + private int[] _table; + +- internal Word2Vec( ++ public Word2Vec( + string trainFileName, + string outPutfileName, + string saveVocabFileName, +@@ -186,7 +186,7 @@ namespace Word2Vec.Net + for (var a = 0; a < VocabHashSize; a++) _vocabHash[a] = -1; + int size = _vocabSize; + _trainWords = 0; +- for (var a = 0; a < 
size; a++) ++ /*for (var a = 0; a < size; a++) + { + // Words occuring less than min_count times will be discarded from the vocab + if (_vocab[a].Cn < _minCount && (a != 0)) +@@ -203,7 +203,7 @@ namespace Word2Vec.Net + _trainWords += _vocab[a].Cn; + } + } +- Array.Resize(ref _vocab, _vocabSize + 1); ++ Array.Resize(ref _vocab, _vocabSize + 1);*/ + + // Allocate memory for the binary tree construction + for (var a = 0; a < _vocabSize; a++) +@@ -331,56 +331,48 @@ namespace Word2Vec.Net + + private void LearnVocabFromTrainFile() + { +- int i; +- for (var a = 0; a < VocabHashSize; a++) _vocabHash[a] = -1; +- using (var fin = File.OpenText(_trainFile)) ++ int i; ++ for (var a = 0; a < VocabHashSize; a++) _vocabHash[a] = -1; ++ string[] fin = System.IO.File.ReadAllLines(_trainFile); ++ _vocabSize = 0; ++ ++ Regex regex = new Regex("\\s"); ++ AddWordToVocab(""); ++ foreach (string line in fin) ++ { ++ string[] words = regex.Split(line); ++ ++ foreach (var word in words) + { +- if (fin == StreamReader.Null) ++ if(string.IsNullOrWhiteSpace(word)) continue; ++ _trainWords++; ++ if ((_debugMode > 1) && (_trainWords%100000 == 0)) + { +- throw new InvalidOperationException("ERROR: training data file not found!\n"); ++ Console.Write("{0}K \r", _trainWords/1000); ++ //printf("%lldK%c", train_words / 1000, 13); ++ //fflush(stdout); + } +- _vocabSize = 0; +- +- string line; +- Regex regex = new Regex("\\s"); +- AddWordToVocab(""); +- while ((line = fin.ReadLine()) != null) +- { +- if (fin.EndOfStream) break; +- string[] words = regex.Split(line); +- +- foreach (var word in words) +- { +- if(string.IsNullOrWhiteSpace(word)) continue; +- _trainWords++; +- if ((_debugMode > 1) && (_trainWords%100000 == 0)) +- { +- Console.Write("{0}K \r", _trainWords/1000); +- //printf("%lldK%c", train_words / 1000, 13); +- //fflush(stdout); +- } +- i = SearchVocab(word); +- if (i == -1) ++ i = SearchVocab(word); ++ if (i == -1) + { + var a = AddWordToVocab(word); + _vocab[a].Cn = 1; + } +- else +- 
_vocab[i].Cn++; +- if (_vocabSize > VocabHashSize*0.7) +- ReduceVocab(); +- } +- } +- SortVocab(); +- if (_debugMode > 0) +- { +- Console.WriteLine("Vocab size: {0}", _vocabSize); +- Console.WriteLine("Words in train file: {0}", _trainWords); +- } +- //file_size = ftell(fin); +- _fileSize = new FileInfo(_trainFile).Length; ++ else ++ _vocab[i].Cn++; ++ if (_vocabSize > VocabHashSize*0.7) ++ ReduceVocab(); + } + } ++ SortVocab(); ++ if (_debugMode > 0) ++ { ++ Console.WriteLine("Vocab size: {0}", _vocabSize); ++ Console.WriteLine("Words in train file: {0}", _trainWords); ++ } ++ //file_size = ftell(fin); ++ _fileSize = new FileInfo(_trainFile).Length; ++ } + + private void SaveVocab() + { +diff --git a/Word2Vec.Net/WordAnalogy.cs b/Word2Vec.Net/WordAnalogy.cs +index eaa35bf..8347c0f 100644 +--- a/Word2Vec.Net/WordAnalogy.cs ++++ b/Word2Vec.Net/WordAnalogy.cs +@@ -24,7 +24,7 @@ namespace Word2Vec.Net + for (b = 0; b < Words; b++) if (!new string(Vocab, (int)(b * max_w), (int)max_w).Equals(st[a])) break; + if (b == Words) b = -1; + bi[a] = b; +- Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]); ++ //Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]); + if (b == -1) + { + Console.Write("Out of dictionary word!\n"); +diff --git a/Word2VecScenario/App.config b/Word2VecScenario/App.config +new file mode 100644 +index 0000000..e8482b1 +--- /dev/null ++++ b/Word2VecScenario/App.config +@@ -0,0 +1,12 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +\ No newline at end of file +diff --git a/Word2VecScenario/Corpus.txt.ReadMe.txt b/Word2VecScenario/Corpus.txt.ReadMe.txt +new file mode 100644 +index 0000000..82c04e5 +--- /dev/null ++++ b/Word2VecScenario/Corpus.txt.ReadMe.txt +@@ -0,0 +1,5 @@ ++Please download and rename the following file: ++ ++http://mattmahoney.net/dc/text8.zip ++ ++Renaming the file inside the zip to: Corpus.txt - In place of this File! 
+diff --git a/Word2VecScenario/Program.cs b/Word2VecScenario/Program.cs +new file mode 100644 +index 0000000..7e9ad31 +--- /dev/null ++++ b/Word2VecScenario/Program.cs +@@ -0,0 +1,161 @@ ++namespace Word2VecScenario ++{ ++ using System; ++ using System.Diagnostics; ++ using System.Linq; ++ using Word2Vec.Net; ++ ++ class Program ++ { ++ static string path = @"Word2VectorOutputFile.bin"; ++ static Distance distance = null; ++ static WordAnalogy wordAnalogy = null; ++ ++ static void Main(string[] args) ++ { ++ // -train Use text data from to train the model ++ string train = "Corpus.txt"; ++ ++ // -output Use to save the resulting word vectors / word clusters ++ string output = "Vectors.bin"; ++ ++ // -save-vocab The vocabulary will be saved to ++ string savevocab = ""; ++ ++ // -read-vocab The vocabulary will be read from , not constructed from the training data ++ string readvocab = ""; ++ ++ // -size Set size of word vectors; default is 100 ++ int size = 100; ++ ++ // -debug Set the debug mode (default = 2 = more info during training) ++ int debug = 1; ++ ++ // -binary Save the resulting vectors in binary moded; default is 0 (off) ++ int binary = 1; ++ ++ // -cbow Use the continuous bag of words model; default is 1 (use 0 for skip-gram model) ++ int cbow = 1; ++ ++ // -alpha Set the starting learning rate; default is 0.025 for skip-gram and 0.05 for CBOW ++ float alpha = 0.05f; ++ ++ // -sample Set threshold for occurrence of words. 
Those that appear with higher frequency in the training data ++ float sample = 1e-4f; ++ ++ // -hs Use Hierarchical Softmax; default is 0 (not used) ++ int hs = 0; ++ ++ // -negative Number of negative examples; default is 5, common values are 3 - 10 (0 = not used) ++ int negative = 5; ++ ++ // -threads Use threads (default 12) ++ int threads = 12; ++ ++ // -iter Run more training iterations (default 5) ++ long iter = 15; ++ ++ // -min-count This will discard words that appear less than times; default is 5 ++ int mincount = 5; ++ ++ // -classes Output word classes rather than word vectors; default number of classes is 0 (vectors are written) ++ long classes = 0; ++ ++ // -window Set max skip length between words; default is 5 ++ int window = 12; ++ ++ Word2Vec word2Vec = new Word2Vec(train, output, savevocab, readvocab, size, debug, binary, cbow, alpha, sample, hs, negative, threads, iter, mincount, classes, window); ++ ++ var totalTime = Stopwatch.StartNew(); ++ var highRes = Stopwatch.IsHighResolution; ++ ++ word2Vec.TrainModel(); ++ ++ totalTime.Stop(); ++ ++ var trainingTime = totalTime.ElapsedMilliseconds; ++ Console.WriteLine("Training took {0}ms", trainingTime); ++ ++ path = @"Vectors.bin"; ++ distance = new Distance(path); ++ wordAnalogy = new WordAnalogy(path); ++ ++ string[] wordList = new string[] {"paris france madrid" }; ++ ++ var searchTime = Stopwatch.StartNew(); ++ ++ foreach (string word in wordList) ++ { ++ distance.Search(word); ++ wordAnalogy.Search(word); ++ } ++ ++ searchTime.Stop(); ++ var firstSearchTime = searchTime.ElapsedMilliseconds; ++ Console.WriteLine("Search took {0}ms", firstSearchTime); ++ ++ int outerN = 5; ++ ++ for (int outer = 0; outer < outerN; outer++) ++ { ++ foreach (string word in wordList) ++ { ++ int N = 11; ++ var minSearchTime = long.MaxValue; ++ var maxSearchTime = long.MinValue; ++ long[] searchTimes = new long[N]; ++ ++ Console.WriteLine($"Batch {outer}, searching {word}: running {N} searches"); ++ ++ for (int inner 
= 0; inner < N; inner++) ++ { ++ searchTime.Restart(); ++ distance.Search(word); ++ BestWord[] result = wordAnalogy.Search(word); ++ searchTime.Stop(); ++ ++ /*foreach (var bestWord in result) ++ { ++ Console.WriteLine("{0}\t\t{1}", bestWord.Word, bestWord.Distance); ++ }*/ ++ ++ long interval = highRes ? searchTime.ElapsedTicks : searchTime.ElapsedMilliseconds; ++ searchTimes[inner] = interval; ++ ++ if (interval < minSearchTime) ++ { ++ minSearchTime = interval; ++ } ++ if (interval > maxSearchTime) ++ { ++ maxSearchTime = interval; ++ } ++ } ++ ++ if (highRes) ++ { ++ double averageSearch = 1000 * ((double)searchTimes.Sum() / N / Stopwatch.Frequency); ++ double medianSearch = 1000 * ((double)searchTimes.OrderBy(t => t).ElementAt(N / 2) / Stopwatch.Frequency); ++ Console.WriteLine("Steadystate min search time: {0:F2}ms", 1000.0 * minSearchTime / Stopwatch.Frequency); ++ Console.WriteLine("Steadystate max search time: {0:F2}ms", 1000.0 * maxSearchTime / Stopwatch.Frequency); ++ Console.WriteLine("Steadystate average search time: {0:F2}ms", averageSearch); ++ Console.WriteLine("Steadystate median search time: {0:F2}ms", medianSearch); ++ } ++ else ++ { ++ long averageSearch = searchTimes.Sum() / N; ++ long medianSearch = searchTimes.OrderBy(t => t).ElementAt(N / 2); ++ Console.WriteLine("Steadystate min search time: {0}ms", minSearchTime); ++ Console.WriteLine("Steadystate max search time: {0}ms", maxSearchTime); ++ Console.WriteLine("Steadystate average search time: {0}ms", (int)averageSearch); ++ Console.WriteLine("Steadystate median search time: {0}ms", (int)medianSearch); ++ } ++ ++ Console.WriteLine(""); ++ } ++ } ++ } ++ ++ } ++ ++} +diff --git a/Word2VecScenario/Word2VecScenario.csproj b/Word2VecScenario/Word2VecScenario.csproj +new file mode 100644 +index 0000000..cacd48a +--- /dev/null ++++ b/Word2VecScenario/Word2VecScenario.csproj +@@ -0,0 +1,34 @@ ++ ++ ++ ++ Test for Word2Vec ++ netcoreapp2.1 ++ $(DefineConstants);DEMO ++ true ++ true ++ 
Word2VecScenario ++ Exe ++ ++ ++ false ++ ++ ++ $(WORD2VEC_FRAMEWORK_VERSION) ++ ++ ++ ++ 2.1.0-* ++ ++ ++ ++ $(DefineConstants);RELEASE ++ ++ ++ ++ ++ Word2Vec.Net ++ ++ ++ ++ ++ +diff --git a/build/common.props b/build/common.props +new file mode 100644 +index 0000000..36d884c +--- /dev/null ++++ b/build/common.props +@@ -0,0 +1,5 @@ ++ ++ ++ ++ ++ +diff --git a/build/dependencies.props b/build/dependencies.props +new file mode 100644 +index 0000000..95d79b3 +--- /dev/null ++++ b/build/dependencies.props +@@ -0,0 +1,5 @@ ++ ++ ++ 2.0.0-* ++ ++ diff --git a/tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs b/tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs index 206cd556da79..d47d12785167 100644 --- a/tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs +++ b/tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs @@ -84,7 +84,7 @@ BenchmarkRunResult MeasureIterations(TestRun run, BenchmarkConfiguration config, BenchmarkRunResult result = new BenchmarkRunResult(this, config); StringBuilder stderr = new StringBuilder(); StringBuilder stdout = new StringBuilder(); - var scenarioConfiguration = new ScenarioTestConfiguration(TimeSpan.FromMinutes(1), startInfo) + var scenarioConfiguration = new ScenarioTestConfiguration(TimeSpan.FromMinutes(20), startInfo) { //XUnitPerformanceHarness writes files to disk starting with {runid}-{ScenarioBenchmarkName}-{TestName} TestName = (Name + "-" + config.Name).Replace(' ', '_'), @@ -143,6 +143,7 @@ protected static void AddEtwData( "dotnet.exe", "MusicStore.dll", "AllReady.dll", + "Word2VecScenario.dll", "ntoskrnl.exe", "System.Private.CoreLib.dll", "Unknown", diff --git a/tests/src/performance/Scenario/JitBench/Utilities/FileTasks.cs b/tests/src/performance/Scenario/JitBench/Utilities/FileTasks.cs index 5e9efa2ffbcf..e5391cafd0ad 100644 --- a/tests/src/performance/Scenario/JitBench/Utilities/FileTasks.cs +++ b/tests/src/performance/Scenario/JitBench/Utilities/FileTasks.cs @@ -178,6 +178,9 @@ public 
static void DeleteDirectory(string path, ITestOutputHelper output) } try { + // On some systems, directories/files created programmatically are created with attributes + // that prevent them from being deleted. Set those attributes to be normal + SetAttributesNormal(path); Directory.Delete(path, true); return; } @@ -194,6 +197,18 @@ public static void DeleteDirectory(string path, ITestOutputHelper output) } } + public static void SetAttributesNormal(string path) + { + foreach (var subDir in Directory.GetDirectories(path)) + { + SetAttributesNormal(subDir); + } + foreach (var file in Directory.GetFiles(path)) + { + File.SetAttributes(file, FileAttributes.Normal); + } + } + public static void MoveDirectory(string sourceDirName, string destDirName, ITestOutputHelper output) { if (output != null) @@ -225,6 +240,37 @@ public static void MoveDirectory(string sourceDirName, string destDirName, ITest } } + public static void MoveFile(string sourceFileName, string destFileName, ITestOutputHelper output) + { + if (output != null) + { + output.WriteLine("Moving " + sourceFileName + " -> " + destFileName); + } + int retries = 10; + for (int i = 0; i < retries; i++) + { + if (!File.Exists(sourceFileName) && File.Exists(destFileName)) + { + return; + } + try + { + File.Move(sourceFileName, destFileName); + return; + } + catch (IOException e) when (i < retries - 1) + { + output?.WriteLine($" Attempt #{i + 1} failed: {e.Message}"); + } + catch (UnauthorizedAccessException e) when (i < retries - 1) + { + output?.WriteLine($" Attempt #{i + 1} failed: {e.Message}"); + } + // if something has a transient lock on the file, waiting may resolve the issue (output may be null, so logging above is null-safe) + Thread.Sleep((i + 1) * 10); + } + } + public static void CreateDirectory(string path, ITestOutputHelper output) { output.WriteLine("Creating " + path); diff --git a/tests/src/performance/Scenario/JitBench/Utilities/ProcessRunner.cs b/tests/src/performance/Scenario/JitBench/Utilities/ProcessRunner.cs index 60d30e1af6df..467ba7dbf771 100644 
--- a/tests/src/performance/Scenario/JitBench/Utilities/ProcessRunner.cs +++ b/tests/src/performance/Scenario/JitBench/Utilities/ProcessRunner.cs @@ -67,7 +67,7 @@ public ProcessRunner(string exePath, string arguments, string replayCommand = nu _p.StartInfo = psi; _p.EnableRaisingEvents = false; _loggers = new List(); - _timeout = TimeSpan.FromMinutes(10); + _timeout = TimeSpan.FromMinutes(60); _cancelSource = new CancellationTokenSource(); _killReason = null; _waitForProcessStartTaskSource = new TaskCompletionSource(); diff --git a/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj b/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj index 009949ab1b39..74e633a32719 100644 --- a/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj +++ b/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj @@ -55,4 +55,10 @@ + + + + + +