Skip to content

Commit

Permalink
feat: Changed Node children from list to array
Browse files Browse the repository at this point in the history
  • Loading branch information
kirill-iag committed Mar 29, 2024
1 parent ce611a3 commit 58a2cfb
Show file tree
Hide file tree
Showing 8 changed files with 370,198 additions and 72 deletions.
31 changes: 16 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Trie
------
**Trie** (a.k.a. prefix tree) is an ordered tree data structure that is used to store an associative array where the keys are usually strings. All the descendants of a node have a common prefix of the string associated with that node, and the root is associated with the empty string.
*Reference*: [Wikipedia – trie](http://en.wikipedia.org/wiki/Trie)
*Reference*: [Wikipedia](http://en.wikipedia.org/wiki/Trie)

[![CI Build](https://github.com/kpol/trie/workflows/CI%20Build/badge.svg)](https://github.com/kpol/trie/actions?query=workflow%3A%22CI+Build%22)
[![Nuget](https://img.shields.io/nuget/v/KTrie.svg?logo=nuget)](https://www.nuget.org/packages/KTrie)
Expand Down Expand Up @@ -67,20 +67,21 @@ There are two overloads of the `StartsWith` method:

Benchmark tests
------
For performance tests I used 58110 English words of length from 2 to 22 chars. The table below shows prefix lookup time compared to LINQ `Where` with `string.StartsWith`. Number of prefixes: 10

| Method | Mean | Error | StdDev | Allocated |
|------------------------------- |--------------:|------------:|------------:|----------:|
| Trie_StartsWith | 1,663.334 us | 25.0298 us | 22.1883 us | 782258 B |
| LinqSimple_StartsWith | 17,899.727 us | 178.2255 us | 157.9923 us | 675940 B |
| Linq_StartsWith | 1,880.081 us | 22.4351 us | 20.9858 us | 676893 B |
| Linq_DictionaryWithAllPrefixes | 775.352 us | 7.5212 us | 6.6673 us | 673053 B |
| Trie_Matches | 5.389 us | 0.0623 us | 0.0583 us | 9096 B |
| Trie_PatternStartsWith | 10.924 us | 0.2181 us | 0.4455 us | 14896 B |
| String_PatternMatching | 116.097 us | 2.0039 us | 2.6057 us | 416 B |
| String_PrefixPatternMatching | 108.479 us | 1.8731 us | 1.7521 us | 3432 B |
| Regex_PatternMatching | 4,410.587 us | 87.8454 us | 90.2107 us | 419 B |
| Regex_PrefixPatternMatching | 4,309.215 us | 54.2987 us | 50.7910 us | 3435 B |
For performance tests I used 370105 English words (from: https://github.com/dwyl/english-words).

| Method | Mean | Error | StdDev | Allocated |
|------------------------------------- |--------------:|-------------:|-------------:|------------:|
| Load_Trie | 217,385.15 us | 4,059.770 us | 4,343.909 us | 72741.36 KB |
| Trie_StartsWith | 11,394.07 us | 219.067 us | 466.849 us | 3604.64 KB |
| Linq_StartsWith | 113,231.21 us | 780.126 us | 729.730 us | 2843.55 KB |
| Linq_GroupedByFirstLetter_StartsWith | 10,244.17 us | 91.502 us | 85.591 us | 2844.41 KB |
| Linq_DictionaryWithAllPrefixes | 4,194.10 us | 41.829 us | 39.127 us | 2840.66 KB |
| Trie_Matches | 15.03 us | 0.287 us | 0.268 us | 18.05 KB |
| Trie_PatternStartsWith | 62.98 us | 0.482 us | 0.451 us | 65.65 KB |
| String_PatternMatching | 875.47 us | 7.239 us | 6.045 us | 1.56 KB |
| String_PrefixPatternMatching | 895.72 us | 3.407 us | 2.660 us | 33.72 KB |
| Regex_PatternMatching | 26,587.50 us | 206.420 us | 182.986 us | 1.57 KB |
| Regex_PrefixPatternMatching | 27,545.88 us | 188.291 us | 176.127 us | 33.73 KB |

------
© Kirill Polishchuk
5 changes: 4 additions & 1 deletion src/KTrie.TestBenchmark/KTrie.TestBenchmark.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<IsPackable >False</IsPackable>
<IsPackable>False</IsPackable>
</PropertyGroup>

<ItemGroup>
Expand All @@ -22,6 +22,9 @@
<None Update="TestData\vocabulary.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestData\words_alpha.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
3 changes: 1 addition & 2 deletions src/KTrie.TestBenchmark/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@
using KTrie.TestBenchmark;


var _ = BenchmarkRunner.Run<StringTrieTest>();

BenchmarkRunner.Run<StringTrieTest>();
24 changes: 16 additions & 8 deletions src/KTrie.TestBenchmark/StringTrieTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ namespace KTrie.TestBenchmark;
[MemoryDiagnoser(false)]
public class StringTrieTest
{
private readonly string[] _words;
private readonly ILookup<char, string> _wordGroups;
private readonly Dictionary<string, List<string>> _dictWithAllPrefixes;
private readonly Trie _trie;
private string[] _words;
private ILookup<char, string> _wordGroups;
private Dictionary<string, List<string>> _dictWithAllPrefixes;
private Trie _trie;

private readonly string[] _prefixes =
[
Expand All @@ -38,7 +38,8 @@ public class StringTrieTest
"ve"
];

public StringTrieTest()
[GlobalSetup]
public void GlobalSetup()
{
_words = GetWords();
_wordGroups = PreprocessWords();
Expand All @@ -47,6 +48,12 @@ public StringTrieTest()
_trie = [.. _words];
}

/// <summary>
/// Benchmark that builds a new <c>Trie</c> from every entry in <c>_words</c>
/// using a collection expression, then discards it.
/// </summary>
/// <remarks>
/// NOTE(review): the constructed trie is assigned to a discard and never returned;
/// consider returning it so the benchmark harness consumes the result — confirm
/// this does not affect the measurement.
/// </remarks>
[Benchmark]
public void Load_Trie()
{
// Spread all loaded words into a fresh trie; only the construction cost matters here.
Trie _ = [.. _words];
}

[Benchmark]
public ICollection<string> Trie_StartsWith()
{
Expand All @@ -64,7 +71,7 @@ public ICollection<string> Trie_StartsWith()
}

[Benchmark]
public ICollection<string> LinqSimple_StartsWith()
public ICollection<string> Linq_StartsWith()
{
HashSet<string> result = [];

Expand All @@ -80,13 +87,14 @@ public ICollection<string> LinqSimple_StartsWith()
}

[Benchmark]
public ICollection<string> Linq_StartsWith()
public ICollection<string> Linq_GroupedByFirstLetter_StartsWith()
{
HashSet<string> result = [];

foreach (var prefix in _prefixes)
{
var firstLetter = prefix[0];

foreach (var word in _wordGroups[firstLetter].Where(w => w.StartsWith(prefix)))
{
result.Add(word);
Expand Down Expand Up @@ -139,7 +147,7 @@ public ICollection<string> Regex_PatternMatching() =>
public ICollection<string> Regex_PrefixPatternMatching() =>
_words.Where(word => Regex.IsMatch(word, "^.c.{2}t")).ToHashSet();

private static string[] GetWords() => File.ReadAllLines("TestData/vocabulary.txt");
private static string[] GetWords() => File.ReadAllLines("TestData/words_alpha.txt");

private ILookup<char, string> PreprocessWords()
{
Expand Down
Loading

0 comments on commit 58a2cfb

Please sign in to comment.