Skip to content

Commit

Permalink
Merge pull request #4 from gmelodie/trie
Browse files Browse the repository at this point in the history
Add Trie implementation
  • Loading branch information
ektagarg authored Oct 27, 2019
2 parents 721e30b + 60b554c commit 14e62f3
Show file tree
Hide file tree
Showing 5 changed files with 364 additions and 0 deletions.
53 changes: 53 additions & 0 deletions tree/Readme.md → trees/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,56 @@ Process:

Time complexity: O(n)
Space complexity: O(n)


### Trie

Suppose you want to code a dictionary to compete with [Oxford's dictionary](https://www.lexico.com/en). In your dictionary, the user would type in a word and the code would look up its definition (similar to `Ctrl-F`).
How would you code this?
Well, one thing you could do is put every word in a list and iterate through this list word by word, and each word letter by letter:

```
for every word in the dictionary
for every letter in the word
is letter in dict == lookup word letter?
```

That's not a very optimized way of doing this -- in other words, it's slow as hell!
Let's say your dictionary has `N` words and its biggest word is `M` letters long.
This means that in the worst case the time complexity of your lookup is `O(N*M)`.
How can we make this faster?


A [Trie](https://en.wikipedia.org/wiki/Trie) is a *prefix tree*, meaning it finds information (usually strings) by looking at the prefix of the data being looked up.
In our dictionary example, instead of putting our words inside a list, we could create a trie with them.
In a trie, every time you go down a level you get closer to the "answer".
Imagine our dictionary is very small and contains only the words "apple", "an", "as" and "hand".
The structure would look somewhat like this:

```
         R                <-- Level-0 (Root is always empty)
       /   \
      a     hand          <-- Level-1
   /  |  \
 an apple as              <-- Level-2
```

**Obs:** Depending on the implementation the order of the nodes could be different.

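Suppose now that you insert "hard" into this trie. Since "hand" and "hard" share the prefix "ha", the old "hand" node is split, and here's what the trie would look like:

```
             R                   <-- Level-0 (Root is always empty)
          /     \
         a       ha              <-- Level-1
      /  |  \    /  \
    an apple as hand hard        <-- Level-2
```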

- Insertion time complexity: O(w)
- Trie creation time complexity: O(n*m)
- Deletion time complexity: O(w)
- Lookup time complexity: O(w)
- Space complexity: O(n*m)

**Obs:** *w* is the length of the input (e.g. the number of letters in the lookup string), *n* is the number of words in the trie, and *m* is the average length of the words in the trie.
File renamed without changes.
File renamed without changes.
250 changes: 250 additions & 0 deletions trees/trie/trie.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
package trie

import (
"errors"
"fmt"
"strings"
)

// Data is the data type the trie holds.
// One Data value is stored per inserted prefix. Nodes that do not end a
// stored prefix carry the placeholder value -1 (see createSubTree and Delete).
type Data int

// Trie is the root of the tree.
// It doesn't have any data or prefixes of its own; it only points at the
// root Node, whose children hold the first rune of every stored prefix.
type Trie struct {
// root is the top-level node; CreateTrie initialises it to a zero-value Node.
root *Node
}

// Node is a node of the Trie.
// Every node represents exactly one rune; the path from the root down to a
// node spells the prefix that node stands for.
//
// NOTE: CreateNode builds a Node with a positional struct literal, so the
// order of the fields below must not change without updating it.
type Node struct {
// data is the value stored for the prefix ending at this node. It is only
// meaningful when isTerminal is true; otherwise it holds the placeholder -1.
data Data
// isTerminal reports whether a stored prefix ends at this node.
isTerminal bool
// prefix is the single rune this node contributes to the path.
prefix rune
// children are the nodes for the runes that may follow this one.
children []*Node
}

// CreateTrie builds a new, empty Trie and returns a pointer to it.
// The root is a zero-value Node: no prefix rune, no data and no children.
func CreateTrie() *Trie {
	emptyRoot := &Node{}
	return &Trie{root: emptyRoot}
}

// CreateNode allocates a childless node for the rune prefix.
// data and isTerminal are stored as given.
// Returns a pointer to the new node.
func CreateNode(data Data, isTerminal bool, prefix rune) *Node {
	node := new(Node)
	node.data = data
	node.isTerminal = isTerminal
	node.prefix = prefix
	return node
}

// Insert stores data under prefix, creating one node per missing rune.
//
// The existing path is followed as far as it matches prefix:
//   - if runes are left over, a chain of new nodes is appended below the
//     deepest matching node and the last one receives data;
//   - if the whole prefix already exists as a non-terminal path (it is a
//     strict prefix of a stored entry), that node is turned into a terminal
//     node holding data;
//   - if the prefix is already stored, the trie is left untouched.
//
// Returns an error if prefix is empty, if t is nil, or if prefix is already
// stored in the trie (use Update to change its data).
func (t *Trie) Insert(data Data, prefix []rune) error {
	if len(prefix) == 0 {
		return errors.New("Can't insert node with empty prefix")
	}

	if t == nil {
		return errors.New("Can't insert on nil trie")
	}

	// Make a zero-value Trie usable instead of panicking on a nil root.
	if t.root == nil {
		t.root = &Node{}
	}

	// Walk down while the path already exists. depth counts the runes of
	// prefix that were matched, so depth == len(prefix) unambiguously means
	// "the whole path is already there".
	n := t.root
	depth := 0
	for depth < len(prefix) {
		child := n.hasChildWithPrefix(prefix[depth])
		if child == nil {
			break
		}
		n = child
		depth++
	}

	if depth == len(prefix) {
		if n.isTerminal {
			return errors.New("Prefix already exists")
		}
		// Path exists only as part of a longer entry: mark it as an entry.
		n.isTerminal = true
		n.data = data
		return nil
	}

	return n.createSubTree(data, prefix[depth:])
}

// createSubTree appends a chain of new nodes, one per rune of prefix,
// below the Node n and stores data in the last node of the chain.
//
// It does not look at the existing children of n; the caller must make sure
// the first rune of prefix is not already a child of n. With an empty prefix
// no node is created and n itself becomes the terminal node.
// It always returns nil.
func (n *Node) createSubTree(data Data, prefix []rune) error {
	last := n

	for _, c := range prefix {
		// Intermediate nodes carry the placeholder data -1.
		child := CreateNode(-1, false, c)
		last.children = append(last.children, child)
		last = child
	}

	// Insert data in last node
	last.isTerminal = true
	last.data = data

	return nil
}

// hasChildWithPrefix looks through the direct children of n for the one
// whose prefix rune equals c.
// Returns that child, or nil when n has no such child.
func (n *Node) hasChildWithPrefix(c rune) *Node {
	for i := 0; i < len(n.children); i++ {
		if candidate := n.children[i]; candidate.prefix == c {
			return candidate
		}
	}

	return nil
}

// Delete removes the entry stored under prefix.
//
// If the node for prefix still has children (it is part of longer entries)
// it is only unmarked as terminal and its data reset to -1. Otherwise the
// whole branch that exists solely for this entry is cut off: the branch is
// detached from the deepest ancestor that must be kept, i.e. the root, a
// node with more than one child, or a node that is itself a stored entry.
// Detaching swaps the last child into the freed slot, so the order of that
// ancestor's children may change.
//
// Returns an error if t is nil, if prefix is nil, or if prefix is not
// stored in the trie (including the empty prefix).
func (t *Trie) Delete(prefix []rune) error {
	if t == nil {
		return errors.New("Can't delete in nil trie")
	}

	if prefix == nil {
		return errors.New("Can't delete nil prefix")
	}

	// The empty prefix can never be inserted, so it can never be found.
	if len(prefix) == 0 || t.root == nil {
		return errors.New("Didn't find prefix")
	}

	lookup := t.root
	lastUseful := t.root // deepest ancestor that has to survive the deletion
	deleteIDX := -1      // index in lastUseful.children of the branch to cut

	for _, c := range prefix {
		// Find the child continuing the path. A node without children
		// means the path ends here and the prefix is not stored.
		next := -1
		for j, n := range lookup.children {
			if n.prefix == c {
				next = j
				break
			}
		}
		if next < 0 {
			return errors.New("Didn't find prefix")
		}

		// lookup is always an ancestor of the target here, so it has to be
		// kept whenever it branches or is an entry of its own.
		if lookup == t.root || len(lookup.children) > 1 || lookup.isTerminal {
			lastUseful = lookup
			deleteIDX = next
		}

		lookup = lookup.children[next]
	}

	// The path exists but nothing is stored at its end.
	if !lookup.isTerminal {
		return errors.New("Didn't find prefix")
	}

	if len(lookup.children) > 0 {
		// Longer entries still run through this node: keep it, unmark it.
		lookup.isTerminal = false
		lookup.data = -1
		return nil
	}

	// Remove subtree below lastUseful at index deleteIDX (swap-remove).
	last := len(lastUseful.children) - 1
	lastUseful.children[deleteIDX] = lastUseful.children[last]
	lastUseful.children[last] = nil // drop the stale pointer
	lastUseful.children = lastUseful.children[:last]

	return nil
}

// Update replaces the data stored under prefix.
//
// Returns an error if t is nil, if prefix is nil, or if prefix is not
// stored in the trie; in all of these cases nothing is modified.
func (t *Trie) Update(prefix []rune, data Data) error {
	// Same guards as Search, so a nil receiver yields an error, not a panic.
	if t == nil {
		return errors.New("Can't update in nil trie")
	}

	if prefix == nil {
		return errors.New("Can't update nil prefix")
	}

	n, err := t.searchNode(prefix)
	if n == nil {
		return err
	}

	n.data = data
	return nil
}

// Search looks up the entry stored under prefix.
// Returns the entry's data and a nil error when prefix is stored.
// Returns 0 and an error when t is nil, when prefix is nil, or when
// prefix is not stored in the trie.
func (t *Trie) Search(prefix []rune) (Data, error) {
	switch {
	case t == nil:
		return 0, errors.New("Can't search in nil trie")
	case prefix == nil:
		return 0, errors.New("Can't search nil prefix")
	}

	found, err := t.searchNode(prefix)
	if found == nil {
		return 0, err
	}

	return found.data, nil
}

// searchNode returns the terminal node reached by following prefix rune by
// rune from the root.
//
// Returns an error (and a nil node) if t or its root is nil, if the path for
// prefix does not exist, or if the path exists but no entry ends there.
func (t *Trie) searchNode(prefix []rune) (*Node, error) {
	if t == nil || t.root == nil {
		return nil, errors.New("Can't search in nil trie")
	}

	// Lookup node starts at root
	lookup := t.root

	for _, c := range prefix {
		var next *Node
		for _, n := range lookup.children {
			if n.prefix == c { // found prefix rune, go down
				next = n
				break
			}
		}
		// Covers a non-matching child list and a leaf with no children at
		// all; the latter must fail instead of silently staying on lookup.
		if next == nil {
			return nil, errors.New("Didn't find prefix")
		}
		lookup = next
	}

	if !lookup.isTerminal {
		return nil, errors.New("Didn't find prefix")
	}

	return lookup, nil
}

// PrintTrie prints the trie to standard output, one node per line in the
// form "<rune>: <data>". Children are printed right below their parent and
// indented by one more space per level; first-level nodes are not indented.
// Nodes that are not entries print the placeholder data they carry.
//
// Returns an error if t is nil; printing an empty trie prints nothing and
// returns nil.
func (t *Trie) PrintTrie() error {
	if t == nil {
		return errors.New("Can't print nil trie")
	}

	if t.root == nil {
		return nil
	}

	// The root has no rune of its own, so printing starts with its children
	// at indentation 0.
	return t.root.printSubTree(0)
}

// printSubTree prints every descendant of n to standard output, depth
// first, one node per line in the form "<rune>: <data>".
// The direct children of n are indented by tabs spaces and each further
// level by one additional space. n itself is not printed.
// It always returns nil.
func (n *Node) printSubTree(tabs int) error {
	for _, aux := range n.children {
		// fmt.Print instead of fmt.Printf: the indentation is data, not a
		// format string.
		fmt.Print(strings.Repeat(" ", tabs))
		fmt.Printf("%s: %d\n", string(aux.prefix), int(aux.data))
		aux.printSubTree(tabs + 1)
	}

	return nil
}
61 changes: 61 additions & 0 deletions trees/trie/trie_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package trie

import (
"testing"

"github.com/stretchr/testify/assert"
)

// trie is the single Trie instance shared by all tests in this file.
// The tests mutate it and therefore depend on running in source order:
// TestInsert fills it, TestSearch reads it, TestDelete removes entries and
// TestUpdate changes the remaining ones.
var trie = CreateTrie()

// insertEntries are the prefixes stored by TestInsert; the index of each
// entry is used as its data, which TestSearch relies on.
var insertEntries = []string{
"and",
"a",
"b",
"c",
"andromeda",
"as",
"assign",
"ah don't know",
}

// deleteEntries are the prefixes removed by TestDelete.
// All of them are part of insertEntries.
var deleteEntries = []string{
"and",
"assign",
}

// updateEntries are the prefixes whose data TestUpdate overwrites.
// None of them is part of deleteEntries, so they still exist at that point.
var updateEntries = []string{
"andromeda",
"as",
"b",
"ah don't know",
}

// TestInsert stores every entry of insertEntries in the shared trie, using
// the entry's index as data, and fails if any insertion reports an error.
// The resulting trie is printed for manual inspection.
func TestInsert(t *testing.T) {
	for i, entry := range insertEntries {
		err := trie.Insert(Data(i), []rune(entry))
		assert.NoError(t, err, "inserting %q should succeed", entry)
	}
	assert.NoError(t, trie.PrintTrie(), "printing should succeed")
}

// TestSearch looks up every entry of insertEntries in the shared trie and
// expects the data to equal the entry's index, as stored by TestInsert.
func TestSearch(t *testing.T) {
	for idx := range insertEntries {
		word := insertEntries[idx]
		want := Data(idx)

		got, searchErr := trie.Search([]rune(word))

		assert.Equal(t, want, got, "should be equal")
		assert.Equal(t, nil, searchErr, "should be equal")
	}
}

// TestDelete removes every entry of deleteEntries from the shared trie and
// fails if any deletion reports an error.
// The resulting trie is printed for manual inspection.
func TestDelete(t *testing.T) {
	for _, entry := range deleteEntries {
		err := trie.Delete([]rune(entry))
		assert.NoError(t, err, "deleting %q should succeed", entry)
	}
	assert.NoError(t, trie.PrintTrie(), "printing should succeed")
}

// TestUpdate overwrites the data of every entry of updateEntries in the
// shared trie and checks that a following Search returns the new value.
// Errors from Update and Search fail the test instead of being discarded.
func TestUpdate(t *testing.T) {
	for _, entry := range updateEntries {
		key := []rune(entry)

		err := trie.Update(key, 1234)
		assert.NoError(t, err, "updating %q should succeed", entry)

		ans, err := trie.Search(key)
		assert.NoError(t, err, "searching %q should succeed", entry)
		assert.Equal(t, Data(1234), ans, "should be equal")
	}
}

0 comments on commit 14e62f3

Please sign in to comment.