diff --git a/model/data_utils.py b/model/data_utils.py
index ede3c1f..c3c6d39 100644
--- a/model/data_utils.py
+++ b/model/data_utils.py
@@ -80,7 +80,6 @@ def __iter__(self):
                     words += [word]
                     tags += [tag]
 
-
     def __len__(self):
         """Iterates once over the corpus to set and store length"""
         if self.length is None:
@@ -122,11 +121,11 @@ def get_char_vocab(dataset):
         a set of all the characters in the dataset
 
     """
-    vocab_char = set()
+    vocab_char = set(())
     for words, _ in dataset:
         for word in words:
-            vocab_char.update(word)
-
+            for letter in word.decode('utf-8'):
+                vocab_char.add(letter.encode('utf-8'))
     return vocab_char
 
 
@@ -252,7 +251,8 @@ def f(word):
         # 0. get chars of words
         if vocab_chars is not None and chars == True:
             char_ids = []
-            for char in word:
+            for char in word.decode("utf-8"):
+                char = char.encode("utf-8")
                 # ignore chars out of vocabulary
                 if char in vocab_chars:
                     char_ids += [vocab_chars[char]]
@@ -424,4 +424,4 @@ def get_chunks(seq, tags):
         chunk = (chunk_type, chunk_start, len(seq))
         chunks.append(chunk)
 
-    return chunks
+    return chunks
\ No newline at end of file