diff --git a/tcmid.py b/tcmid.py
new file mode 100644
index 0000000..222f9bb
--- /dev/null
+++ b/tcmid.py
@@ -0,0 +1,134 @@
+# A script to convert TCMID to AtomSpace representation in Scheme
+#
+# Some additional dependencies may be required:
+#   apt install unrar
+#   python3 -m pip install rarfile
+#
+# To run the script:
+#   PYTHONIOENCODING=utf-8 python3 tcmid.py
+
+import os
+import rarfile
+import wget
+from datetime import date
+
+rarfile.UNRAR_TOOL = "unrar"
+
+output_file = "dataset/tcmid_{}.scm".format(str(date.today()))
+tcmid_prescription = "prescription-TCMID.v2.01.rar"
+tcmid_herb = "herb-TCMID.v2.01.rar"
+tcmid_network = "ingredient_targets_disease_drug-TCMID.v2.03.rar"
+tcmid_gnsp = "Ingredient_MS-TCMID.v2.01.rar"
+tcmid_spectrum = "Herb_MS-TCMID.v2.01.rar"
+tcmid_source_rars = [
+  # Need to know the properties of the herb first, so tcmid_herb
+  # should be processed before tcmid_prescription
+  tcmid_herb,
+  tcmid_prescription,
+  tcmid_gnsp,
+  tcmid_spectrum,
+#  tcmid_network
+]
+tcmid_base_url = "http://119.3.41.228:8000/static/download/"
+
+if os.path.exists(os.path.join(os.getcwd(), output_file)):
+  os.remove(output_file)
+
+def evalink(pred, node_type1, node_type2, node1, node2):
+  print("--- Creating EvaluationLink with:\npredicate = {}\nnode1 = {}\nnode2 = {}\n".format(pred, node1, node2))
+  out_fp.write("(EvaluationLink\n")
+  out_fp.write("\t(PredicateNode \"" + pred + "\")\n")
+  out_fp.write("\t(ListLink\n")
+  out_fp.write("\t\t(" + node_type1 + " \"" + node1 + "\")\n")
+  out_fp.write("\t\t(" + node_type2 + " \"" + node2 + "\")\n")
+  out_fp.write("\t)\n")
+  out_fp.write(")\n")
+
+def memblink(node1, node2):
+  print("--- Creating MemberLink with:\nnode1 = {}\nnode2 = {}\n".format(node1, node2))
+  out_fp.write("(MemberLink\n")
+  out_fp.write("\t(ConceptNode \"" + node1 + "\")\n")
+  out_fp.write("\t(ConceptNode \"" + node2 + "\")\n")
+  out_fp.write(")\n")
+
+def is_available(entry):
+  return False if entry.strip() == "" or entry.strip().lower() == "na" or entry.strip().lower() == "n/a" else True
+
+# ----------
+# Keep a record of which part of a herb would be used in a formula
+herb_part_dict = {}
+
+out_fp = open(output_file, "a", encoding='utf8')
+
+for rar_name in tcmid_source_rars:
+  rar_path = "raw_data/{}".format(rar_name)
+
+  with rarfile.RarFile(rar_file) as rf:
+    # There should only be one file per RAR file
+    # Decode using UTF-8 for the Chinese characters
+    lines = rf.read(rf.infolist()[0]).decode("utf-8", "ignore").split("\n")
+
+    if rar_file.endswith(tcmid_herb):
+      # Skip the first line (columns) in this file
+      for line in lines[1:]:
+        print("--- Reading line: " + line)
+        if is_available(line):
+          contents = line.split("\t")
+          pinyin_name = contents[0]
+          english_name = contents[2]
+          properties = [x.lower().strip() for x in contents[4].split(",")]
+          meridians = [x.lower().strip() for x in contents[5].split(",")]
+          use_part = contents[6]
+          if is_available(pinyin_name) and is_available(english_name):
+            evalink("has_name", "ConceptNode", "ConceptNode", pinyin_name, english_name)
+          if is_available(pinyin_name) and is_available(use_part):
+            use_part_full_name = "{} {}".format(pinyin_name, use_part)
+            herb_part_dict[pinyin_name] = use_part_full_name
+            evalink("has_part", "ConceptNode", "ConceptNode", pinyin_name, use_part_full_name)
+          for prop in properties:
+            if is_available(pinyin_name) and is_available(prop):
+              evalink("has_property", "ConceptNode", "ConceptNode", pinyin_name, "TCM:" + prop)
+          for meri in meridians:
+            if is_available(pinyin_name) and is_available(meri):
+              evalink("meridian_affinity", "ConceptNode", "ConceptNode", pinyin_name, "TCM:" + meri)
+
+    elif rar_file.endswith(tcmid_prescription):
+      # Skip the first line (columns) in this file
+      for line in lines[1:]:
+        print("--- Reading line: " + line)
+        if is_available(line):
+          contents = line.split("\t")
+          prescription = contents[0]
+          composition = contents[3].split(",")
+          for compo in composition:
+            if is_available(compo) and is_available(prescription):
+              compo_part = herb_part_dict[compo] if compo in herb_part_dict else compo
+              evalink("composition", "ConceptNode", "ConceptNode", compo_part, prescription)
+              memblink(compo, "herb")
+          if is_available(prescription):
+            memblink(prescription, "prescription")
+
+    elif rar_file.endswith(tcmid_spectrum):
+      # Skip the first line (columns) in this file
+      for line in lines[1:]:
+        print("--- Reading line: " + line)
+        if is_available(line):
+          contents = line.split("\t")
+          pinyin_name = contents[1]
+          spectrum_description = [x.lower().strip() for x in contents[6].split(";")]
+          for sd in spectrum_description:
+            if is_available(sd) and is_available(pinyin_name):
+              evalink("has_hplc_description", "ConceptNode", "ConceptNode", pinyin_name, sd)
+
+    elif rar_file.endswith(tcmid_gnsp):
+      # Skip the first line (columns) in this file
+      for line in lines[1:]:
+        print("--- Reading line: " + line)
+        if is_available(line):
+          contents = line.split("\t")
+          ingredient = contents[0].replace("\"", "").lower().strip()
+          gnsp_id = contents[1].replace("\"", "").strip()
+          if is_available(ingredient) and is_available(gnsp_id):
+            evalink("has_gnsp_id", "MoleculeNode", "ConceptNode", ingredient, gnsp_id)
+
+out_fp.close()