-
Notifications
You must be signed in to change notification settings - Fork 77
/
data.py
58 lines (52 loc) · 1.78 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
'''This file adds data to the weaviate instance'''
import pickle
import weaviate
import uuid
import datetime
import base64, json, os
def generate_uuid(class_name: str, identifier: str,
                  test: str = 'teststrong') -> str:
    """Generate a deterministic uuid for an object of a given class.

    The uuid is a name-based (version 5) uuid computed in the DNS namespace
    from the plain concatenation ``class_name + identifier``, so the same
    inputs always produce the same uuid. Because the two strings are joined
    without a separator, different pairs with the same concatenation
    (e.g. ``('a', 'bc')`` and ``('ab', 'c')``) map to the same uuid.

    :param class_name: classname of the object to create a uuid for
    :type class_name: str, required
    :param identifier: characters used to generate the uuid
    :type identifier: str, required
    :param test: unused; kept only so existing callers that pass it still work
    :type test: str, optional
    :return: the uuid in its canonical 36-character string form
    :rtype: str
    """
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, class_name + identifier))
# Connect to the Weaviate instance running on localhost.
client = weaviate.Client("http://localhost:8080")
print("Client created (data.py)")

# If a "Comment" class is already present (e.g. from an earlier run of this
# script), delete it so the class can be created again below from a clean
# state. This must match the class name that the rest of the script creates
# and populates.
current_schemas = client.schema.get().get('classes', [])
for schema in current_schemas:
    if schema['class'] == 'Comment':
        client.schema.delete_class('Comment')
        # Nothing left to remove once the class has been deleted.
        break
# Minimal schema for the demo: one "Comment" class with a single text
# property named "content".
content_property = {
    "name": "content",
    "dataType": ["text"],
}
class_obj = {"class": "Comment", "properties": [content_property]}
client.schema.create_class(class_obj)
print("Schema class created")
# Sample data: short self-introductions, as might be written by the members
# of a group. Each one mentions a person's name, the place where they live
# and the organization they work for; the NER module is used to pick those
# out of the text.
comments = [
    'I am John and I live in USA. I work at Microsoft',
    'I am James and I am studying in London. I am an intern at Google',
    'My name is Jason and I work at Facebook,London',
    'Peter here, I am an engineer at Apple, California',
]

# Store every comment as a "Comment" object. The uuid is derived from the
# class name and the comment text itself.
for text in comments:
    client.data_object.create(
        {"content": text},
        "Comment",
        generate_uuid('Comment', text),
    )
print("Finished importing data")