@@ -91,7 +91,42 @@ def generate_rdf_graph(self, merge: bool = False):
91
91
"""
92
92
if len (self .graph ) != 0 :
93
93
return
94
- # preprocess
94
+
95
+ # generate dataset entity and has_source property
96
+ citation_dict = {}
97
+ uns = self .ea .enricher_manager .anndata .uns
98
+ if citation_field_name in uns .keys ():
99
+ citation_dict = parse_citation_field_into_dict (uns [citation_field_name ])
100
+ cxg_versioned_dataset_id = (
101
+ citation_dict .get ("download_link" ).split ("/" )[- 1 ].split ("." )[0 ]
102
+ )
103
+ dataset_class = URIRef (get_cxg_dataset_url (cxg_versioned_dataset_id ))
104
+ else :
105
+ # if citation_field_name is not present in uns, fall back to a random UUID as cxg_versioned_dataset_id
106
+ cxg_versioned_dataset_id = str (uuid .uuid4 ())
107
+ dataset_class = URIRef (self .ns [cxg_versioned_dataset_id ])
108
+ self .graph .add ((dataset_class , RDF .type , URIRef (DATASET .get ("iri" ))))
109
+ self .graph .add ((dataset_class , RDFS .label , Literal (DATASET .get ("label" ))))
110
+ for key , value in uns .items ():
111
+ if not isinstance (value , str ):
112
+ continue
113
+ if key == citation_field_name :
114
+ for citation_key , citation_value in citation_dict .items ():
115
+ self .graph .add (
116
+ (
117
+ dataset_class ,
118
+ URIRef (self .ns [remove_special_characters (citation_key )]),
119
+ Literal (citation_value ),
120
+ )
121
+ )
122
+
123
+ self .graph .add (
124
+ (dataset_class , URIRef (self .ns [remove_special_characters (key )]), Literal (value ))
125
+ )
126
+ has_source = URIRef (HAS_SOURCE ["iri" ])
127
+ self .graph .add ((has_source , RDFS .label , Literal (HAS_SOURCE ["label" ])))
128
+
129
+ # preprocess for cell clusters
95
130
column_group = ["field_name1" , "value1" ]
96
131
df = self .df .sort_values (by = column_group ).reset_index (drop = True )
97
132
grouped_df = df .groupby (column_group )
@@ -124,38 +159,9 @@ def generate_rdf_graph(self, merge: bool = False):
124
159
) else temp_dict .update ({key : value })
125
160
126
161
if temp_dict not in grouped_dict_uuid .values ():
127
- grouped_dict_uuid [str (uuid .uuid4 ())] = temp_dict
128
-
129
- # generate dataset entity and has_source property
130
- uns = self .ea .enricher_manager .anndata .uns
131
- citation_dict = {}
132
- if citation_field_name in uns .keys ():
133
- citation_dict = parse_citation_field_into_dict (uns [citation_field_name ])
134
- dataset_class = URIRef (
135
- get_cxg_dataset_url (citation_dict .get ("download_link" ).split ("/" )[- 1 ].split ("." )[0 ])
136
- )
137
- else :
138
- dataset_class = URIRef (self .ns [str (uuid .uuid4 ())])
139
- self .graph .add ((dataset_class , RDF .type , URIRef (DATASET .get ("iri" ))))
140
- self .graph .add ((dataset_class , RDFS .label , Literal (DATASET .get ("label" ))))
141
- for key , value in uns .items ():
142
- if not isinstance (value , str ):
143
- continue
144
- if key == citation_field_name :
145
- for citation_key , citation_value in citation_dict .items ():
146
- self .graph .add (
147
- (
148
- dataset_class ,
149
- URIRef (self .ns [remove_special_characters (citation_key )]),
150
- Literal (citation_value ),
151
- )
152
- )
153
-
154
- self .graph .add (
155
- (dataset_class , URIRef (self .ns [remove_special_characters (key )]), Literal (value ))
156
- )
157
- has_source = URIRef (HAS_SOURCE ["iri" ])
158
- self .graph .add ((has_source , RDFS .label , Literal (HAS_SOURCE ["label" ])))
162
+ grouped_dict_uuid [
163
+ str (uuid .uuid5 (uuid .UUID (cxg_versioned_dataset_id ), str (temp_dict )))
164
+ ] = temp_dict
159
165
160
166
# generate a resource for each free-text cell_type annotation and cell_type_ontology_term annotation
161
167
cell_set_class = URIRef (CLUSTER .get ("iri" ))
0 commit comments