Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Will testing #13

Merged
merged 39 commits into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
e54fdc6
Adding additions to temp3 - more work needing to be done. Framework s…
wdrav Dec 9, 2022
63af587
Adding more features to the post-processing tool - Still need to work…
wdrav Dec 20, 2022
eb9a23e
Removed the template property from components which had their cd /Us…
wdrav Dec 22, 2022
23f2e45
Adding full support of components, need to finish the uris checker an…
wdrav Dec 22, 2022
de318d6
Added collection creation and uri referencing updating
wdrav Jan 13, 2023
2fcc9b4
Updated the check_name function to allow hyphens in the name.
wdrav Jun 8, 2023
8ab7091
Update to post-processing of combinatorial derivations
wdrav Jun 19, 2023
d2b3895
Added correct namespace for sequences and made the string literal of …
wdrav Jun 20, 2023
100a754
Addition of output from temp3.py including updates
wdrav Aug 31, 2023
3625e54
Changes to stabilize ordering of subcomponents
wdrav Sep 5, 2023
41aa9da
Edit to replace periods with underscores
wdrav Sep 5, 2023
1a9244f
Updates to give default descriptions to parts.
wdrav Oct 10, 2023
ffb4eb0
Adding two backbones template, output (two_backbones.nt) and post-pro…
wdrav Dec 7, 2023
cb76a96
Added implementation for multiple backbones.
wdrav Feb 15, 2024
4a8ade0
Recent updates to temp3 and two_backbones
wdrav May 7, 2024
b5c4c58
Worked on changing namespace for GenBank and FASTA
wdrav May 7, 2024
627d543
Example output for simple_library
wdrav May 7, 2024
470295c
Adding additions to temp3 - more work needing to be done. Framework s…
wdrav Dec 9, 2022
537d75d
Adding more features to the post-processing tool - Still need to work…
wdrav Dec 20, 2022
bdb1df6
Removed the template property from components which had their cd /Us…
wdrav Dec 22, 2022
0131a33
Adding full support of components, need to finish the uris checker an…
wdrav Dec 22, 2022
08ed1e5
Added collection creation and uri referencing updating
wdrav Jan 13, 2023
5c50a7d
Updated the check_name function to allow hyphens in the name.
wdrav Jun 8, 2023
8f73847
Update to post-processing of combinatorial derivations
wdrav Jun 19, 2023
4a7d9ea
Added correct namespace for sequences and made the string literal of …
wdrav Jun 20, 2023
17c8c2a
Addition of output from temp3.py including updates
wdrav Aug 31, 2023
f311c97
Changes to stabilize ordering of subcomponents
wdrav Sep 5, 2023
aa4dbee
Edit to replace periods with underscores
wdrav Sep 5, 2023
64be600
Updates to give default descriptions to parts.
wdrav Oct 10, 2023
12d989f
Adding two backbones template, output (two_backbones.nt) and post-pro…
wdrav Dec 7, 2023
df4e106
Added implementation for multiple backbones.
wdrav Feb 15, 2024
513ee7a
Recent updates to temp3 and two_backbones
wdrav May 7, 2024
2007184
Worked on changing namespace for GenBank and FASTA
wdrav May 7, 2024
d4d5bdb
Example output for simple_library
wdrav May 7, 2024
abf6339
Merge branch 'will_testing' of https://github.com/SynBioDex/Excel-uti…
tayasherstyukova Jan 22, 2025
95d3758
fixed helpers package error
tayasherstyukova Jan 23, 2025
756b276
Revert "fixed helpers package error"
tayasherstyukova Jan 23, 2025
c3debd3
fixed helpers package error
tayasherstyukova Jan 23, 2025
d5d7766
Merge pull request #15 from SynBioDex/master
cjmyers Jan 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
544 changes: 297 additions & 247 deletions SBOL3_simple_library4.nt

Large diffs are not rendered by default.

Binary file modified SBOL3_simple_library4.xlsx
Binary file not shown.
685 changes: 685 additions & 0 deletions SampleTemp3Output.nt

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions excelutils/excel_sbol_utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import string
import rdflib
from openpyxl.worksheet import cell_range, worksheet
from openpyxl import load_workbook
from pathlib import Path

def check_name(nm_to_chck):
Expand All @@ -22,6 +23,11 @@ def check_name(nm_to_chck):
for ltr in nm_to_chck:
if ord(ltr) == 32:
nm_to_chck = nm_to_chck.replace(ltr, "_")
elif ord(ltr) == 45:
# Allow hyphens to be reinterpreted as underscores
nm_to_chck = nm_to_chck.replace(ltr, "_")
elif ord(ltr) == 46:
nm_to_chck = nm_to_chck.replace(ltr, "_")
elif ord(ltr) > 122 or ord(ltr) < 48:
# 122 is the highest decimal code number
# for common latin ltrs or arabic numbers
Expand Down Expand Up @@ -124,7 +130,7 @@ def read_variant_table(excel_file: Path) -> tuple[str, str, list[list]]:
LAST_VARIANT_ROW = 35

print(f'Loading workbook "{excel_file}"')
work_book = openpyxl.load_workbook(excel_file, data_only=True)
work_book = load_workbook(excel_file, data_only=True)
sheet = work_book[VARIANTS_SHEET]

# First, get the library name
Expand All @@ -133,7 +139,7 @@ def read_variant_table(excel_file: Path) -> tuple[str, str, list[list]]:

# Then get the base sequence
print('Extracting base sequence')
first_aa_column = get_column_number(FIRST_AMINO_ACID_COLUMN)
first_aa_column = col_to_num(FIRST_AMINO_ACID_COLUMN)
last_aa_column = row_ends(sheet, ORIGINAL_AMINO_ACID_ROW, first_aa_column)
# Get row from sheet and concatenate it into a string
row_iterator = sheet.iter_rows(min_row=ORIGINAL_AMINO_ACID_ROW, max_row=ORIGINAL_AMINO_ACID_ROW,
Expand Down
3 changes: 1 addition & 2 deletions excelutils/excel_sbol_utils/library2.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,8 +370,7 @@ def subcomponents(rowobj):
template.assemblePrimaryStructure(comp_list)
#template.compile(assembly_method=None)

f#or comp in non_var_comps:

#or comp in non_var_comps:

rowobj.obj.masterTemplate = template
for var in variant_comps:
Expand Down
157 changes: 128 additions & 29 deletions excelutils/excel_sbol_utils/library3.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def addToDescription(rowobj):
'different from': sbol3.SBOL_DIFFERENT_FROM,
'same orientation as': sbol3.SBOL_SAME_ORIENTATION_AS,
'different orientation from': sbol3.SBOL_SAME_ORIENTATION_AS}

def make_constraint(constraint, part_list, template):
m = constraint_pattern.match(constraint)
if not m:
Expand Down Expand Up @@ -63,16 +64,58 @@ def subcomponents(rowobj): #UPDATE TO WORK WITH CELL DICT, ALLOW CONSTRAINTS


if 'backbone' in rowobj.col_cell_dict:
temp = sbol3.Component(identity=f'{rowobj.obj.displayId}_ins_template', types=sbol3.SBO_DNA, name=f'{rowobj.obj.displayId}_ins_template')
newobj = sbol3.CombinatorialDerivation(identity=f'{rowobj.obj.displayId}_ins', template=temp, name=f'{rowobj.obj.displayId}_ins', strategy=sbol3.SBOL_ENUMERATE)
rowobj.doc.add(temp)
rowobj.doc.add(newobj)
rowobj.obj_dict[temp.display_id] = {'uri': temp.type_uri, 'object': temp,
'displayId': temp.display_id}
backbones = list(rowobj.col_cell_dict['backbone'].values())
back = True
oldobj = rowobj.obj
rowobj.obj = newobj
# If this row has a backbone, create a new combinatorial derivation

# Determine if there are multiple comps per part
multiple = False

for sub in rowobj.col_cell_dict['subcomp']:
if "," in rowobj.col_cell_dict['subcomp'][sub]:
multiple = True
break
else:
multiple = False

# 1. If there are multiple comps per part, create the _ins_template component

if multiple:
temp = sbol3.Component(identity=f'{rowobj.obj.displayId}_ins_template', types=sbol3.SBO_DNA)

newobj = sbol3.CombinatorialDerivation(identity=f'{rowobj.obj.displayId}_ins', template=temp, name=f'{rowobj.obj.name} insert', \
strategy=sbol3.SBOL_ENUMERATE, description=rowobj.obj.description)

rowobj.obj.description = None

rowobj.doc.add(temp) # Add the template
rowobj.doc.add(newobj) # Add the combdev _ins to the document connected to the template

rowobj.obj_dict[temp.display_id] = {'uri': temp.type_uri, 'object': temp,
'displayId': temp.display_id}
backbones = list(rowobj.col_cell_dict['backbone'].values())
backbones = backbones[0].split(", ")

back = True
oldobj = rowobj.obj
rowobj.obj = newobj
else:
# 2. Otherwise, create _ins without the template

# Create new component _ins without the template
newobj = sbol3.Component(identity=f'{rowobj.obj.displayId}_ins', name=f'{rowobj.obj.name} insert', \
description=rowobj.obj.description, types=sbol3.SBO_DNA, roles=sbol3.SO_ENGINEERED_REGION)

# Set description to None
rowobj.obj.description = None

rowobj.doc.add(newobj)

backbones = list(rowobj.col_cell_dict['backbone'].values())
backbones = backbones[0].split(", ")


back = True
oldobj = rowobj.obj
rowobj.obj = newobj
else:
back = False

Expand All @@ -89,19 +132,40 @@ def subcomponents(rowobj): #UPDATE TO WORK WITH CELL DICT, ALLOW CONSTRAINTS
variant_comps = []
comp_ind = 0

# Need to update for multiple backbones, as well as remove hardcoding
# Check SBOL Utilities for reasoning for multiple backbones

if back:
# Currently this code creates a template for the insertion of the backbone into the main combinatorialderivation
tempObj = rowobj.obj_dict[f'{oldobj.display_id}_template']['object']
sub = sbol3.LocalSubComponent(types=sbol3.SBO_DNA, name="Inserted construct")

sub = sbol3.LocalSubComponent(types=sbol3.SBO_DNA, name="Inserted Construct")
tempObj.features.append(sub)
backbone_sub = sbol3.VariableFeature(cardinality=sbol3.SBOL_ONE, variable=sub, variant_derivations=rowobj.obj)
oldobj.variable_features.append(backbone_sub)

subComp = sbol3.SubComponent(instance_of=rowobj.obj_dict[backbones[0]]['object'])
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].features.append(subComp)
constr1 = sbol3.Constraint(restriction=sbol3.SBOL_MEETS, object=subComp, subject=sub)
constr2 = sbol3.Constraint(restriction=sbol3.SBOL_MEETS, object=sub, subject=subComp)
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].constraints.append(constr1)
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].constraints.append(constr2)
if len(backbones) == 1:

subComp = sbol3.SubComponent(instance_of=rowobj.obj_dict[backbones[0]]['object'])
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].features.append(subComp)
constr1 = sbol3.Constraint(restriction=sbol3.SBOL_MEETS, object=subComp, subject=sub)
constr2 = sbol3.Constraint(restriction=sbol3.SBOL_MEETS, object=sub, subject=subComp)
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].constraints.append(constr1)
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].constraints.append(constr2)
else:

newLocalSub = sbol3.LocalSubComponent(name="Vector", types=sbol3.SBO_DNA)
tempObj.features.append(newLocalSub)

newVarFeature = sbol3.VariableFeature(variable=newLocalSub, variants=(rowobj.obj_dict[i]['object'] for i in backbones), cardinality=sbol3.SBOL_ONE)
oldobj.variable_features.append(newVarFeature)

constr1 = sbol3.Constraint(restriction=sbol3.SBOL_MEETS, object=newLocalSub, subject=sub)
constr2 = sbol3.Constraint(restriction=sbol3.SBOL_MEETS, object=sub, subject=newLocalSub)

rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].constraints.append(constr1)
rowobj.obj_dict[f'{oldobj.display_id}_template']['object'].constraints.append(constr2)


else:
temp = rowobj.obj_dict[f'{rowobj.obj.display_id}_template']['object']
Expand All @@ -124,7 +188,7 @@ def subcomponents(rowobj): #UPDATE TO WORK WITH CELL DICT, ALLOW CONSTRAINTS

comp_ind += 1
else:
tempSub = sbol3.SubComponent(name=f'Part {comp_ind}', instance_of=f'{rowobj.obj_dict[comp]["uri"]}', orientation=sbol3.SBOL_INLINE)
tempSub = sbol3.SubComponent(name=f'Part {comp_ind + 1}', instance_of=f'{rowobj.obj_dict[comp]["uri"]}', orientation=sbol3.SBOL_INLINE)
temp.features.append(tempSub)
variant_comps.append(tempSub)
if comp_ind != 0:
Expand Down Expand Up @@ -202,6 +266,34 @@ def dataSource(rowobj):
rowobj.obj.update_all_dependents(id_map) # this function doesn't yet do everything it should
rowobj.data_source_id_to_update[old_id] = new_identity

if pref == 'URL for GenBank file' or pref == 'URL for FASTA file':
# Namespace is everything except the last part of the url
# Loop backward through the value until a '/' is found
# Everything before the '/' is the namespace
old_val = val

# Loop through the string backwards
for i in range(len(val) - 1, 0, -1):
if val[i] == '/':
# Everything before the '/' is the namespace
ns = val[:i]

# Everything after the '/' is the display id
val = val[i+1:len(val) - 3]

break
old_id = rowobj.obj.identity
rowobj.doc.change_object_namespace([rowobj.obj], ns)
new_id = rowobj.obj.identity
rowobj.data_source_id_to_update[old_id] = new_id
rowobj.obj.derived_from = [old_val]
if val != rowobj.obj.display_id:
new_identity = str(rowobj.obj.identity).replace(rowobj.obj.display_id, helpers.check_name(val))
id_map = {rowobj.obj.identity:new_identity}
rowobj.obj.set_identity(new_identity)
rowobj.obj.update_all_dependents(id_map) # this function doesn't yet do everything it should
rowobj.data_source_id_to_update[old_id] = new_identity

def sequence(rowobj):
for col in rowobj.col_cell_dict.keys():
val = rowobj.col_cell_dict[col]
Expand All @@ -218,13 +310,13 @@ def sequence(rowobj):

# removes spaces and line breaks, strips the BOM, and makes all upper case
val = "".join(val.split())
val = val.replace(u"\ufeff", "").lower()
val = val.replace(u"\ufeff", "").upper()

# create sequence object
sequence = sbol3.Sequence(f"{rowobj.obj.displayId}_sequence",
elements=val)
if rowobj.obj.name is not None:
sequence.name = f"{rowobj.obj.name} Sequence"
sequence = sbol3.Sequence(f"{rowobj.obj.namespace}/{rowobj.obj.display_id}_sequence",
elements=val, encoding=sbol3.IUPAC_DNA_ENCODING, namespace=rowobj.obj.namespace)
# if rowobj.obj.name is not None:
# sequence.name = f"{rowobj.obj.name} Sequence"

rowobj.doc.add(sequence)

Expand All @@ -239,6 +331,11 @@ def sequence(rowobj):

def circular(rowobj):
    """Add the row's 'Circular' cell value to the object's types.

    Reads ``rowobj.col_cell_dict['Circular']`` and appends it to
    ``rowobj.obj.types`` unless an equal entry is already present, so
    calling this twice for the same row does not create a duplicate.

    Args:
        rowobj: Row wrapper exposing ``obj`` (with a list-like ``types``)
            and ``col_cell_dict`` (must contain the key ``'Circular'``).

    Raises:
        KeyError: If ``col_cell_dict`` has no ``'Circular'`` entry.
    """
    # NOTE(review): the previous header comment read "if false add to linear
    # collection if true add to types". Only the "add to types" half exists
    # here, and the cell value is appended as-is with no true/false check --
    # confirm the expected cell contents against the caller.
    circular_value = rowobj.col_cell_dict['Circular']
    target = rowobj.obj
    if circular_value not in target.types:
        target.types.append(circular_value)

def finalProduct(rowobj):
Expand All @@ -253,28 +350,30 @@ def finalProduct(rowobj):

sbol_objs = doc.objects
sbol_objs_names = [x.name for x in sbol_objs]
if 'FinalProducts' not in sbol_objs_names:
colec = sbol3.Collection('FinalProducts', name='FinalProducts')
if 'Final Products' not in sbol_objs_names:
colec = sbol3.Collection('FinalProducts', name='Final Products')
colec.description = 'Final products desired for actual fabrication'

sbol_objs = doc.objects
sbol_objs_names = [x.name for x in sbol_objs]

doc.add(colec)
colec.members.append(rowobj.obj_uri)
else:
colec = sbol_objs[sbol_objs_names.index('FinalProducts')]
colec = sbol_objs[sbol_objs_names.index('Final Products')]
colec.members.append(rowobj.obj_uri)

if 'LinearDNAProducts' not in sbol_objs_names:
colec = sbol3.Collection('LinearDNAProducts', name='LinearDNAProducts')
if 'Linear DNA Products' not in sbol_objs_names:
colec = sbol3.Collection('LinearDNAProducts', name='Linear DNA Products')
colec.description = 'Linear DNA constructs to be fabricated'

sbol_objs = doc.objects
sbol_objs_names = [x.name for x in sbol_objs]

doc.add(colec)
colec.members.append(rowobj.obj)
else:
colec = sbol_objs[sbol_objs_names.index('LinearDNAProducts')]
colec = sbol_objs[sbol_objs_names.index('Linear DNA Products')]
colec.members.append(rowobj.obj)


Expand Down
21 changes: 21 additions & 0 deletions excelutils/excel_sbol_utils/temp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import sbol2
import sbol3
import excel_sbol_utils.library2 as l2
import excel_sbol_utils.library3 as l3
import excel_sbol_utils.helpers as help
import excel2sbol.comp_column_functions2 as ccf
import excel2sbol.converter as conv
import os

# Build a barebones combinatorial derivation object
# Attempt to build subcomponent on it

# The class is `sbol2.CombinatorialDerivation`; `sbol2.combinatorialderivation`
# is the submodule that defines it, and calling a module raises TypeError.
obj = sbol2.CombinatorialDerivation()

# Scratch state for experimenting with the subcomponent helpers.
doc = sbol2.Document()
obj_dict = {}
sheet = 'Composite Parts'




Loading
Loading