Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor updates #83

Open
wants to merge 36 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
f07ba3e
pin package for ci
PaulaKramer Oct 23, 2024
f90c73d
debug ci timeout tsne
PaulaKramer Oct 24, 2024
8651ef8
debug tsne timeout
PaulaKramer Nov 12, 2024
425744c
debug ci workflow
PaulaKramer Nov 12, 2024
ddb5d71
ci ignore files
PaulaKramer Nov 12, 2024
0655231
tsne debug
PaulaKramer Nov 12, 2024
fe806fb
add py3.8 to ci
PaulaKramer Nov 12, 2024
d84df6e
default tsne
PaulaKramer Nov 12, 2024
9b53c0e
subset for tsne
PaulaKramer Nov 12, 2024
fdd5f70
subset tsne
PaulaKramer Nov 13, 2024
36f03ef
increase sample size tsne
PaulaKramer Nov 13, 2024
4df2be6
increase tsne more
PaulaKramer Nov 13, 2024
465bc98
tsne
PaulaKramer Nov 13, 2024
7ec9ee9
rerun new chembl version
PaulaKramer Dec 20, 2024
3242e42
Merge pull request #82 from volkamerlab/rerun_chembl_notebook
PaulaKramer Dec 20, 2024
cdc76dc
sample subset for tsne
PaulaKramer Jan 27, 2025
b08b147
reduce sampling
PaulaKramer Jan 27, 2025
a717b53
split tsne
PaulaKramer Jan 28, 2025
2fd180b
reduce number of tsnes
PaulaKramer Jan 28, 2025
33a09f8
sample threshold tsne
PaulaKramer Jan 28, 2025
2d452ab
tsne filters debug
PaulaKramer Jan 28, 2025
08064e2
tsne filters debug still
PaulaKramer Jan 28, 2025
faf2594
tsne split into multiple code cells
PaulaKramer Jan 28, 2025
ae07fe6
tsne split into multiple code cells
PaulaKramer Jan 28, 2025
c8d9654
tsne pca less
PaulaKramer Jan 28, 2025
762f24f
save tsne embeddings
PaulaKramer Feb 1, 2025
a56aefc
sample fraction
PaulaKramer Feb 1, 2025
7d7569d
nbval and reduce dimensions in pca
PaulaKramer Feb 1, 2025
b3914c3
add all data tsne
PaulaKramer Feb 1, 2025
d8bb9af
sample less
PaulaKramer Feb 1, 2025
368be76
sample tsne data
PaulaKramer Feb 1, 2025
b1cf797
commented code
PaulaKramer Feb 1, 2025
8714ca9
small changes, rerun notebooks
PaulaKramer Feb 3, 2025
2df80f5
nbval changes
PaulaKramer Feb 3, 2025
5c174ca
add all notebooks back to ci, increase timeout
PaulaKramer Feb 3, 2025
264d632
Merge pull request #81 from volkamerlab/ci_small_fix
PaulaKramer Feb 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ jobs:
- name: Run tests
shell: bash -l {0}
run: |
PYTEST_ARGS="--nbval-lax --nbval-current-env --nbval-cell-timeout=7200"
PYTEST_IGNORE="--ignore=notebooks/custom_kinfraglib/2_3_custom_filters_paper.ipynb"
PYTEST_ARGS="--nbval-lax --nbval-current-env --nbval-cell-timeout=3600"
PYTEST_IGNORE="--ignore=notebooks/custom_kinfraglib/2_3_custom_filters_paper.ipynb"

pytest $PYTEST_ARGS $PYTEST_IGNORE
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ dependencies:
## CI tests
- pytest=7.4
- pytest-xdist
- pyparsing=3.1.4 # CI fails with version 3.2.0
- nbval
- shyaml
- pip:
Expand Down
77 changes: 56 additions & 21 deletions kinfraglib/filters/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,32 +310,67 @@ def retro_routes_fragments(fragment_library, evaluate, subpocket, molsPerRow=10)
return img


def create_tsne_plots(fragment_library):
def sample_subset(fragment_library, sample):
    """
    Samples a random subset of the given fragment library

    Parameters
    ----------
    fragment_library : dict
        fragment library organized in subpockets (one pandas DataFrame per subpocket)
    sample : float
        fraction of data to be sampled from each subpocket

    Returns
    -------
    dict
        fragment library organized in subpockets, each subpocket reduced to
        ``int(len(subpocket) * sample)`` randomly drawn rows. For ``sample == 1.0``
        the DataFrames are returned unsampled (in a new dict).
    """
    # Nothing to sample when the whole library is requested: hand back every
    # subpocket unchanged instead of an empty dict, so that downstream code
    # (e.g. pd.concat over the result) still receives the full data.
    if sample == 1.0:
        return dict(fragment_library)

    # Fixed random_state keeps the drawn subset reproducible between runs.
    return {
        subpocket: fragments.sample(int(len(fragments) * sample), random_state=1)
        for subpocket, fragments in fragment_library.items()
    }


def create_tsne_embeddings(fragment_library, n_components):
    """
    Creates the t-SNE embedding for all following t-SNE plots

    MACCS keys are generated for every fragment, reduced with a PCA and the
    PCA coordinates are then embedded into two dimensions with t-SNE.
    ----------
    fragment_library : dict
        fragment library organized in subpockets
    n_components : int
        forwarded to PCA as its `n_components` argument

    Returns
    -------
    crds_embedded
        result of `TSNE.fit_transform` (two t-SNE components) on the PCA
        coordinates of the concatenated fragment library
    """
    # flatten the subpocket-wise library into a single table
    all_fragments = pd.concat(fragment_library).reset_index(drop=True)
    all_fragments["maccs"] = all_fragments.ROMol.apply(MACCSkeys.GenMACCSKeys)

    # reduce the fingerprints with PCA before running t-SNE
    fingerprints = list(all_fragments["maccs"])
    reduced_crds = PCA(n_components).fit_transform(fingerprints)

    tsne = TSNE(n_components=2, init="pca", learning_rate="auto")
    return tsne.fit_transform(reduced_crds)


def create_tsne_plots(crds_embedded, fragment_library):
"""
Creates t-SNE plots comparing
a) pre-filtered and reduced fragment library
b) pre-filtered and custom filtered fragment library
c) pre-filtered, reduced and custom fragment library

and prints number of fragments in the subsets.
----------
crds_embedded : list
t-SNE embedding of the fragment library
fragment_library : dict
fragment library organized in subpockets containing boolean columns `bool_reduced` and
`bool_custom` defining if the fragments are part of the subsets

"""
fragment_library_concat = pd.concat(fragment_library).reset_index(drop=True)
tsne_df = pd.DataFrame(crds_embedded, columns=["X", "Y"])
# add bool column from filtering steps here
tsne_df["reduced"] = fragment_library_concat["bool_reduced"]
Expand Down Expand Up @@ -448,28 +483,27 @@ def create_tsne_plots(fragment_library):
return tsne_df


def create_tsne_plots_filters(fragment_library, saved_filter_results):
def create_tsne_plots_filters(crds_embedded, fragment_library, saved_filter_results):
"""
Creates t-SNE plots with accepted (green) and rejected (red) fragments for each filtering step.

----------
crds_embedded : list
t-SNE embeddings of the fragment library
fragment_library : dict
    fragment library organized in subpockets containing boolean columuns
fragment library organized in subpockets containing boolean columns
saved_filter_results : dataframe
loaded file with saved filter results

"""
# get sampled subset from saved filter results
fragment_library_concat = pd.concat(fragment_library).reset_index(drop=True)
fragment_library_concat["maccs"] = fragment_library_concat.ROMol.apply(
MACCSkeys.GenMACCSKeys
)

pca = PCA(n_components=50)
crds = pca.fit_transform(list(fragment_library_concat["maccs"]))

crds_embedded = TSNE(
n_components=2, init="pca", learning_rate="auto"
).fit_transform(crds)
bool_samples = [
i
for i, smi in enumerate(saved_filter_results["smiles"])
if smi in list(fragment_library_concat["smiles"])
]
saved_filter_results = saved_filter_results.iloc[bool_samples]

tsne_df = pd.DataFrame(crds_embedded, columns=["X", "Y"])
# add bool column from filter steps
Expand Down Expand Up @@ -519,6 +553,7 @@ def create_tsne_plots_filters(fragment_library, saved_filter_results):
color="lightcoral",
label="rejected",
)
# plt.show()
return tsne_df


Expand Down
Loading