From 26bb292efaded4be0a5c8c019d62dafce887401d Mon Sep 17 00:00:00 2001 From: Lucas Levassor <70581528+hiyama341@users.noreply.github.com> Date: Fri, 4 Oct 2024 11:30:56 +0200 Subject: [PATCH] New CRISPR example notebook (#260) (#284) * fourth trt * final file added * update: added installation of pydna * update: added CRIPSR examples in notebooks issue #259 * update: added google badge issue #266 * Polished Example CRISPR notebook --------- Co-authored-by: Manuel Lera-Ramirez --- docs/cookbook/cookbook.ipynb | 136 ++++++++------ docs/notebooks/CRISPR.ipynb | 67 ++++--- docs/notebooks/Example_CRISPR.ipynb | 268 ++++++++++++++++++++++++++++ 3 files changed, 391 insertions(+), 80 deletions(-) create mode 100644 docs/notebooks/Example_CRISPR.ipynb diff --git a/docs/cookbook/cookbook.ipynb b/docs/cookbook/cookbook.ipynb index 34c15e53..ab8748be 100644 --- a/docs/cookbook/cookbook.ipynb +++ b/docs/cookbook/cookbook.ipynb @@ -17,6 +17,15 @@ "" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -63,7 +72,18 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna for Colab (the cell magic above must stay on the first line of the cell).\n", + "%pip install pydna" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -72,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -81,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -97,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -109,7 +129,7 @@ "Gbnk(o9637 KC562906.1)" ] }, - "execution_count": 4, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -120,7 +140,7 @@
}, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -131,7 +151,7 @@ "CTTA..AGTT" ] }, - "execution_count": 5, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -142,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -151,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -166,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -175,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -187,7 +207,7 @@ "Gbnk(-3140 Z72979.1)" ] }, - "execution_count": 9, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -198,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -209,7 +229,7 @@ "TACT..TTTT" ] }, - "execution_count": 10, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -220,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -229,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -245,7 +265,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -259,7 +279,7 @@ " 3ACAAGTCATTAAGTC...GACCTTTTCTGAGAGTAGATT5" ] }, - "execution_count": 13, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -270,7 +290,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -279,7 +299,7 @@ }, { "cell_type": "code", - 
"execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -288,7 +308,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -297,7 +317,7 @@ "(Dseqrecord(-7), Dseqrecord(-1819), Dseqrecord(-11))" ] }, - "execution_count": 16, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -308,7 +328,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -319,7 +339,7 @@ " GAGA..ATTCCTAG" ] }, - "execution_count": 17, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -330,7 +350,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -339,7 +359,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -350,7 +370,7 @@ " AGGG..TTTTCTAG" ] }, - "execution_count": 19, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -361,7 +381,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -370,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -379,7 +399,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -388,7 +408,7 @@ "'t9fs_9UvEuD-Ankyy8XEr1hD5DQ'" ] }, - "execution_count": 22, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -399,7 +419,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -461,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -477,7 +497,7 @@ }, { "cell_type": "code", - "execution_count": 
25, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -486,7 +506,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -495,7 +515,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -509,7 +529,7 @@ " 3agtcgtaaaatccatttaaggc...ctgtcctacgactagtcgctgta5" ] }, - "execution_count": 27, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -520,7 +540,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -529,7 +549,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -538,7 +558,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -550,7 +570,7 @@ "File(pGREG505)(o9465)" ] }, - "execution_count": 30, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -561,7 +581,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -570,7 +590,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -579,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -588,7 +608,7 @@ "(Dseqrecord(-8301), Dseqrecord(-1172))" ] }, - "execution_count": 41, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -599,7 +619,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -615,7 +635,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -624,7 +644,7 @@ }, { "cell_type": "code", - 
"execution_count": 44, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -637,7 +657,7 @@ "algorithm..: common_sub_strings" ] }, - "execution_count": 44, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -648,7 +668,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -664,7 +684,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -673,7 +693,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -682,7 +702,7 @@ "True" ] }, - "execution_count": 55, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -693,7 +713,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -702,7 +722,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -711,7 +731,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -720,7 +740,7 @@ "'0R8hr15t-psjHVuuTj_JufGxOPg'" ] }, - "execution_count": 58, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -731,7 +751,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": null, "metadata": {}, "outputs": [ { diff --git a/docs/notebooks/CRISPR.ipynb b/docs/notebooks/CRISPR.ipynb index 9d9249e7..1cc5ba3e 100644 --- a/docs/notebooks/CRISPR.ipynb +++ b/docs/notebooks/CRISPR.ipynb @@ -54,32 +54,55 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cutting with enzyme 1: (Dseqrecord(-17), Dseqrecord(-6))\n", + "protospacer: GTTACTTTACCCGACGTCCC\n", + "cutting with enzyme 2: (Dseqrecord(-17), 
Dseqrecord(-6))\n", + "cutting with no PAM in target: ()\n" + ] + } + ], "source": [ - "# Defining the target sequence\n", - "sequence = Dseqrecord(\"GTTACTTTACCCGACGTCCCCGG\")\n", + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.crispr import cas9, protospacer\n", + "\n", + "# <----protospacer---><-------scaffold----------------->\n", + "guide = \"GTTACTTTACCCGACGTCCCgttttagagctagaaatagcaagttaaaataagg\"\n", + "target = \"GTTACTTTACCCGACGTCCCaGG\"\n", + "# <->\n", + "# PAM\n", + "\n", + "# Create an enzyme object with the protospacer\n", + "enzyme = cas9(\"GTTACTTTACCCGACGTCCC\")\n", + "\n", + "target_dseq = Dseqrecord(target)\n", + "\n", + "# Cut using the enzyme\n", + "print('cutting with enzyme 1:', target_dseq.cut(enzyme))\n", + "\n", + "\n", + "# Get the protospacer from the full gRNA sequence\n", + "gRNA_protospacers = protospacer(Dseqrecord(guide), cas=cas9)\n", + "# Print the protospacer (it's a list because often plasmids contain multiple gRNAs)\n", + "print('protospacer:', gRNA_protospacers[0])\n", + "gRNA_protospacer = gRNA_protospacers[0]\n", + "\n", + "# Create an enzyme from the protospacer\n", + "enzyme2 = cas9(gRNA_protospacer)\n", + "\n", + "# Simulate the cut\n", + "print('cutting with enzyme 2:', target_dseq.cut(enzyme2))\n", "\n", - "# Defining the guide RNA sequence\n", - "gRNA_sequence = protospacer(guide_construct = sequence, cas=cas9)\n", - "print(gRNA_sequence)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Initializing the Cas9 protein\n", - "enzyme = cas9(protospacer=gRNA_sequence[0])\n", "\n", - "# Simulating the CRISPR-Cas9 cut by searching for the cut sites\n", - "cas9_sites = enzyme.search(sequence)\n", - "print(len(cas9_sites))\n", + "# Note that without the PAM, the cut will not be made.\n", "\n", - "# The cas9_sites will contain the fragments resulting from the cut\n", - "for fragment in cas9_sites:\n", - " print(fragment.format(\"fasta\"))" +
"target_noPAM_dseq = Dseqrecord(\"GTTACTTTACCCGACGTCCCaaa\")\n", + "print(\"cutting with no PAM in target:\", target_noPAM_dseq.cut(enzyme2))" ] } ], diff --git a/docs/notebooks/Example_CRISPR.ipynb b/docs/notebooks/Example_CRISPR.ipynb new file mode 100644 index 00000000..9e0d64e8 --- /dev/null +++ b/docs/notebooks/Example_CRISPR.ipynb @@ -0,0 +1,268 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Implementation of Oligonucleotide-based CRISPR-Cas9 toolbox for efficient engineering of Komagataella phaffii \n", + "\n", + "\n", + "In this example we want to give a real-life intuition for how to use the module in practice. \n", + "\n", + "For this purpose we have chosen to use the oligonucleotide-based CRISPR-Cas9 toolbox that is described \n", + "here by Strucko et al. 2024, in the industrially relevant K. phaffii production organism: \n", + "\n", + "https://academic.oup.com/femsyr/article/doi/10.1093/femsyr/foae026/7740463?login=false " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import Image\n", + "Image(url=\"https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/femsyr/24/10.1093_femsyr_foae026/1/m_foae026fig3.jpeg?Expires=1730974846&Signature=iBKvkhkUn1823IljQ~1uFEnKO0VqWrwiXADvCwQLz6Yv8yDEAFkgt~tsLrXKFTmGYIq3ZINcj5a5yNgs4cP4NeCvRcQh7Ad~1ZejIwNrjqw51CJhGcZWPzz~NDr93QVLZZd2Re41cJNFKFmEu756KxrHQxwKTQe2QPMPfiKBvhvo8J28PERj3vNjZ3LQRsFp9qUPpdsZEyWIiNY92jsuy448YyuaGCgaC2ExGDLeuArTEJmq8gtb0QnTPV0dEdtoxIfZpgavdvO~QyqikjCLj6hebUYU1lH7StuS8oqCQE82CXO0IUcjYF6m2Lb0evXhqdLDQe90M-NrKjzNRmBA0A__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Figure 1. Oligo-assisted repair in K. phaffii. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "- Basically we can make two cuts in the genome, and repair it with an oligo (Figure 1A, 1B).\n", + "\n", + "\n", + "- We can start by loading in our target. Here we have integrated LAC12 in our K. phaffii strain but want to knock it out. \n", + "\n", + "\n", + "- Let's see how this can be implemented in pydna.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \"Open\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# Install pydna for Colab (the cell magic above must stay on the first line of the cell).\n", + "%pip install git+https://github.com/BjornFJohansson/pydna.git@dev_bjorn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import the gene we are going to work with" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dseqrecord\n", + "circular: False\n", + "size: 7127\n", + "ID: X06997.1\n", + "Name: X06997\n", + "Description: Kluyveromyces lactis LAC12 gene for lactose permease\n", + "Number of features: 8\n", + "/molecule_type=DNA\n", + "/topology=linear\n", + "/data_file_division=PLN\n", + "/date=25-JUL-2016\n", + "/accessions=['X06997']\n", + "/sequence_version=1\n", + "/keywords=['lactose permease', 'unidentified reading frame']\n", + "/source=Kluyveromyces lactis\n", + "/organism=Kluyveromyces lactis\n", + "/taxonomy=['Eukaryota', 'Fungi', 'Dikarya', 'Ascomycota', 'Saccharomycotina', 'Saccharomycetes', 'Saccharomycetales', 'Saccharomycetaceae', 'Kluyveromyces']\n", + "/references=[Reference(title='Primary structure of the lactose permease gene from the yeast Kluyveromyces lactis.
Presence of an unusual transcript structure', ...), Reference(title='Direct Submission', ...)]\n", + "/comment=the sequence submitted starts from the 5'end of LAC4 gene but goes\n", + "to the opposite direction; therefore, base number 1 is -1199 of\n", + "LAC4 gene; for LAC4 gene seq. see\n", + "Mol. Cell. Biol. (1987)7,4369-4376.\n", + "Dseq(-7127)\n", + "GCGA..TTCG\n", + "CGCT..AAGC\n" + ] + } + ], + "source": [ + "from pydna.dseqrecord import Dseqrecord\n", + "from pydna.crispr import cas9, protospacer\n", + "from pydna.genbank import Genbank\n", + "\n", + "# Initialize your favourite gene\n", + "gb = Genbank(\"myself@email.com\") # Tell Genbank who you are!\n", + "gene = gb.nucleotide(\"X06997\") # Kluyveromyces lactis LAC12 gene for lactose permease that has been integrated into K. phaffii\n", + "target_dseq = Dseqrecord(gene)\n", + "print(target_dseq)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we have chosen some guides and can add them to our cas9 enzymes and simulate the cuts."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cutting with guide 1: (Dseqrecord(-135), Dseqrecord(-6992))\n", + "cutting with guide 2: (Dseqrecord(-6793), Dseqrecord(-334))\n" + ] + } + ], + "source": [ + "\n", + "# Choose guides\n", + "guides = [\"CCCTAAGTCCTTTGAAGATT\", \"TATTATTTTGAGGTGCTTTA\"]\n", + "\n", + "# Create an enzyme object with the first protospacer\n", + "enzyme = cas9(guides[0])\n", + "\n", + "# Simulate the cut with the first enzyme\n", + "print('cutting with guide 1:', target_dseq.cut(enzyme))\n", + "\n", + "# Create an enzyme object with the second protospacer\n", + "enzyme2 = cas9(guides[1])\n", + "\n", + "# Simulate the cut with the second enzyme\n", + "print('cutting with guide 2:', target_dseq.cut(enzyme2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With these guides I would be able to generate a stable KO with a repair 60/90mer oligo." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My repair oligo for this experiment : AGGTGAACACACTCTGATGTAGTGCAGTCCCTAAGTCCTTTGAAGTTACGGACTCCTCGACCGATGCCCTTGAGAGCCTTCAACCCAGTC \n", + "My repair oligo for this experiment length : 90 \n" + ] + } + ], + "source": [ + "repair_oligo = target_dseq.cut(enzyme)[0][-45:]+target_dseq.cut(enzyme2)[-1][:45]\n", + "repair_oligo.name = 'My repair oligo for this experiment'\n", + "print(f'{repair_oligo.name} : {repair_oligo.seq} ')\n", + "print(f'{repair_oligo.name} length : {len(repair_oligo.seq)} ')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The final edited gene would look like this in the case of homologous recombination. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
name|45\n",
+       "     \\/\n",
+       "     /\\\n",
+       "     45|My repair oligo for this experiment|45\n",
+       "                                            \\/\n",
+       "                                            /\\\n",
+       "                                            45|name
" + ], + "text/plain": [ + "Contig(-469)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pydna.assembly import Assembly\n", + "\n", + "my_KO = Assembly((target_dseq.cut(enzyme)[0],repair_oligo, target_dseq.cut(enzyme2)[-1]), limit = 20 )\n", + "my_assembly_KO, *rest = my_KO.assemble_linear()\n", + "my_assembly_KO" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}