diff --git a/.config/dotnet-tools.json b/.config/dotnet-tools.json index 8de306df..e11af8ef 100644 --- a/.config/dotnet-tools.json +++ b/.config/dotnet-tools.json @@ -7,6 +7,12 @@ "commands": [ "fsdocs" ] + }, + "dotnet-repl": { + "version": "0.1.216", + "commands": [ + "dotnet-repl" + ] } } } \ No newline at end of file diff --git a/.github/workflows/deploy-gh-pages.yml b/.github/workflows/deploy-gh-pages.yml index d5d42c0d..71d3d505 100644 --- a/.github/workflows/deploy-gh-pages.yml +++ b/.github/workflows/deploy-gh-pages.yml @@ -2,7 +2,7 @@ name: deploy-gh-pages on: push: - branches: [ main ] + branches: [ developer ] paths: - 'docs/**' diff --git a/build/DocumentationTasks.fs b/build/DocumentationTasks.fs index 57cc9186..67e702b2 100644 --- a/build/DocumentationTasks.fs +++ b/build/DocumentationTasks.fs @@ -12,7 +12,7 @@ let buildDocs = runDotNet (sprintf - "fsdocs build --eval --clean --properties Configuration=Release --parameters fsdocs-package-version %s" + "fsdocs build --clean --properties Configuration=Release --parameters fsdocs-package-version %s" stableVersionTag) "./" } @@ -23,7 +23,7 @@ let buildDocsPrerelease = runDotNet (sprintf - "fsdocs build --eval --clean --properties Configuration=Release --parameters fsdocs-package-version %s" + "fsdocs build --clean --properties Configuration=Release --parameters fsdocs-package-version %s" prereleaseTag) "./" } @@ -34,7 +34,7 @@ let watchDocs = runDotNet (sprintf - "fsdocs watch --eval --clean --properties Configuration=Release --parameters fsdocs-package-version %s" + "fsdocs watch --clean --properties Configuration=Release --parameters fsdocs-package-version %s" stableVersionTag) "./" } @@ -45,7 +45,7 @@ let watchDocsPrerelease = runDotNet (sprintf - "fsdocs watch --eval --clean --properties Configuration=Release --parameters fsdocs-package-version %s" + "fsdocs watch --clean --properties Configuration=Release --parameters fsdocs-package-version %s" prereleaseTag) "./" } \ No newline at end of file diff 
--git a/docs/01_01_formula.fsx b/docs/01_01_formula.fsx deleted file mode 100644 index 93232107..00000000 --- a/docs/01_01_formula.fsx +++ /dev/null @@ -1,82 +0,0 @@ -(** ---- -title: Formulas -category: BioFSharp Core -categoryindex: 1 -index: 1 ---- -*) - -(*** hide ***) - -(*** condition: prepare ***) -#r "nuget: FSharpAux, 1.1.0" -#r "nuget: FSharpAux.IO, 1.1.0" -#r "nuget: FSharp.Stats, 0.4.3" -#r "nuget: Plotly.NET, 2.0.0-preview.18" -#r "../src/BioFSharp/bin/Release/netstandard2.0/BioFSharp.dll" -#r "../src/BioFSharp.IO/bin/Release/netstandard2.0/BioFSharp.IO.dll" -#r "../src/BioFSharp.BioContainers/bin/Release/netstandard2.0/BioFSharp.BioContainers.dll" -#r "../src/BioFSharp.ML/bin/Release/netstandard2.0/BioFSharp.ML.dll" -#r "../src/BioFSharp.Stats/bin/Release/netstandard2.0/BioFSharp.Stats.dll" - -(*** condition: ipynb ***) -#if IPYNB -#r "nuget: FSharpAux, 1.1.0" -#r "nuget: FSharpAux.IO, 1.1.0" -#r "nuget: FSharp.Stats, 0.4.3" -#r "nuget: Plotly.NET, 2.0.0-preview.18" -#r "nuget: Plotly.NET.Interactive, 2.0.0-preview.18" -#r "nuget: BioFSharp, {{fsdocs-package-version}}" -#r "nuget: BioFSharp.IO, {{fsdocs-package-version}}" -#r "nuget: BioFSharp.BioContainers, {{fsdocs-package-version}}" -#r "nuget: BioFSharp.ML, {{fsdocs-package-version}}" -#r "nuget: BioFSharp.Stats, {{fsdocs-package-version}}" -#endif // IPYNB - -(** -# Formula - -[![Binder]({{root}}img/badge-binder.svg)](https://mybinder.org/v2/gh/CSBiology/BioFSharp/gh-pages?filepath={{fsdocs-source-basename}}.ipynb)  -[![Script]({{root}}img/badge-script.svg)]({{root}}{{fsdocs-source-basename}}.fsx)  -[![Notebook]({{root}}img/badge-notebook.svg)]({{root}}{{fsdocs-source-basename}}.ipynb) - -*Summary:* This example shows how to use chemical formulas in BioFSharp - -BioFSharp offers a great bunch of functionality for working with molecules. All elements are represented as the composition of their stable isotopes. A `Formula` is a collection of those Elements with the given count. 
Creating and altering formulas is quite easy. Also functions for obtaining a mass of a molecule, which becomes quite handy especially for mass spectrometry, can be used straightforwardly. - -To create formulas, no direct fiddling around with the data type is necessary. You can just use the stringparser: -*) -open BioFSharp - - -let CO2 = Formula.parseFormulaString "CO2" -Formula.toString CO2 // val it : string = "C1.00 O2.00 " - -(** -We just created some Carbon Dioxide. Luckily there is no in silico climate change. But let's get rid of it anyways, by making some Sprudel\*:
_\*german term for sprinkly water_
-*) -let sprudel = Formula.add CO2 (Formula.Table.H2O) -Formula.toString sprudel // val it : string = "C1.00 H2.00 O3.00 " - -(** -Quite refreshing, but boring nevertheless. Let's make some radioactive sprudel. -*) - -/// create a monoisotopic carbon consisting only of C14 -let monoC14 = - Elements.createMono "C14" (Isotopes.Table.C14,1.) - |> Elements.Mono - -/// exchanges all carbon in formula with monoIsotopic C14 -let lableWithC14 molecule = Formula.replaceElement molecule Elements.Table.C monoC14 - -let radioactiveSprudel = lableWithC14 sprudel - - -(** -As you can see converting a refreshing drink to a refreshing, radioactive drink is quickly done. As a check up, let's compare the masses: -*) - -Formula.monoisoMass sprudel // val it : float = 62.00039392 -Formula.monoisoMass radioactiveSprudel // val it : float = 64.00363591 diff --git a/docs/01_02_bioitems.fsx b/docs/01_02_bioitems.fsx deleted file mode 100644 index 5cddf21d..00000000 --- a/docs/01_02_bioitems.fsx +++ /dev/null @@ -1,205 +0,0 @@ -(** ---- -title: BioItems -category: BioFSharp Core -categoryindex: 1 -index: 2 ---- -*) - -(*** hide ***) - -(*** condition: prepare ***) -#r "nuget: FSharpAux, 1.1.0" -#r "nuget: FSharpAux.IO, 1.1.0" -#r "nuget: FSharp.Stats, 0.4.3" -#r "nuget: Plotly.NET, 2.0.0-preview.18" -#r "../src/BioFSharp/bin/Release/netstandard2.0/BioFSharp.dll" -#r "../src/BioFSharp.IO/bin/Release/netstandard2.0/BioFSharp.IO.dll" -#r "../src/BioFSharp.BioContainers/bin/Release/netstandard2.0/BioFSharp.BioContainers.dll" -#r "../src/BioFSharp.ML/bin/Release/netstandard2.0/BioFSharp.ML.dll" -#r "../src/BioFSharp.Stats/bin/Release/netstandard2.0/BioFSharp.Stats.dll" - -(*** condition: ipynb ***) -#if IPYNB -#r "nuget: FSharpAux, 1.1.0" -#r "nuget: FSharpAux.IO, 1.1.0" -#r "nuget: FSharp.Stats, 0.4.3" -#r "nuget: Plotly.NET, 2.0.0-preview.18" -#r "nuget: Plotly.NET.Interactive, 2.0.0-preview.18" -#r "nuget: BioFSharp, {{fsdocs-package-version}}" -#r "nuget: BioFSharp.IO, 
{{fsdocs-package-version}}" -#r "nuget: BioFSharp.BioContainers, {{fsdocs-package-version}}" -#r "nuget: BioFSharp.ML, {{fsdocs-package-version}}" -#r "nuget: BioFSharp.Stats, {{fsdocs-package-version}}" -#endif // IPYNB - -(** -# BioItems - -[![Binder]({{root}}img/badge-binder.svg)](https://mybinder.org/v2/gh/CSBiology/BioFSharp/gh-pages?filepath={{fsdocs-source-basename}}.ipynb)  -[![Script]({{root}}img/badge-script.svg)]({{root}}{{fsdocs-source-basename}}.fsx)  -[![Notebook]({{root}}img/badge-notebook.svg)]({{root}}{{fsdocs-source-basename}}.ipynb) - -*Summary:* This example shows how to use BioItemsin BioFSharp - -Often, dealing with similar problems separately results in different approaches. In a programming background, this might make things needlessly complex. Therefore in BioFSharp nucleotides and amino acids are based on the same structural scaffold, leading to a consistent way of working with them. This can come in handy especially when working with their formulas. - -### Table of contents -- [Basics](#Basics) -- [AminoAcids](#Amino-Acids) - - [AminoAcids](#Modifying-Amino-Acids) -- [Nucleotides](#Nucleotides) - -## Basics - -Many functions are similar for AminoAcids and Nucleotides, like for example: -*) - -open BioFSharp -open BioFSharp.AminoAcids -open BioFSharp.Nucleotides - -(***hide***) - -(**Accessing the full name:*) - -AminoAcids.name Ala -(*** include-it ***) - -Nucleotides.name G -(*** include-it ***) - -(**or the underlying chemical formula:*) - -AminoAcids.formula Lys -|> Formula.toString -(*** include-it ***) - -Nucleotides.formula T -|> Formula.toString -(*** include-it ***) - -(** -Nucleotides and AminoAcids in BioFSharp are represented as Union cases. This makes applying functions selectively very easy. 
-*) -let filterLysine aa = - match aa with - | AminoAcids.Lys -> AminoAcids.Gap - | _ -> aa - -filterLysine Ala -(*** include-it ***) - -filterLysine Lys -(*** include-it ***) - -(** -Of course some functions like these are already defined. Let's use a predefined function to find charged amino acids. - -*) - -let giveMePositiveAAs aminoAcid = - match aminoAcid with - | a when AminoAcids.isPosCharged a -> - printfn - "Hey, how are you? I am %s, but my friends call me %c. I'm usually in a positive mood" - (AminoAcids.name a) - (AminoAcids.symbol a) - - | a when AminoAcids.isNegCharged a -> - printfn - "I am %s, short: %c. I'm usually in a negative mood" - (AminoAcids.name a) - (AminoAcids.symbol a) - - | _ -> printfn "Just strolling around, minding my own business." - -(**Alanine is usually not charged:*) - -giveMePositiveAAs Ala -(*** include-output ***) - -(**Lysine is usually positively charged:*) - -giveMePositiveAAs Lys -(*** include-output ***) - -(**Glutamic acid is usually negatively charged:*) - -giveMePositiveAAs Glu -(*** include-output ***) - - -(** - -## Amino Acids - -### Modifying Amino Acids - -What makes working on Amino Acids with BioFSharp truly powerful is the ability to easily modify AminoAcids, even altering their mass and formula. In the following example we try to find out the mass of a phosphorylated Serine. Applications like these might be quite usefull for identification of peptides in mass spectrometry. -*) - -Ser -|> AminoAcids.formula -|> Formula.toString -(***include-it***) - -(** -As you can see by the formula. Our Serine is missing two H and an O. In BioFSharp, all Amino Acids are dehydrolysed by default, because it is assumed that the user will use collections representing a peptide, rather than single Amino Acids. For our cause we want serine in hydrolysed form. An easy way to achieve this is to modify it. 
An addition of H2O is quite common and therefore premade: -*) - -///Hydrolysed serine - -let hydroSerine = AminoAcids.setModification ModificationInfo.Table.H2O Ser - -hydroSerine -|> AminoAcids.formula -|> Formula.toString -(***include-it***) - -(** -So far so good. Now let's add the phosphate. For this we first create a function which alters the formula of a given molecule in the way a phosphorylation would. In the second step we create a modification resembling a phosphorylation of a residual. At last we modify our Serine with this modification. -*) - -///Phosphorylation of OH-Groups adds PO3 to formula and removes one H -let phosporylate formula = - Formula.add (Formula.parseFormulaString "PO3") formula - |> Formula.substract (Formula.parseFormulaString "H") - -//We create a modification at the residual called phosphorylation which in our case is hypothetical, hence the `false` for the 'isBiological` parameter -let phosphorylation = ModificationInfo.createModification "Phosphorylation" false ModificationInfo.ModLocation.Residual phosporylate - -///phosphorylated Serine -let phosphoSerine = AminoAcids.setModification phosphorylation hydroSerine - -phosphoSerine -|> AminoAcids.formula -|> Formula.toString -(***include-it***) - -(** -As you can see the Serine is phosphorylated just as we wanted. Our inital aim was to check the mass, this can be done quite easily: -*) - -AminoAcids.averageMass Ser -(***include-it***) - -AminoAcids.averageMass phosphoSerine -(***include-it***) - -(** -## Nucleotides - -As working with nucleotides is usually focused on the sequence of the bases, rather than how they actually look like, the list of nucleotide specific functions would be quite short. 
Here are some of the basic helper functions: -*) - -let myAdenine = Nucleotides.A -let myThymine = Nucleotides.complement myAdenine -(***include-value:myThymine***) - -Nucleotides.replaceTbyU myAdenine -(***include-it***) - -Nucleotides.replaceTbyU myThymine -(***include-it***) diff --git a/docs/NuGet.config b/docs/NuGet.config deleted file mode 100644 index cf1ace51..00000000 --- a/docs/NuGet.config +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/_template.ipynb b/docs/_template.ipynb deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/core/bioitems.ipynb b/docs/core/bioitems.ipynb new file mode 100644 index 00000000..f3813bfd --- /dev/null +++ b/docs/core/bioitems.ipynb @@ -0,0 +1,939 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "---\n", + "title: BioItems\n", + "category: BioFSharp Core\n", + "categoryindex: 1\n", + "index: 2\n", + "---\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [], + "source": [ + "(*** hide ***)\n", + "\n", + "(*** condition: prepare ***)\n", + "#r \"nuget: Plotly.NET, 4.2.0\"\n", + "#r \"nuget: FSharpAux, 2.0.0\"\n", + "#r \"nuget: FSharpAux.IO, 2.0.0\"\n", + "#r \"nuget: FSharp.Stats, 0.4.11\"\n", + "#r \"../../src/BioFSharp/bin/Release/netstandard2.0/BioFSharp.dll\"\n", + "#r \"../../src/BioFSharp.IO/bin/Release/netstandard2.0/BioFSharp.IO.dll\"\n", + "#r \"../../src/BioFSharp.BioContainers/bin/Release/netstandard2.0/BioFSharp.BioContainers.dll\"\n", + "#r \"../../src/BioFSharp.ML/bin/Release/netstandard2.0/BioFSharp.ML.dll\"\n", + "#r \"../../src/BioFSharp.Stats/bin/Release/netstandard2.0/BioFSharp.Stats.dll\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# BioItems\n", + "\n", + "*Summary:* This example shows how to use 
BioItems in BioFSharp\n", + "\n", + "Often, dealing with similar problems separately results in different approaches. In a programming background, this might make things needlessly complex. Therefore in BioFSharp nucleotides and amino acids are based on the same structural scaffold, leading to a consistent way of working with them. This can come in handy especially when working with their formulas. \n", + "\n", + "### Table of contents\n", + "- [Basics](#Basics)\n", + "- [AminoAcids](#Amino-Acids)\n", + " - [Modifying Amino Acids](#Modifying-Amino-Acids)\n", + "- [Nucleotides](#Nucleotides)\n", + "\n", + "## Basics\n", + "\n", + "Many functions are similar for AminoAcids and Nucleotides, like for example:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [], + "source": [ + "open BioFSharp\n", + "open BioFSharp.AminoAcids\n", + "open BioFSharp.Nucleotides" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Accessing the full name:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
(Alanine, Guanine)
Item1
Alanine
Item2
Guanine
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "AminoAcids.name Ala,\n", + "Nucleotides.name G " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "or the underlying chemical formula:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
(C6.00 H12.00 N2.00 O1.00, C10.00 H14.00 N2.00 O5.00)
Item1
C6.00 H12.00 N2.00 O1.00
Item2
C10.00 H14.00 N2.00 O5.00
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "AminoAcids.formula Lys |> Formula.toString,\n", + "Nucleotides.formula T |> Formula.toString " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nucleotides and AminoAcids in BioFSharp are represented as Union cases. This makes applying functions selectively very easy. \n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [], + "source": [ + "let filterLysine aa = \n", + " match aa with\n", + " | AminoAcids.Lys -> AminoAcids.Gap\n", + " | _ -> aa" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Ala
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "filterLysine Ala " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Gap
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "filterLysine Lys" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Of course some functions like these are already defined. Let's use a predefined function to find charged amino acids." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [], + "source": [ + "let giveMePositiveAAs aminoAcid = \n", + " match aminoAcid with\n", + " | a when AminoAcids.isPosCharged a -> \n", + " printfn \n", + " \"Hey, how are you? I am %s, but my friends call me %c. I'm usually in a positive mood\"\n", + " (AminoAcids.name a)\n", + " (AminoAcids.symbol a)\n", + "\n", + " | a when AminoAcids.isNegCharged a -> \n", + " printfn \n", + " \"I am %s, short: %c. I'm usually in a negative mood\"\n", + " (AminoAcids.name a)\n", + " (AminoAcids.symbol a)\n", + "\n", + " | _ -> printfn \"Just strolling around, minding my own business.\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alanine is usually not charged" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Just strolling around, minding my own business.\n" + ] + } + ], + "source": [ + "giveMePositiveAAs Ala" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Lysine is usually positively charged:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hey, how are you? 
I am Lysine, but my friends call me K. I'm usually in a positive mood\n" + ] + } + ], + "source": [ + "giveMePositiveAAs Lys" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Glutamic acid is usually negatively charged:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "I am Glutamic Acid, short: E. I'm usually in a negative mood\n" + ] + } + ], + "source": [ + "giveMePositiveAAs Glu" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Amino Acids\n", + "\n", + "### Modifying Amino Acids\n", + "\n", + "What makes working on Amino Acids with BioFSharp truly powerful is the ability to easily modify AminoAcids, even altering their mass and formula. In the following example we try to find out the mass of a phosphorylated Serine. Applications like these might be quite useful for identification of peptides in mass spectrometry. " + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "C3.00 H5.00 N1.00 O2.00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "Ser\n", + "|> AminoAcids.formula \n", + "|> Formula.toString" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see by the formula, our Serine is missing two H and an O. In BioFSharp, all Amino Acids are dehydrolysed by default, because it is assumed that the user will use collections representing a peptide, rather than single Amino Acids. For our cause we want serine in hydrolysed form. An easy way to achieve this is to modify it. 
An addition of H2O is quite common and therefore premade: " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "C3.00 H7.00 N1.00 O3.00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "///Hydrolysed serine\n", + "\n", + "let hydroSerine = AminoAcids.setModification ModificationInfo.Table.H2O Ser\n", + "\n", + "hydroSerine\n", + "|> AminoAcids.formula \n", + "|> Formula.toString" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So far so good. Now let's add the phosphate. For this we first create a function which alters the formula of a given molecule in the way a phosphorylation would. In the second step we create a modification resembling a phosphorylation of a residual. At last we modify our Serine with this modification." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "P1.00 C3.00 H6.00 N1.00 O6.00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "///Phosphorylation of OH-Groups adds PO3 to formula and removes one H\n", + "let phosporylate formula = \n", + " Formula.add (Formula.parseFormulaString \"PO3\") formula\n", + " |> Formula.substract (Formula.parseFormulaString \"H\")\n", + "\n", + "//We create a modification at the residual called phosphorylation which in our case is hypothetical, hence the `false` for the 'isBiological` parameter\n", + "let phosphorylation = ModificationInfo.createModification \"Phosphorylation\" false ModificationInfo.ModLocation.Residual phosporylate\n", + "\n", + "///phosphorylated Serine\n", + "let phosphoSerine = AminoAcids.setModification phosphorylation 
hydroSerine\n", + "\n", + "phosphoSerine \n", + "|> AminoAcids.formula \n", + "|> Formula.toString" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see the Serine is phosphorylated just as we wanted. Our initial aim was to check the mass, this can be done quite easily:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
87.07757500000001
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "AminoAcids.averageMass Ser" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
183.05688399999997
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "AminoAcids.averageMass phosphoSerine" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Nucleotides\n", + "\n", + "As working with nucleotides is usually focused on the sequence of the bases, rather than how they actually look like, the list of nucleotide specific functions would be quite short. Here are some of the basic helper functions: \n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
(A, T)
Item1
A
Item2
T
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "let myAdenine = Nucleotides.A \n", + "let myThymine = Nucleotides.complement myAdenine \n", + "\n", + "myAdenine, myThymine" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
A
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "Nucleotides.replaceTbyU myAdenine" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
U
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "Nucleotides.replaceTbyU myThymine " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".NET (C#)", + "language": "C#", + "name": ".net-csharp" + }, + "language_info": { + "name": "polyglot-notebook" + }, + "polyglot_notebook": { + "kernelInfo": { + "defaultKernelName": "csharp", + "items": [ + { + "aliases": [], + "name": "csharp" + }, + { + "aliases": [], + "languageName": "fsharp", + "name": "fsharp" + } + ] + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/core/formula.ipynb b/docs/core/formula.ipynb new file mode 100644 index 00000000..426c43ad --- /dev/null +++ b/docs/core/formula.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "title: Formulas\n", + "category: BioFSharp Core\n", + "categoryindex: 1\n", + "index: 1\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [], + "source": [ + "(*** hide ***)\n", + "\n", + "(*** condition: prepare ***)\n", + "#r \"nuget: Plotly.NET, 4.2.0\"\n", + "#r \"nuget: FSharpAux, 2.0.0\"\n", + "#r \"nuget: FSharpAux.IO, 2.0.0\"\n", + "#r \"nuget: FSharp.Stats, 0.4.11\"\n", + "#r \"../../src/BioFSharp/bin/Release/netstandard2.0/BioFSharp.dll\"\n", + "#r \"../../src/BioFSharp.IO/bin/Release/netstandard2.0/BioFSharp.IO.dll\"\n", + "#r \"../../src/BioFSharp.BioContainers/bin/Release/netstandard2.0/BioFSharp.BioContainers.dll\"\n", + "#r \"../../src/BioFSharp.ML/bin/Release/netstandard2.0/BioFSharp.ML.dll\"\n", + "#r \"../../src/BioFSharp.Stats/bin/Release/netstandard2.0/BioFSharp.Stats.dll\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Formula\n", + "\n", + "*Summary:* This example shows how to use chemical formulas in BioFSharp\n", + "\n", + 
"BioFSharp offers a great bunch of functionality for working with molecules. All elements are represented as the composition of their stable isotopes. A `Formula` is a collection of those Elements with the given count. Creating and altering formulas is quite easy. Also functions for obtaining a mass of a molecule, which becomes quite handy especially for mass spectrometry, can be used straightforwardly. \n", + "\n", + "To create formulas, no direct fiddling around with the data type is necessary. You can just use the stringparser:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "C1.00 O2.00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "open BioFSharp\n", + "\n", + "let CO2 = Formula.parseFormulaString \"CO2\"\n", + "Formula.toString CO2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We just created some Carbon Dioxide. Luckily there is no in silico climate change. But let's get rid of it anyways, by making some _Sprudel_ (German term for sparkling water)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "C1.00 H2.00 O3.00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "let sprudel = Formula.add CO2 (Formula.Table.H2O)\n", + "Formula.toString sprudel" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Quite refreshing, but boring nevertheless. Let's make some radioactive sprudel." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [], + "source": [ + "/// create a monoisotopic carbon consisting only of C14\n", + "let monoC14 = \n", + " Elements.createMono \"C14\" (Isotopes.Table.C14,1.)\n", + " |> Elements.Mono \n", + "\n", + "/// exchanges all carbon in formula with monoIsotopic C14\n", + "let lableWithC14 molecule = Formula.replaceElement molecule Elements.Table.C monoC14\n", + "\n", + "let radioactiveSprudel = lableWithC14 sprudel" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see converting a refreshing drink to a refreshing, radioactive drink is quickly done. As a check up, let's compare the masses:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
(62.00039392114, 64.00363591054)
Item1
62.00039392114
Item2
64.00363591054
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "Formula.monoisoMass sprudel,\n", + "Formula.monoisoMass radioactiveSprudel\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".NET (F#)", + "language": "F#", + "name": ".net-fsharp" + }, + "language_info": { + "name": "polyglot-notebook" + }, + "polyglot_notebook": { + "kernelInfo": { + "defaultKernelName": "fsharp", + "items": [ + { + "aliases": [], + "languageName": "fsharp", + "name": "fsharp" + } + ] + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/index.fsx b/docs/index.fsx deleted file mode 100644 index 80a674af..00000000 --- a/docs/index.fsx +++ /dev/null @@ -1,169 +0,0 @@ -(*** hide ***) - -(*** condition: prepare ***) -#r "nuget: Plotly.NET, 2.0.0-preview.8" -#r "nuget: FSharpAux, 1.0.0" -#r "nuget: FSharpAux.IO, 1.0.0" -#r "nuget: FSharp.Stats, 0.4.0" -#r "../bin/BioFSharp/netstandard2.0/BioFSharp.dll" -#r "../bin/BioFSharp.IO/netstandard2.0/BioFSharp.IO.dll" -#r "../bin/BioFSharp.BioContainers/netstandard2.0/BioFSharp.BioContainers.dll" -#r "../bin/BioFSharp.ML/netstandard2.0/BioFSharp.ML.dll" -#r "../bin/BioFSharp.Stats/netstandard2.0/BioFSharp.Stats.dll" - -(*** condition: ipynb ***) -#if IPYNB -#r "nuget: Plotly.NET, 2.0.0-beta6" -#r "nuget: FSharpAux, 1.0.0" -#r "nuget: FSharpAux.IO, 1.0.0" -#r "nuget: FSharp.Stats, 0.4.0" -#r "nuget: Plotly.NET.Interactive, 2.0.0-beta6" -#r "nuget: BioFSharp, {{fsdocs-package-version}}" -#r "nuget: BioFSharp.IO, {{fsdocs-package-version}}" -#r "nuget: BioFSharp.BioContainers, {{fsdocs-package-version}}" -#r "nuget: BioFSharp.ML, {{fsdocs-package-version}}" -#r "nuget: BioFSharp.Stats, {{fsdocs-package-version}}" -#endif // IPYNB - -(** -# BioFSharp - -[![Binder]({{root}}img/badge-binder.svg)](https://mybinder.org/v2/gh/plotly/Plotly.NET/gh-pages?filepath={{fsdocs-source-basename}}.ipynb)  -[![Script]({{root}}img/badge-script.svg)]({{root}}{{fsdocs-source-basename}}.fsx)  
-[![Notebook]({{root}}img/badge-notebook.svg)]({{root}}{{fsdocs-source-basename}}.ipynb) - -BioFSharp aims to be a user-friendly functional library for bioinformatics written in F#. It contains the basic data structures for common biological objects like amino acids and nucleotides based on chemical formulas and chemical elements. - -BioFSharp facilitates working with sequences in a strongly typed way and is designed to work well with F# Interactive. -It provides a variety of parsers for many biological file formats and a variety of algorithms suited for bioinformatic workflows. - -The core datamodel implements in ascending hierarchical order: - -- Chemical elements and [formulas](https://csbiology.github.io/BioFSharp/Formula.html) which are a collection of elements -- Amino Acids, Nucleotides and Modifications, which all implement the common [IBioItem interface](https://csbiology.github.io/BioFSharp/BioItem.html#Basics) -- [BioCollections](https://csbiology.github.io/BioFSharp/BioCollections.html) (BioItem,BioList,BioSeq) as representation of biological sequences - -
- -![Data model](https://i.imgur.com/LXBvhmi.png) - -
- ---- - -## Installation - -### For applications and libraries - -You can find all available package versions on [nuget](https://www.nuget.org/packages?q=BioFSharp). - - - dotnet CLI - - ```shell - dotnet add package BioFSharp --version {{fsdocs-package-version}} - ``` - - - paket CLI - - ```shell - paket add BioFSharp --version {{fsdocs-package-version}} - ``` - - - package manager - - ```shell - Install-Package BioFSharp -Version {{fsdocs-package-version}} - ``` - - Or add the package reference directly to your `.*proj` file: - - ``` - - ``` - -### For scripting and interactive notebooks -You can include the package via an inline package reference: - -``` -#r "nuget: BioFSharp, {{fsdocs-package-version}}" -``` - ---- - -## Example - -The following example shows how easy it is to start working with sequences: -*) - -open BioFSharp - -// Create a peptide sequence -let peptideSequence = "PEPTIDE" |> BioSeq.ofAminoAcidString -(***include-value:peptideSequence***) - -// Create a nucleotide sequence -let nucleotideSequence = "ATGC" |> BioSeq.ofNucleotideString -(***include-value:nucleotideSequence***) - - -(** -BioFSharp comes equipped with a broad range of features and functions to map amino acids and nucleotides. -*) -// Returns the corresponding nucleotide of the complementary strand -let antiG = Nucleotides.G |> Nucleotides.complement -(***include-value:antiG***) - -// Returns the monoisotopic mass of Arginine (minus H2O) -let arginineMass = AminoAcids.Arg |> AminoAcids.monoisoMass -(***include-value:arginineMass***) - - -(** -The various file readers in BioFSharp help to easyly retrieve information and write biology-associated file formats like for example FastA: -*) -open BioFSharp.IO - -let filepathFastaA = (__SOURCE_DIRECTORY__ + "/data/Chlamy_Cp.fastA") -//reads from file to an array of FastaItems. 
-let fastaItems = - FastA.fromFile BioArray.ofAminoAcidString filepathFastaA - -(** -This will return a sequence of `FastaItem`s, where you can directly start working with the individual sequences represented as a `BioArray` of amino acids. -*) - -let firstItem = fastaItems |> Seq.item 0 - -(***include-value: firstItem***) - -(** -BioFSharp.IO also provides pretty printers for BioSequences: -*) - -fsi.AddPrinter(BioFSharp.IO.FSIPrinters.prettyPrintBioCollection) - -firstItem.Sequence - -(***hide***) -let pretty = firstItem.Sequence |> BioFSharp.IO.FSIPrinters.prettyPrintBioCollection - -(***include-value: pretty***) - -(** -For more detailed examples continue to explore the BioFSharp documentation. -In the near future we will start to provide a cookbook like tutorial in the [CSBlog](https://csbiology.github.io/CSBlog/). - -## Contributing and copyright - -The project is hosted on [GitHub][gh] where you can [report issues][issues], fork -the project and submit pull requests. If you're adding a new public API, please also -consider adding [samples][docs] that can be turned into a documentation. - -The library is available under the OSI-approved MIT license. For more information see the -[License file][license] in the GitHub repository. 
- - [docs]: https://github.com/CSBiology/BioFSharp/tree/master/docs - [gh]: https://github.com/CSBiology/BioFSharp - [issues]: https://github.com/CSBiology/BioFSharp/issues - [license]: https://github.com/CSBiology/BioFSharp/blob/master/LICENSE -*) diff --git a/docs/index.ipynb b/docs/index.ipynb new file mode 100644 index 00000000..76bbc606 --- /dev/null +++ b/docs/index.ipynb @@ -0,0 +1,436 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [], + "source": [ + "(*** hide ***)\n", + "\n", + "(*** condition: prepare ***)\n", + "#r \"nuget: Plotly.NET, 4.2.0\"\n", + "#r \"nuget: FSharpAux, 2.0.0\"\n", + "#r \"nuget: FSharpAux.IO, 2.0.0\"\n", + "#r \"nuget: FSharp.Stats, 0.4.11\"\n", + "#r \"../src/BioFSharp/bin/Release/netstandard2.0/BioFSharp.dll\"\n", + "#r \"../src/BioFSharp.IO/bin/Release/netstandard2.0/BioFSharp.IO.dll\"\n", + "#r \"../src/BioFSharp.BioContainers/bin/Release/netstandard2.0/BioFSharp.BioContainers.dll\"\n", + "#r \"../src/BioFSharp.ML/bin/Release/netstandard2.0/BioFSharp.ML.dll\"\n", + "#r \"../src/BioFSharp.Stats/bin/Release/netstandard2.0/BioFSharp.Stats.dll\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# BioFSharp\n", + "\n", + "BioFSharp aims to be a user-friendly functional library for bioinformatics written in F#. 
It contains the basic data structures for common biological objects like amino acids and nucleotides based on chemical formulas and chemical elements.\n", + "\n", + "BioFSharp facilitates working with sequences in a strongly typed way and is designed to work well with F# Interactive.\n", + "It provides a variety of parsers for many biological file formats and a variety of algorithms suited for bioinformatic workflows.\n", + "\n", + "The core data model implements, in ascending hierarchical order:\n", + "\n", + "- Chemical elements and [formulas](https://csbiology.github.io/BioFSharp/Formula.html) which are a collection of elements\n", + "- Amino Acids, Nucleotides and Modifications, which all implement the common [IBioItem interface](https://csbiology.github.io/BioFSharp/BioItem.html#Basics)\n", + "- [BioCollections](https://csbiology.github.io/BioFSharp/BioCollections.html) (BioItem,BioList,BioSeq) as representations of biological sequences\n", + "\n", + "
\n", + "\n", + "![Data model](https://i.imgur.com/LXBvhmi.png)\n", + "\n", + "
\n", + "\n", + "---\n", + "\n", + "## Installation\n", + "\n", + "### For applications and libraries\n", + "\n", + "You can find all available package versions on [nuget](https://www.nuget.org/packages?q=BioFSharp).\n", + "\n", + " - dotnet CLI\n", + "\n", + " ```shell\n", + " dotnet add package BioFSharp\n", + " ```\n", + "\n", + " - paket CLI\n", + "\n", + " ```shell\n", + " paket add BioFSharp\n", + " ```\n", + "\n", + " - package manager\n", + "\n", + " ```shell\n", + " Install-Package BioFSharp -Version {{fsdocs-package-version}}\n", + " ```\n", + "\n", + " Or add the package reference directly to your `.*proj` file:\n", + "\n", + " ```\n", + " <PackageReference Include=\"BioFSharp\" Version=\"{{fsdocs-package-version}}\" />\n", + " ```\n", + "\n", + "### For scripting and interactive notebooks\n", + "You can include the package via an inline package reference:\n", + "\n", + "```\n", + "#r \"nuget: BioFSharp\"\n", + "```\n", + "\n", + "---\n", + "\n", + "## Example\n", + "\n", + "The following example shows how easy it is to start working with sequences:\n", + "\n", + "Create a peptide sequence:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\r\n",
+       "         1  PEPTIDE\r\n",
+       "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "open BioFSharp\n", + "\n", + "\"PEPTIDE\" |> BioArray.ofAminoAcidString" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a nucleotide sequence:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\r\n",
+       "         1  ATGC\r\n",
+       "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\"ATGC\" |> BioArray.ofNucleotideString" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "BioFSharp comes equipped with a broad range of features and functions to map amino acids and nucleotides. " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
C
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "// Returns the corresponding nucleotide of the complementary strand\n", + "Nucleotides.G |> Nucleotides.complement" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
156.10111102304
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "// Returns the monoisotopic mass of Arginine (minus H2O)\n", + "AminoAcids.Arg |> AminoAcids.monoisoMass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The various file readers in BioFSharp help to easily retrieve information and write biology-associated file formats like for example FastA:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [], + "source": [ + "open BioFSharp.IO\n", + "\n", + "let filepathFastaA = (__SOURCE_DIRECTORY__ + \"/data/Chlamy_Cp.fastA\")\n", + "//reads from file to an array of FastaItems.\n", + "\n", + "let fastaItems = FastA.fromFile BioArray.ofAminoAcidString filepathFastaA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will return a sequence of `FastaItem`s, where you can directly start working with the individual sequences represented as a `BioArray` of amino acids. " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "dotnet_interactive": { + "language": "fsharp" + }, + "polyglot_notebook": { + "kernelName": "fsharp" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
{ Header = "sp|P19528| cytochrome b6/f complex subunit 4 GN=petD PE=petD.p01"\\n Sequence =\\n [|Met; Ser; Val; Thr; Lys; Lys; Pro; Asp; Leu; Ser; Asp; Pro; Val; Leu; Lys;\\n Ala; Lys; Leu; Ala; Lys; Gly; Met; Gly; His; Asn; Thr; Tyr; Gly; Glu; Pro;\\n Ala; Trp; Pro; Asn; Asp; Leu; Leu; Tyr; ...
Header
sp|P19528| cytochrome b6/f complex subunit 4 GN=petD PE=petD.p01
Sequence
\r\n",
+       "         1  MSVTKKPDLS DPVLKAKLAK GMGHNTYGEP AWPNDLLYMF PVVILGTFAC VIGLSVLDPA\r\n",
+       "        61  AMGEPANPFA TPLEILPEWY FYPVFQILRV VPNKLLGVLL MAAVPAGLIT VPFIESINKF\r\n",
+       "       121  QNPYRRPIAT ILFLLGTLVA VWLGIGSTFP IDISLTLGLF *\r\n",
+       "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fastaItems |> Seq.item 0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more detailed examples, continue to explore the BioFSharp documentation.\n", + "In the near future we will start to provide a cookbook-like tutorial in the [CSBlog](https://csbiology.github.io/CSBlog/).\n", + "\n", + "## Contributing and copyright\n", + "\n", + "The project is hosted on [GitHub][gh] where you can [report issues][issues], fork \n", + "the project and submit pull requests. If you're adding a new public API, please also \n", + "consider adding [samples][docs] that can be turned into documentation.\n", + "\n", + "The library is available under the OSI-approved MIT license. For more information see the \n", + "[License file][license] in the GitHub repository. \n", + "\n", + " [docs]: https://github.com/CSBiology/BioFSharp/tree/developer/docs\n", + " [gh]: https://github.com/CSBiology/BioFSharp\n", + " [issues]: https://github.com/CSBiology/BioFSharp/issues\n", + " [license]: https://github.com/CSBiology/BioFSharp/blob/developer/LICENSE" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".NET (C#)", + "language": "C#", + "name": ".net-csharp" + }, + "language_info": { + "name": "polyglot-notebook" + }, + "polyglot_notebook": { + "kernelInfo": { + "defaultKernelName": "csharp", + "items": [ + { + "aliases": [], + "name": "csharp" + }, + { + "aliases": [], + "languageName": "fsharp", + "name": "fsharp" + } + ] + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}