Skip to content

Commit

Permalink
Merge branch 'histdb_dev' into curated_histonedb
Browse files Browse the repository at this point in the history
  • Loading branch information
l-singh-biomsu committed Jul 9, 2024
2 parents 8a9f323 + 00b86d7 commit 7179e92
Show file tree
Hide file tree
Showing 194 changed files with 19,008 additions and 3,951 deletions.
Empty file modified CURATED_SET/TODO.md
100644 → 100755
Empty file.
Empty file modified CURATED_SET/classification.json
100644 → 100755
Empty file.
9 changes: 5 additions & 4 deletions HistoneDB/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,19 +81,20 @@ def load_settings(path=os.path.join(BASE_DIR, "HistoneDB", "database_info.txt"))
'browse',
'djangophylocore',
'django_extensions',
'mptt',
# 'mod_wsgi.server',
'analytics',
'human_hist',

)

MIDDLEWARE_CLASSES = (
MIDDLEWARE = (
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
# 'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'django.middleware.security.SecurityMiddleware',
)
Expand Down
5 changes: 2 additions & 3 deletions HistoneDB/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

urlpatterns = [
url(r'', include('browse.urls')),
url(r'^admin/', admin.site.urls),
url(r'human_hist/', include('human_hist.urls')),
]

urlpatterns += [url(r'^admin/', include(admin.site.urls))]
urlpatterns += [url(r'human_hist/', include('human_hist.urls'))]
13 changes: 13 additions & 0 deletions browse/aggregate_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from django.db import models

class GroupConcat(models.Aggregate):
    """SQL ``GROUP_CONCAT`` aggregate rendered with a hard-coded ``DISTINCT``.

    The aggregate's output field is a ``CharField``. An optional separator
    is rendered as a ``SEPARATOR '<value>'`` clause after the expression.
    """
    function = 'GROUP_CONCAT'
    template = '%(function)s(DISTINCT %(expressions)s%(separator)s)'

    def __init__(self, expression, distinct=False, separator=None, **extra):
        """Build the aggregate over ``expression``.

        ``distinct`` is accepted but not used: the template above always
        emits ``DISTINCT`` regardless of its value.

        ``separator`` is only rendered when it is truthy; ``None`` and the
        empty string both omit the ``SEPARATOR`` clause.
        """
        # NOTE(review): the separator is interpolated verbatim into the SQL
        # text (no quoting/escaping), so only trusted literals should be
        # passed here.
        if separator:
            separator_clause = f" SEPARATOR '{separator}'"
        else:
            separator_clause = ''
        super().__init__(
            expression,
            separator=separator_clause,
            output_field=models.CharField(),
            **extra)
18 changes: 12 additions & 6 deletions browse/management/commands/buildfeatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

from browse.models import TemplateSequence, Feature
from browse.models import Feature
from djangophylocore.models import Taxonomy

config = configparser.ConfigParser()
config.read('./histonedb.ini')

class Command(BaseCommand):
help = 'Reset sequence features'
template_sequences = config['WEB_DATA']['template_sequences']

# Logging info
logging.basicConfig(filename=os.path.join(config['LOG']['database_log'], "buildfeatures.log"),
Expand Down Expand Up @@ -56,11 +57,14 @@ def _handle(self, *args, **options):
except Taxonomy.DoesNotExist:
taxonomy = Taxonomy.objects.get(name="root")

template, created = TemplateSequence.objects.get_or_create(taxonomy=taxonomy, variant=variant)
# template, created = TemplateSequence.objects.get_or_create(taxonomy=taxonomy, variant=variant)
# if not os.path.isfile(template.path()): #we need to rewrite it!!!
if not os.path.exists(self.template_sequences):
os.makedirs(self.template_sequences)
SeqIO.write(
SeqRecord(Seq(sequence), id=str(template)),
template.path(),
SeqRecord(Seq(sequence), id=f"{variant}_{taxonomy.name}"),
# template.path(),
os.path.join(self.template_sequences, f"{variant}_{taxonomy.name}.fasta"),
"fasta"
)
used_features = {}
Expand All @@ -70,8 +74,10 @@ def _handle(self, *args, **options):
if not feature_name in [" ", "="]:
name = info["feature_info"][feature_name]["name"]
feature = Feature(
id = "{}_{}{}".format(template, name, " "+str(used_features.get(name, ""))),
template = template,
id = "{}_{}{}".format(variant, name, " "+str(used_features.get(name, ""))),
# template = template,
variant = variant,
taxonomy = taxonomy,
start = int(group[0][0]),
end = int(group[-1][0]),
name = name,
Expand Down
58 changes: 55 additions & 3 deletions browse/management/commands/buildhistoneclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,34 @@
import json, os, logging, configparser
from datetime import datetime
from bibtexparser.bparser import BibTexParser
from colour import Color
# import matplotlib as mlt #import to_hex
# from matplotlib import colors #import to_hex

# Palette of 13 hex colors. create_histone_variants() picks colors[i] by the
# position of a variant among its siblings when it has no parent; variants
# with a parent reuse the parent's color instead.
# NOTE(review): indexing by position would raise IndexError if more than 13
# parent-less variants are listed under one histone type -- confirm the
# variants JSON never exceeds that.
colors = [
"#8dd3c7",
"#E6E600",
"#bebada",
"#fb8072",
"#80b1d3",
"#fdb462",
"#b3de69",
"#fccde5",
"#d9d9d9",
"#bc80bd",
"#ccebc5",
"#ffed6f",
"#ddc497",
]
def color_variant(hex_color, brightness_offset=1):
    """Return a lighter or darker variant of a ``#rrggbb`` color.

    Args:
        hex_color: Color string of exactly seven characters, e.g. ``#87c95f``.
        brightness_offset: Integer added to each of the three channels;
            positive values lighten, negative values darken.

    Returns:
        A lowercase ``#rrggbb`` string with every channel clamped to 0..255.

    Raises:
        ValueError: If ``hex_color`` is not seven characters long.
    """
    if len(hex_color) != 7:
        raise ValueError("Passed %s into color_variant(), needs to be in #87c95f format." % hex_color)
    channels = (int(hex_color[pos:pos + 2], 16) for pos in (1, 3, 5))
    # Clamp so the shifted channels stay inside the valid byte range.
    shifted = (min(255, max(0, value + brightness_offset)) for value in channels)
    # Zero-pad each channel to two digits: a bare hex() would render values
    # below 16 as a single digit and produce a malformed color string.
    return "#" + "".join(f"{value:02x}" for value in shifted)

config = configparser.ConfigParser()
config.read('./histonedb.ini')
Expand Down Expand Up @@ -126,14 +154,38 @@ def create_histone_classes(self):

self.create_histone_variants(variants=self.variants_json['tree'][htype], hist_type=htype)

# generate different colors for variant_groups
# c = (0, 0, 0)
# hvars = Variant.objects.filter(hist_type_id=htype)
# step = 64/len(hvars)
# # print(htype)
# # print(len(hvars))
# # print(1/len(hvars))
# # print(step)
# for hvar in hvars:
# # print(c)
# # print(Color(rgb=c).hex_l)
# hvar.color = Color(rgb=c).hex_l
# hvar.save()
# # print(hvar.color)
# if c[1]+step>1 and c[2]+step>1:
# c = (min(c[0]+step, 1), 0, 0)
# elif c[2]+step>1:
# c = (c[0], min(c[1] + step, 1), 0)
# else:
# c = (c[0], c[1], min(c[2] + step, 1))

def create_histone_variants(self, variants, hist_type, parent=None):
"""Create variants (including generics for each histone type) listed in variants_list.json"""
if variants=="null": return
for variant in variants.keys():
for i, variant in enumerate(variants.keys()):
# variant = var.replace('(', '').replace(')', '').replace('?', '').replace(' ', '_')
color = colors[i] if not parent else parent.color
# print(color)
obj = self.create_description(hclass=variant)
obj = Variant.objects.create(id=variant, hist_type_id=hist_type,
taxonomic_span=self.variants_json['info'][variant]['taxonomic_span'],
doublet=False, description=obj, parent_id=parent)
doublet=False, description=obj, parent=parent, color=color)
self.log.info("Created {} variant model in database".format(obj.id))

# self.add_publications(hclass_obj=obj)
Expand All @@ -158,4 +210,4 @@ def create_histone_variants(self, variants, hist_type, parent=None):
"ALTER TABLE browse_feature CONVERT TO CHARACTER SET utf8 COLLATE utf8_general_ci;")
alt_variant.save()

self.create_histone_variants(variants=variants[variant], hist_type=hist_type, parent=variant)
self.create_histone_variants(variants=variants[variant], hist_type=hist_type, parent=obj)
86 changes: 45 additions & 41 deletions browse/management/commands/buildseedinfo.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
import os
import logging
import os, configparser, logging, json, shutil

from django.core.management.base import BaseCommand, CommandError
from django.conf import settings

from tools.L_shade_hist_aln import write_alignments
from tools.hist_ss import get_features_in_aln

from Bio import SeqIO
from Bio.Align import MultipleSeqAlignment
from browse.models import Sequence
from Bio.Align.AlignInfo import SummaryInfo

from browse.models import Sequence

config = configparser.ConfigParser()
config.read('./histonedb.ini')

class Command(BaseCommand):
help = 'Reset sequence features'
seed_directory = os.path.join(settings.STATIC_ROOT_AUX, "browse", "seeds")
seed_directory = config['WEB_DATA']['seeds']
# seed_directory = os.path.join(settings.STATIC_ROOT_AUX, "browse", "seeds_oldversion")

# Logging info
logging.basicConfig(filename='log/buildseedinfo.log',
logging.basicConfig(filename=os.path.join(config['LOG']['database_log'], "buildseedinfo.log"),
format='%(asctime)s %(name)s %(levelname)-8s %(message)s',
level=logging.INFO,
datefmt='%Y-%m-%d %H:%M:%S')
Expand All @@ -33,48 +39,41 @@ def handle(self, *args, **options):
self.log.info('=======================================================')
self.log.info('=== buildseedinfo START ===')
self.log.info('=======================================================')

if options["force"]:
if os.path.exists(self.seed_directory) and os.path.isdir(self.seed_directory):
shutil.rmtree(self.seed_directory)
# shutil.copytree(os.path.join(config['DATA']['directory'], 'seeds'), self.seed_directory)
shutil.copytree(os.path.join(config['DATA']['directory'], 'draft_seeds'), self.seed_directory)

save_dir = os.path.join("tmp", "HistoneDB")
if not os.path.exists(save_dir):
os.makedirs(save_dir)
for variant, seed in self.get_seeds():
for variant, seed_variant in self.get_variants():
#PDF currently contaminates the dir with many files.
#if not os.path.exists("{}.pdf".format(seed[:-6])) or options["force"]:
#Write PDF
#write_alignments([seed], seed[:-6], save_dir=os.path.dirname(seed))

if not os.path.exists("{}.gff".format(seed[:-6])) or options["force"]:
# if variant=='cH3': variant='cH3_(Metazoa)' ## This is for training because there is no cH3 yet
if variant=='cH3': variant='cH3_Metazoa' ## This is for training because there is no cH3 yet
seed = os.path.join(self.seed_directory, seed_variant)
if not os.path.exists(f"{seed}.gff") or options["force"]:
#Write GFF
self.log.info("writing gff for {} to {}.gff".format(variant, seed[:-6]))
# print "writing gff"
with open("{}.gff".format(seed[:-6]), "w") as gff:
# print " ", variant
# print "{}.gff".format(seed[:-6])
msa = MultipleSeqAlignment(list(SeqIO.parse(seed, "fasta")))
self.log.info(f"writing gff for {variant} to {seed}.gff")
with open(f"{seed}.gff", "w") as gff:
if not os.path.exists(f"{seed}.fasta"): continue ## This is for training because there is no some sequences yet
seqs = list(SeqIO.parse(f'{seed}.fasta', "fasta"))
if len(seqs) < 1: continue ## This is for training because there is no some sequences yet
msa = MultipleSeqAlignment(seqs)
self.log.info("Making features for variant: %s", variant)
print(get_features_in_aln(msa, variant, save_dir=os.path.dirname(seed)), file=gff)
print(get_features_in_aln(msa, variant, save_dir=self.seed_directory), file=gff)

#Set reviewed to True
not_found = {}
for num_seq, s in enumerate(SeqIO.parse(seed, "fasta")):
fields = s.id.split("|")
for num_seq, s in enumerate(SeqIO.parse(f'{seed}.fasta', "fasta")):
fields = s.description.split()
id = fields[1]
# #Historically type seed gi was first index, but now we are changing it to last for better view
# #In seed alignmnets of variants fist argument is taxonomy name, second gi.
# #so we just try everything
# id = int(fields[0])
# continue
# except ValueError:
# try:
# #Variant seed is second index
# id = int(fields[1])
# except ValueError:
# try:
# id=int(fields[2])
# except ValueError:
# try:
# not_found[seed[:-6]].append(s.id)
# except KeyError:
# not_found[seed[:-6]] = [s.id]
try:
s = Sequence.objects.get(id=str(id), variant__id=variant)
s.reviewed = True
Expand All @@ -90,11 +89,16 @@ def handle(self, *args, **options):
self.log.info('=== buildseedinfo SUCCESSFULLY finished ===')
self.log.info('=======================================================')

def get_seeds(self):
for i, (root, _, files) in enumerate(os.walk(self.seed_directory)):
for seed in files:
if not seed.endswith(".fasta"): continue
variant = os.path.basename(seed)[:-6]
if i == 0:
variant = "canonical_{}".format(variant) if variant != "H1" else "generic_{}".format(variant)
yield variant, os.path.join(root, seed)
def get_variants(self, dl=None, hist_type=True):
    """List ``(variant_id, seed_name)`` pairs from the variants tree.

    Args:
        dl: Nested mapping of names to sub-trees. When falsy, the tree is
            read from the ``'tree'`` key of the JSON file named by
            ``config['DATA']['variants']``.
        hist_type: When true, the keys of ``dl`` are treated as histone
            types: ``H1`` yields ``('generic_H1', 'H1')`` and any other key
            ``X`` yields ``('cX', 'X')``. When false, every key ``k``
            yields ``(k, k)``.

    Returns:
        A list of pairs: the keys of the current level first, followed by
        the pairs of each child sub-tree in order. Anything that is not a
        dict (for example a ``"null"`` leaf marker) contributes nothing.
    """
    if not dl:
        with open(config['DATA']['variants'], encoding='utf-8') as f:
            dl = json.load(f)['tree']

    def collect(subtree, top_level):
        # Walk the tree without ever re-entering the file-loading branch:
        # an empty or None child must be treated as a leaf, not as a
        # request to reload the whole tree (which would recurse forever).
        if not isinstance(subtree, dict):
            return []
        if top_level:
            found = [(f'generic_{hist}', hist) if hist == 'H1' else (f'c{hist}', hist)
                     for hist in subtree]
        else:
            found = [(name, name) for name in subtree]
        for child in subtree.values():
            found += collect(child, False)
        return found

    return collect(dl, hist_type)
13 changes: 8 additions & 5 deletions browse/management/commands/buildsunburst.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
from django.core.management.base import BaseCommand, CommandError
from browse.models import *
from djangophylocore.models import Rank
import os

import os, configparser, json, logging
from itertools import chain
import pprint as pp
import json
from colour import Color
from django.db.models import Max, Min, Count, Avg
from math import floor
import logging

config = configparser.ConfigParser()
config.read('./histonedb.ini')

class Command(BaseCommand):
help = 'Build the sunburst json files for each core histone and its variants'

# Logging info
logging.basicConfig(filename='log/buildsunburst.log',
logging.basicConfig(filename=os.path.join(config['LOG']['database_log'], "buildsunburst.log"),
format='%(asctime)s %(name)s %(levelname)-8s %(message)s',
level=logging.INFO,
datefmt='%Y-%m-%d %H:%M:%S')
Expand All @@ -37,7 +39,8 @@ def handle(self, *args, **options):
self.log.info('=======================================================')
self.log.info('=== buildsunburst START ===')
self.log.info('=======================================================')
path = os.path.join("static", "browse", "sunbursts")
path = config['WEB_DATA']['sunbursts']
# path = os.path.join("static", "browse", "sunbursts")
if options["all_taxonomy"]:
sb = self.build_sunburst(all_taxonomy=True)
with open(os.path.join(path, "all_taxa.json"), "w") as all_taxa:
Expand Down
Loading

0 comments on commit 7179e92

Please sign in to comment.