diff --git a/LIMS2DB/__init__.pyc b/LIMS2DB/__init__.pyc deleted file mode 100644 index 24134ef..0000000 Binary files a/LIMS2DB/__init__.pyc and /dev/null differ diff --git a/LIMS2DB/helpers.pyc b/LIMS2DB/helpers.pyc deleted file mode 100644 index 0c72499..0000000 Binary files a/LIMS2DB/helpers.pyc and /dev/null differ diff --git a/LIMS2DB/lims_utils.pyc b/LIMS2DB/lims_utils.pyc deleted file mode 100644 index 636af29..0000000 Binary files a/LIMS2DB/lims_utils.pyc and /dev/null differ diff --git a/LIMS2DB/load_status_from_google_docs.pyc b/LIMS2DB/load_status_from_google_docs.pyc deleted file mode 100644 index 3e784d8..0000000 Binary files a/LIMS2DB/load_status_from_google_docs.pyc and /dev/null differ diff --git a/LIMS2DB/objectsDB/__init__.pyc b/LIMS2DB/objectsDB/__init__.pyc deleted file mode 100644 index d95137c..0000000 Binary files a/LIMS2DB/objectsDB/__init__.pyc and /dev/null differ diff --git a/LIMS2DB/helpers.py b/LIMS2DB/objectsDB/helpers.py similarity index 100% rename from LIMS2DB/helpers.py rename to LIMS2DB/objectsDB/helpers.py diff --git a/LIMS2DB/lims_utils.py b/LIMS2DB/objectsDB/lims_utils.py similarity index 100% rename from LIMS2DB/lims_utils.py rename to LIMS2DB/objectsDB/lims_utils.py diff --git a/LIMS2DB/load_status_from_google_docs.py b/LIMS2DB/objectsDB/load_status_from_google_docs.py similarity index 100% rename from LIMS2DB/load_status_from_google_docs.py rename to LIMS2DB/objectsDB/load_status_from_google_docs.py diff --git a/LIMS2DB/objectsDB/objectsDB.pyc b/LIMS2DB/objectsDB/objectsDB.pyc deleted file mode 100644 index a008359..0000000 Binary files a/LIMS2DB/objectsDB/objectsDB.pyc and /dev/null differ diff --git a/LIMS2DB/objectsDB/process_categories.pyc b/LIMS2DB/objectsDB/process_categories.pyc deleted file mode 100644 index 60253c3..0000000 Binary files a/LIMS2DB/objectsDB/process_categories.pyc and /dev/null differ diff --git a/LIMS2DB/objectsDB/process_categories.rst b/LIMS2DB/objectsDB/process_categories.rst deleted file mode 100644 index 7460bed..0000000 --- a/LIMS2DB/objectsDB/process_categories.rst +++ /dev/null @@ -1,263 +0,0 @@ - -What is a Process Category? -============================ - -In the project-statusdb context, lims processes are categorised into groups that define, or are used to define, a certain type of status-db key in a project database. The categories are specified here. When a new workflow is initialised in lims, the different categories need to be updated to contain any additional steps that have not already been included from some other workflow. If a workflow does not fit the categories, one might have to change the category definitions or add new categories. This needs to be done in cooperation with the developer of project_summary_upload_LIMS.py. - -Adding a workflow. -========================== - -...
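For orientation: each category listed below is maintained in lims_utils.py (deleted further down in this diff) as a plain Python dict mapping lims process type IDs to process type names. A minimal sketch of what registering a new workflow step amounts to, using the SEQSTART category from lims_utils.py and a hypothetical process type ID '999'::

    # Category dicts map lims process type id -> process type name
    # (the two entries below are copied from SEQSTART in lims_utils.py).
    SEQSTART = {'23': 'Cluster Generation (Illumina SBS) 4.0',
                '26': 'Denature, Dilute and Load Sample (MiSeq) 4.0'}

    # Adding a workflow whose sequencing start is a new lims step means
    # registering its process type here; '999' is a made-up example ID.
    SEQSTART['999'] = 'New Sequencing Start Step 1.0'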
- -SEQSTART -================== -These processes are used to set the sequencing_start_date. - - -=== ======================================= -ID process Name -=== ======================================= -26 Denature, Dilute and Load Sample (MiSeq) 4.0 -23 Cluster Generation (Illumina SBS) 4.0 -=== ======================================= - - -LIBVALFINISHEDLIB -================== - - - -=== ======================================= -ID process Name -=== ======================================= -24 Customer Gel QC -20 CaliperGX QC (DNA) -17 Bioanalyzer QC (Library Validation) 4.0 -62 qPCR QC (Library Validation) 4.0 -64 Quant-iT QC (Library Validation) 4.0 -67 Qubit QC (Library Validation) 4.0 -=== ======================================= - - -PREPREPSTART -================== -Process/processes that can be defined as a start of the library preparation protocol. If the workflow involves two library preparation protocols, such as for exome capture, only the steps of the first protocol should be given here. - - -=== ======================================= -ID process Name -=== ======================================= -304 Ligate 3' adapters (TruSeq small RNA) 1.0 -=== ======================================= - - -INITALQCFINISHEDLIB -================== -All processes included in the initial qc protocol for finished libraries, except the aggregation step. - - -=== ======================================= -ID process Name -=== ======================================= -24 Customer Gel QC -20 CaliperGX QC (DNA) -17 Bioanalyzer QC (Library Validation) 4.0 -62 qPCR QC (Library Validation) 4.0 -64 Quant-iT QC (Library Validation) 4.0 -67 Qubit QC (Library Validation) 4.0 -=== ======================================= - - -AGRINITQC -================== -Aggregation step of the initial qc protocol. - - -=== ======================================= -ID process Name -=== ======================================= -9 Aggregate QC (RNA) 4.0 -7 Aggregate QC (DNA) 4.0 -=== ======================================= - - -POOLING -================== -To identify the reagent labels (indexes) of each sample. If there are multiple pooling steps, the first pooling step after indexing should be specified. - - -=== ======================================= -ID process Name -=== ======================================= -308 Library Pooling (TruSeq Small RNA) 1.0 -58 Pooling For Multiplexed Sequencing (SS XT) 4.0 -255 Library Pooling (Finished Libraries) 4.0 -44 Library Pooling (TruSeq Amplicon) 4.0 -45 Library Pooling (TruSeq Exome) 4.0 -42 Library Pooling (Illumina SBS) 4.0 -43 Library Pooling (MiSeq) 4.0 -404 Pre-Pooling (Illumina SBS) 4.0 -508 Applications Pre-Pooling -506 Pre-Pooling (MiSeq) 4.0 -=== ======================================= - - -CALIPER -================== - - - -=== ======================================= -ID process Name -=== ======================================= -116 CaliperGX QC (RNA) -20 CaliperGX QC (DNA) -=== ======================================= - - -WORKSET -================== -To identify the worksets on which the samples have been run. The process used to define a workset for the protocol. - - -=== ======================================= -ID process Name -=== ======================================= -204 Setup Workset/Plate -=== ======================================= - - -PREPEND -================== -Process that can be defined as an end of the library preparation. If more than one library preparation protocol is included in the workflow, only the prep end step of the second protocol should be given here.
Used to set the prep finished date. - - -=== ======================================= -ID process Name -=== ======================================= -157 Applications Finish Prep -311 Sample Placement (Size Selection) -456 Purification (ThruPlex) -406 End repair, size selection, A-tailing and adapter ligation (TruSeq PCR-free DNA) 4.0 -109 CA Purification -111 Amplify Captured Libraries to Add Index Tags (SS XT) 4.0 -=== ======================================= - - -DILSTART -================== -These processes are used to set the dilution_and_pooling_start_date. - - -=== ======================================= -ID process Name -=== ======================================= -39 Library Normalization (Illumina SBS) 4.0 -40 Library Normalization (MiSeq) 4.0 -=== ======================================= - - -INITALQC -================== -All processes included in the initial qc protocol, except the aggregation step. - - -=== ======================================= -ID process Name -=== ======================================= -63 Quant-iT QC (DNA) 4.0 -65 Quant-iT QC (RNA) 4.0 -66 Qubit QC (DNA) 4.0 -68 Qubit QC (RNA) 4.0 -24 Customer Gel QC -20 CaliperGX QC (DNA) -16 Bioanalyzer QC (DNA) 4.0 -18 Bioanalyzer QC (RNA) 4.0 -116 CaliperGX QC (RNA) -504 Volume Measurement QC -=== ======================================= - - -SUMMARY -================== - - - -=== ======================================= -ID process Name -=== ======================================= -356 Project Summary 1.3 -=== ======================================= - - -LIBVAL -================== -All processes included in the library validation protocol, except the aggregation step. If the workflow involves two library preparation protocols, such as for exome capture, only the steps of the second protocol should be given here. - - -=== ======================================= -ID process Name -=== ======================================= -20 CaliperGX QC (DNA) -17 Bioanalyzer QC (Library Validation) 4.0 -62 qPCR QC (Library Validation) 4.0 -64 Quant-iT QC (Library Validation) 4.0 -67 Qubit QC (Library Validation) 4.0 -=== ======================================= - - -SEQUENCING -================== -Sequencing - - -=== ======================================= -ID process Name -=== ======================================= -46 MiSeq Run (MiSeq) 4.0 -38 Illumina Sequencing (Illumina SBS) 4.0 -=== ======================================= - - -DEMULTIPLEX -================== - - - -=== ======================================= -ID process Name -=== ======================================= -13 Bcl Conversion & Demultiplexing (Illumina SBS) 4.0 -=== ======================================= - - -PREPSTART -================== -Process/processes that can be defined as a start of the library preparation protocol. The first of these that is run in lims is used to set the prep start date. If the workflow involves two library preparation protocols, such as for exome capture, the prep start step of the second protocol should be given here.
- - -=== ======================================= -ID process Name -=== ======================================= -407 Fragment DNA (ThruPlex) -10 Aliquot Libraries for Hybridization (SS XT) -117 Applications Generic Process -33 Fragment DNA (TruSeq DNA) 4.0 -47 mRNA Purification, Fragmentation & cDNA synthesis (TruSeq RNA) 4.0 -308 Library Pooling (TruSeq Small RNA) 1.0 -405 RiboZero depletion -=== ======================================= - - -AGRLIBVAL -================== -The aggregation step of the library validation protocol - - -=== ======================================= -ID process Name -=== ======================================= -8 Aggregate QC (Library Validation) 4.0 -=== ======================================= - - diff --git a/LIMS2DB/project_summary_upload_LIMS.py b/LIMS2DB/project_summary_upload_LIMS.py deleted file mode 100755 index f8a7fea..0000000 --- a/LIMS2DB/project_summary_upload_LIMS.py +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -"""Script to load project info from Lims into the project database in statusdb. - -Maya Brandi, Science for Life Laboratory, Stockholm, Sweden. -""" - -import sys -import os -import codecs -from optparse import OptionParser -import load_status_from_google_docs -from scilifelab.db.statusDB_utils import * -from helpers import * -from pprint import pprint -from genologics.lims import * -from genologics.config import BASEURI, USERNAME, PASSWORD -import objectsDB as DB -from datetime import date -import time -import scilifelab.log -import threading -import Queue -lims = Lims(BASEURI, USERNAME, PASSWORD) -LOG = scilifelab.log.minimal_logger('LOG') -projectsQueue=Queue.Queue() - -class PSUL(): - def __init__(self, proj, samp_db, proj_db, upload_data, days, man_name, output_f): - self.proj = proj - self.id = proj.id - self.udfs = proj.udf - self.name = proj.name - self.open_date = proj.open_date - self.close_date = proj.close_date - self.samp_db = samp_db - self.proj_db = proj_db - self.upload_data = upload_data - self.man_name = man_name - self.days = days - self.output_f = output_f - self.ordered_opened = None - - def print_couchdb_obj_to_file(self, obj): - if self.output_f is not None: - with open(self.output_f, 'w') as f: - print(obj, file = f) - else: - print(obj, file = sys.stdout) - - def get_ordered_opened(self): - """Is project registered as opened or ordered?""" - - if self.open_date: - self.ordered_opened = self.open_date - elif 'Order received' in dict(self.udfs.items()).keys(): - self.ordered_opened = self.udfs['Order received'].isoformat() - else: - LOG.info("Project is not updated because 'Order received' date and " - "'open date' is missing for project {name}".format( - name = self.name)) - - def get_days_closed(self): - """Project registered as closed?""" - - if self.close_date: - closed = date(*map(int, self.close_date.split('-'))) - return (date.today() - closed).days - else: - return 0 - - def determine_update(self): - """Determine wether to and how to update project""" - days_closed = self.get_days_closed() - opended_after_130630 = comp_dates('2013-06-30', self.ordered_opened) - closed_for_a_while = (days_closed > self.days) - log_info = '' - if (not opended_after_130630) or closed_for_a_while: - if self.man_name: ## Ask wether to update - start_update = raw_input(""" - Project {name} was ordered or opended at {ord_op} and has been - closed for {days} days. Do you still want to load the data from - lims into statusdb? - Press enter for No, any other key for Yes! 
""".format( - name = self.name, ord_op = self.ordered_opened, days = days_closed)) - else: ## Do not update - start_update = False - log_info = ('Project is not updated because: ') - if closed_for_a_while: - log_info += ('It has been closed for {days} days. '.format( - days = days_closed)) - if not opended_after_130630: - log_info += ('It was opened or ordered before 2013-06-30 ' - '({ord_op})'.format(ord_op = self.ordered_opened)) - else: - start_update = True - - if start_update: - log_info = self.update_project(DB) - return log_info - - def update_project(self, database): - """Fetch project info and update project in the database.""" - opended_after_140630 = comp_dates('2014-06-30', self.ordered_opened) - try: - LOG.info('Handeling {proj}'.format(proj = self.name)) - project = database.ProjectDB(lims, self.id, self.samp_db) - key = find_proj_from_view(self.proj_db, self.name) - project.obj['_id'] = find_or_make_key(key) - if not opended_after_140630: - project.obj = load_status_from_google_docs.get(self.name, project.obj) - if self.upload_data: - info = save_couchdb_obj(self.proj_db, project.obj) - else: - info = self.print_couchdb_obj_to_file(project.obj) - return "project {name} is handled and {info}: _id = {id}".format( - name=self.name, info=info, id=project.obj['_id']) - except: - return ('Issues geting info for {name}. The "Application" udf might' - ' be missing'.format(name = self.name)) - - def project_update_and_logging(self): - start_time = time.time() - self.get_ordered_opened() - if self.ordered_opened: - log_info = self.determine_update() - else: - log_info = ('No open date or order date found for project {name}. ' - 'Project not updated.'.format(name = self.name)) - elapsed = time.time() - start_time - LOG.info('Time - {elapsed} : Proj Name - ' - '{name}'.format(elapsed = elapsed, name = self.name)) - LOG.info(log_info) - -def main(options): - man_name=options.project_name - all_projects=options.all_projects - days=options.days - conf=options.conf - upload_data=options.upload - output_f = options.output_f - couch = load_couch_server(conf) - proj_db = couch['projects'] - samp_db = couch['samples'] - - if all_projects: - projects = lims.get_projects() - masterThread(options,projects) - elif man_name: - proj = lims.get_projects(name = man_name) - if not proj: - LOG.warning('No project named {man_name} in Lims'.format( - man_name = man_name)) - else: - P = PSUL(proj[0], samp_db, proj_db, upload_data, days, man_name, output_f) - P.project_update_and_logging() - -class ThreadPSUL(threading.Thread): - def __init__(self, options,queue): - threading.Thread.__init__(self) - self.options=options - self.queue = queue - couch = load_couch_server(options.conf) - self.proj_db = couch['projects'] - self.samp_db = couch['samples'] - def run(self): - while True: - #grabs project from queue - proj = self.queue.get(block=True, timeout=2) - P = PSUL(proj, self.samp_db, self.proj_db, self.options.upload, self.options.days, self.options.project_name, self.options.output_f) - P.project_update_and_logging() - #signals to queue job is done - self.queue.task_done() - if self.queue.empty() : - break - - -def masterThread(options,projectList): -#spawn a pool of threads, and pass them queue instance - for i in range(options.threads): - t = ThreadPSUL(options,projectsQueue) - t.start() -#populate queue with data - for proj in projectList: - projectsQueue.put(proj) - -#wait on the queue until everything has been processed - projectsQueue.join() - - -if __name__ == '__main__': - usage = "Usage: python 
project_summary_upload_LIMS.py [options]" - parser = OptionParser(usage=usage) - parser.add_option("-p", "--project", dest = "project_name", default = None, - help = "eg: M.Uhlen_13_01. Dont use with -a flagg.") - parser.add_option("-a", "--all_projects", dest = "all_projects", action = - "store_true", default = False, help = ("Upload all Lims ", - "projects into couchDB. Don't use with -f flagg.")) - parser.add_option("-d", "--days", dest = "days", default = 60, help = ( - "Projects with a close_date older than DAYS days are not", - " updated. Default is 60 days. Use with -a flagg")) - parser.add_option("-c", "--conf", dest = "conf", default = os.path.join( - os.environ['HOME'],'opt/config/post_process.yaml'), help = - "Config file. Default: ~/opt/config/post_process.yaml") - parser.add_option("--no_upload", dest = "upload", default = True, action = - "store_false", help = ("Use this tag if project objects ", - "should not be uploaded, but printed to output_f, or to ", - "stdout")) - parser.add_option("--output_f", dest = "output_f", help = ("Output file", - " that will be used only if --no_upload tag is used"), default=None) - parser.add_option("-t", "--threads", type='int', dest = "threads", default = 4, - help = "How many threads will be spawned. Will only work with -a") - - (options, args) = parser.parse_args() - LOG = scilifelab.log.file_logger('LOG', options.conf, 'lims2db_projects.log' - ,'log_dir_tools') - - main(options) - diff --git a/LIMS2DB/project_summary_upload_LIMS.pyc b/LIMS2DB/project_summary_upload_LIMS.pyc deleted file mode 100644 index d9ce9eb..0000000 Binary files a/LIMS2DB/project_summary_upload_LIMS.pyc and /dev/null differ diff --git a/helpers.py b/helpers.py deleted file mode 100644 index 2b1c411..0000000 --- a/helpers.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python - -from datetime import date - -def comp_dates(a, b): - """Dates in isoformat. Is a < b?""" - a = date(*map(int, a.split('-') )) - b = date(*map(int, b.split('-') )) - delta = a - b - if delta.days < 0: - return True - else: - return False - -def delete_Nones(dict): - "Deletes None type items from dict." - new_dict = {} - if dict: - for key, val in dict.items(): - if val: - if not val=='null': - if not (val=='2000-10-10' or val=='3000-10-10'): - new_dict[key] = val - if new_dict != {}: - return new_dict - else: - return None diff --git a/lims_utils.py b/lims_utils.py deleted file mode 100644 index 465c30e..0000000 --- a/lims_utils.py +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/env python - -"""A module with lims help functions. - -Maya Brandi, Science for Life Laboratory, Stockholm, Sweden. -""" -from genologics.lims import * -from genologics.config import BASEURI, USERNAME, PASSWORD -lims = Lims(BASEURI, USERNAME, PASSWORD) - -"""process category dictionaries - -In the lims_utils context, processes are categorised into groups that define, -or are used to define a certain type of statusdb key. 
The categories and their -processes are defined here:""" 'hh' - -INITALQCFINISHEDLIB = {'24' : 'Customer Gel QC', - '62' : 'qPCR QC (Library Validation) 4.0', - '64' : 'Quant-iT QC (Library Validation) 4.0', - '67' : 'Qubit QC (Library Validation) 4.0', - '20' : 'CaliperGX QC (DNA)', - '17' : 'Bioanalyzer QC (Library Validation) 4.0'} -INITALQC ={'63' : 'Quant-iT QC (DNA) 4.0', - '65' : 'Quant-iT QC (RNA) 4.0', - '66' : 'Qubit QC (DNA) 4.0', - '68' : 'Qubit QC (RNA) 4.0', - '24' : 'Customer Gel QC', - '20' : 'CaliperGX QC (DNA)', - '16' : 'Bioanalyzer QC (DNA) 4.0', - '18' : 'Bioanalyzer QC (RNA) 4.0', - '116' : 'CaliperGX QC (RNA)', - '504' : 'Volume Measurement QC'} -AGRINITQC = {'7' : 'Aggregate QC (DNA) 4.0', - '9' : 'Aggregate QC (RNA) 4.0'} -PREPREPSTART = {'74': 'Shear DNA (SS XT) 4.0', - '304' : "Ligate 3' adapters (TruSeq small RNA) 1.0"} -POOLING = {'42': "Library Pooling (Illumina SBS) 4.0", - '43': "Library Pooling (MiSeq) 4.0", - '44': "Library Pooling (TruSeq Amplicon) 4.0", - '45': "Library Pooling (TruSeq Exome) 4.0", - '58': "Pooling For Multiplexed Sequencing (SS XT) 4.0", - '255': "Library Pooling (Finished Libraries) 4.0", - '308': "Library Pooling (TruSeq Small RNA) 1.0", - '404': "Pre-Pooling (Illumina SBS) 4.0", - '506': "Pre-Pooling (MiSeq) 4.0", - '508': "Applications Pre-Pooling"} -PREPSTART = {'10' : 'Aliquot Libraries for Hybridization (SS XT)', - '47' : 'mRNA Purification, Fragmentation & cDNA synthesis (TruSeq RNA) 4.0', - '33' : 'Fragment DNA (TruSeq DNA) 4.0', - '407' : 'Fragment DNA (ThruPlex)', - '308': 'Library Pooling (TruSeq Small RNA) 1.0', - '117' : 'Applications Generic Process', - '405' : 'RiboZero depletion'} -PREPEND = {'157': 'Applications Finish Prep', - '109' : 'CA Purification', - '456' : 'Purification (ThruPlex)', - '111' : 'Amplify Captured Libraries to Add Index Tags (SS XT) 4.0', - '406' : 'End repair, size selection, A-tailing and adapter ligation (TruSeq PCR-free DNA) 4.0', - '311': 'Sample Placement (Size Selection)'} -LIBVAL = {'62' : 'qPCR QC (Library Validation) 4.0', - '64' : 'Quant-iT QC (Library Validation) 4.0', - '67' : 'Qubit QC (Library Validation) 4.0', - '20' : 'CaliperGX QC (DNA)', - '17' : 'Bioanalyzer QC (Library Validation) 4.0'} -LIBVALFINISHEDLIB = {'62' : 'qPCR QC (Library Validation) 4.0', - '64' : 'Quant-iT QC (Library Validation) 4.0', - '67' : 'Qubit QC (Library Validation) 4.0', - '20' : 'CaliperGX QC (DNA)', - '17' : 'Bioanalyzer QC (Library Validation) 4.0', - '24' : 'Customer Gel QC'} -AGRLIBVAL = {'8': 'Aggregate QC (Library Validation) 4.0'} -SEQSTART = {'23':'Cluster Generation (Illumina SBS) 4.0', - '26':'Denature, Dilute and Load Sample (MiSeq) 4.0'} - -DILSTART = {'40' : 'Library Normalization (MiSeq) 4.0', - '39' : 'Library Normalization (Illumina SBS) 4.0'} -SEQUENCING = {'38' : 'Illumina Sequencing (Illumina SBS) 4.0', - '46' : 'MiSeq Run (MiSeq) 4.0'} -WORKSET = {'204' : 'Setup Workset/Plate'} -SUMMARY = {'356' : 'Project Summary 1.3'} -DEMULTIPLEX={'13' : 'Bcl Conversion & Demultiplexing (Illumina SBS) 4.0'} - -FINLIB = ['Finished library', 'Amplicon'] -PROJ_UDF_EXCEPTIONS = ['customer_reference','uppnex_id','reference_genome','application'] -SAMP_UDF_EXCEPTIONS = ['customer_name','reads_requested_(millions)','min_reads', - 'm_reads','dup_rm','status_auto','status_manual','average_size_bp','incoming_qc_status'] - -CALIPER={'20' : 'CaliperGX QC (DNA)','116' : 'CaliperGX QC (RNA)'} - - -def procHistory(proc, samplename): - """Quick wat to get the ids of parent processes from the given process, - while 
staying in a sample scope""" - hist=[] - artifacts = lims.get_artifacts(sample_name = samplename, type = 'Analyte') - not_done=True - starting_art=proc.input_per_sample(samplename)[0].id - while not_done: - not_done=False - for o in artifacts: - if o.id == starting_art: - if o.parent_process is None: - #flow control : if there is no parent process, we can stop iterating, we're done. - not_done=False - break #breaks the for artifacts, we are done anyway. - else: - not_done=True #keep the loop running - hist.append(o.parent_process.id) - for i in o.parent_process.all_inputs(): - if i in artifacts: - # while increment - starting_art=i.id - - break #break the for allinputs, if we found the right one - break # breaks the for artifacts if we matched the current one - return hist - -def get_run_qcs(fc, lanesobj): - for art in fc.all_inputs(): - lane=art.location[1][0] - if lane not in lanesobj: - #should never happen if pm works - lanesobj[lane]={} - lanesobj[lane]['seq_qc_flag']=art.qc_flag - dem=lims.get_processes(type=DEMULTIPLEX.values(), inputartifactlimsid=art.id) - try: - for outart in dem[0].all_outputs(): - if "FASTQ reads" not in outart.name: - continue - else: - for outsample in outart.samples: - #this should be only one - lanesobj[lane][outsample.name]={} - lanesobj[lane][outsample.name]['dem_qc_flag']=outart.qc_flag - - except IndexError: - #No demutiplexing found. this is fine. - pass - - - -def get_sequencing_info(fc): - """Input: a process object 'fc', of type 'Illumina Sequencing (Illumina SBS) 4.0', - Output: A dictionary where keys are lanes 1,2,...,8, and values are lane artifact udfs""" - fc_summary={} - for iom in fc.input_output_maps: - art = Artifact(lims,id = iom[0]['limsid']) - lane = art.location[1].split(':')[0] - if not fc_summary.has_key(lane): - fc_summary[lane]= dict(art.udf.items()) #"%.2f" % val ----round?? - fc_summary[lane]['qc'] = art.qc_flag - return fc_summary - -def make_sample_artifact_maps(sample_name): - """outin: connects each out_art for a specific sample to its - corresponding in_art and process. 
one-one relation""" - outin = {} - artifacts = lims.get_artifacts(sample_name = sample_name, type = 'Analyte') - for outart in artifacts: - try: - pro = outart.parent_process - inarts = outart.input_artifact_list() - for inart in inarts: - for samp in inart.samples: - if samp.name == sample_name: - outin[outart.id] = (pro, inart.id) - except: - pass - return outin diff --git a/load_status_from_google_docs.py b/load_status_from_google_docs.py deleted file mode 100644 index 1d8df21..0000000 --- a/load_status_from_google_docs.py +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/env python -import sys -import os -import time -from datetime import datetime -from uuid import uuid4 -import hashlib -from optparse import OptionParser -import logging -import bcbio.pipeline.config_utils as cl -from scilifelab.google.google_docs import SpreadSheet -from scilifelab.google import get_credentials -import couchdb - - -# GOOGLE DOCS -def get_google_document(ssheet_title, wsheet_title, client): - ssheet = client.get_spreadsheet(ssheet_title) - wsheet = client.get_worksheet(wsheet_title) - content = client.get_cell_content(wsheet) - ss_key = client.get_key(ssheet) - ws_key = client.get_key(wsheet) - return content, ws_key, ss_key - -def get_column(ssheet_content, header, col_cond=0): - colindex='' - for j, row in enumerate(ssheet_content): - if colindex == '': - for i, col in enumerate(row): - if col_cond <= i and colindex == '': - if str(col).strip().replace('\n','').replace(' ','') == header.replace(' ',''): - colindex = i - else: - rowindex = j-1 - return rowindex, colindex - -# NAME STRIP -def strip_index(name): - indexes = ['_nxdual','_index','_rpi','_agilent','_mondrian','_haloht', - '_halo','_sureselect','_dual','_hht','_ss','_i','_r','_a','_m','_h'] - name = name.replace('-', '_').replace(' ', '') - for i in indexes: - name=name.split(i)[0] - preps='FBCDEF' - for prep in preps: - name=name.rstrip(prep) - return name - -def get_20158_info(credentials, project_name_swe): - versions = {"01": ['Sample name Scilife', "Total reads per sample", "Sheet1","Passed=P/ not passed=NP*"], - "02": ["Sample name (SciLifeLab)", "Total number of reads (Millions)","Sheet1", - "Based on total number of reads after mapping and duplicate removal"], - "03": ["Sample name (SciLifeLab)", "Total number of reads (Millions)","Sheet1", - "Based on total number of reads after mapping and duplicate removal "], - "05": ["Sample name (from Project read counts)", "Total number","Sheet1", - "Based on total number of reads","Based on total number of reads after mapping and duplicate removal"], - "06": ["Sample name (from Project read counts)", "Total number","Sheet1", - "Based on total number of reads","Based on total number of reads after mapping and duplicate removal"]} - info = {} - client = SpreadSheet(credentials) - feed = client.get_spreadsheets_feed(project_name_swe + '_20158', False) - if len(feed.entry) != 0: - ssheet = feed.entry[0].title.text - version = ssheet.split(str('_20158_'))[1].split(' ')[0].split('_')[0] - client = SpreadSheet(credentials, ssheet) - content, ws_key, ss_key = get_google_document(ssheet, versions[version][2], client) - dummy, P_NP_colindex = get_column(content, versions[version][3]) - dummy, No_reads_sequenced_colindex = get_column(content, versions[version][1]) - row_ind, scilife_names_colindex = get_column(content, versions[version][0]) - if (version=="05")| (version=="06"): - dummy, P_NP_duprem_colindex = get_column(content, versions[version][4]) ## [version][4] for dup rem - else: - 
P_NP_duprem_colindex='' - for j, row in enumerate(content): - if (j > row_ind): - try: - sci_name = str(row[scilife_names_colindex]).strip() - striped_name = strip_index(sci_name) - no_reads = str(row[No_reads_sequenced_colindex]).strip() - if (P_NP_duprem_colindex!='') and (str(row[P_NP_duprem_colindex]).strip()!=''): - status = str(row[P_NP_duprem_colindex]).strip() - else: - status = str(row[P_NP_colindex]).strip() - info[striped_name] = [status,no_reads] - except: - pass - else: - info=None - return info - -def get(project_ID, proj): - CREDENTIALS_FILE = os.path.join(os.environ['HOME'], 'opt/config/gdocs_credentials') - credentials = get_credentials(CREDENTIALS_FILE) - info = get_20158_info(credentials, project_ID) - if info: - if proj.has_key('samples'): - for sample in proj['samples']: - if sample in info.keys(): - proj['samples'][sample]['status'] = info[sample][0] - proj['samples'][sample]['m_reads_sequenced'] = info[sample][1] - if not proj['samples'][sample].has_key('details'): - proj['samples'][sample]['details'] = { - 'status_(manual)' : info[sample][0], - 'total_reads_(m)' : info[sample][1]} - else: - if not proj['samples'][sample]['details'].has_key('status_(manual)'): - proj['samples'][sample]['details']['status_(manual)'] = info[sample][0] - if not proj['samples'][sample]['details'].has_key('total_reads_(m)'): - proj['samples'][sample]['details']['total_reads_(m)'] = info[sample][1] - return proj - - diff --git a/objectsDB.py b/objectsDB.py deleted file mode 100644 index 980b7b2..0000000 --- a/objectsDB.py +++ /dev/null @@ -1,833 +0,0 @@ -#!/usr/bin/env python - -"""A module for building up the project objects that build up the project database on -statusdb with lims as the main source of information. - -Maya Brandi, Science for Life Laboratory, Stockholm, Sweden. -""" -#import codecs -#from genologics.lims import * -#import genologics.entities as gent -#from lims_utils import * -#from scilifelab.db.statusDB_utils import * -#from helpers import * -#import os -#import couchdb -#import bcbio.pipeline.config_utils as cl -#import time -#from datetime import date -#import logging - -### Functions ### - -def udf_dict(element, exeptions = [], exclude = True): - """Takes a lims element and tertuns a dictionary of its udfs, where the udf - names are trensformed to statusdb keys (underscore and lowercase). - - exeptions and exclude = False - will return a dict with only the exeptions - exeptions and exclude = True - will return a dict without the exeptions - - Arguments: - element lims element (Sample, Artifact, Process, Project...) - exeptions list of exception udf keys (underscore and lowercase) - exlude (True/False)""" - - udf_dict = {} - for key, val in element.udf.items(): - key = key.replace(' ', '_').lower().replace('.','') - try: val = val.isoformat() - except: pass - if key in exeptions and not exclude: - udf_dict[key] = val - elif key not in exeptions and exclude: - udf_dict[key] = val - return udf_dict - -def get_last_first(process_list, last=True): - """""" - returned_process=None - for pro in process_list: - if (not returned_process) \ - or (pro.get('date')>returned_process.get('date') and last) \ - or (pro.get('date') 0: - self.obj['project_summary'] = udf_dict(project_summary[0]) - if len(project_summary) > 1: - print 'Warning. 
project summary process run more than once' - - def _get_sequencing_finished(self): - """ - :project/[KEY]: - =================== ============ =========== ================ - KEY lims_element lims_field description - =================== ============ =========== ================ - sequencing_finished Source - =================== ============ =========== ================ - Finish Date = last seq date if proj closed. Will be removed and - feched from lims.""" - seq_fin = [] - if self.project.close_date and 'samples' in self.obj.keys(): - for samp in self.obj['samples'].values(): - if 'library_prep' in samp.keys(): - for prep in samp['library_prep'].values(): - if 'sample_run_metrics' in prep.keys(): - for run in prep['sample_run_metrics'].values(): - if 'sequencing_finish_date' in run.keys(): - seq_fin.append(run['sequencing_finish_date']) - if seq_fin: - self.obj['sequencing_finished'] = max(seq_fin) - else: - self.obj['sequencing_finished'] = None - - def _make_DB_samples(self): - ## Getting sample info - """ - :project/[KEY]: - ================ ============ =========== ================ - KEY lims_element lims_field description - ================ ============ =========== ================ - first_initial_qc Source - no_of_samples Project - Number of registered samples for the project - samples Sample Name Dict of all samples registered for the project. Keys are sample names. Values are described by the project/samples/[sample] doc. - ================ ============ =========== ================ -""" - samples = self.lims.get_samples(projectlimsid = self.project.id) - self.obj['no_of_samples'] = len(samples) - if len(samples) > 0: - procss_per_art = self.build_processes_per_artifact(self.lims, - self.project.name) - self.obj['first_initial_qc'] = '3000-10-10' - for samp in samples: - sampDB = SampleDB(self.lims, - samp.id, - self.obj['project_name'], - self.samp_db, - self.obj['application'], - self.preps.info, - self.demux_procs.info, - processes_per_artifact = procss_per_art) - self.obj['samples'][sampDB.name] = sampDB.obj - try: - initial_qc_start_date = self.obj['samples'][sampDB.name]['initial_qc']['start_date'] - if comp_dates(initial_qc_start_date, - self.obj['first_initial_qc']): - self.obj['first_initial_qc'] = initial_qc_start_date - except: - pass - self.obj = delete_Nones(self.obj) - - - def build_processes_per_artifact(self,lims, pname): - """Constructs a dictionary linking each artifact id with its processes. - Other artifacts can be present as keys. All processes where the project is - present should be included. The values of the dictionary is sets, to avoid - duplicated projects for a single artifact. - """ - processes = lims.get_processes(projectname = pname) - processes_per_artifact = {} - for process in processes: - for inart, outart in process.input_output_maps: - if inart is not None: - if inart['limsid'] in processes_per_artifact: - processes_per_artifact[inart['limsid']].add(process) - else: - processes_per_artifact[inart['limsid']] = {process} - - return processes_per_artifact - -class ProcessInfo(): - """This class takes a list of process type names. Eg - 'Aggregate QC (Library Validation) 4.0' and forms a dict with info about - all processes of the type specified in runs which the project has gon through. 
- - info = {24-8460:{ - 'start_date' - 'samples':{'P424_111':{in_art_id1 : [in_art1, out_art1], - in_art_id2: [in_art2, out_art2]}, - 'P424_115': ...}, - ...}, - '24-8480':...}""" - def __init__(self, lims_instance, processes): - self.lims = lims_instance - self.info = self._get_process_info(processes) - - def _get_process_info(self, processes): - process_info = {} - for process in processes: - process_info[process.id] = {'type' : process.type.name , - 'start_date': process.date_run, - 'samples' : {}} - in_arts=[] - for in_art_id, out_art_id in process.input_output_maps: - in_art = in_art_id['uri'] #these are actually artifacts - out_art = out_art_id['uri'] - samples = in_art.samples - if in_art.id not in in_arts: - in_arts.append(in_art.id) - for samp in samples: - if not samp.name in process_info[process.id]['samples']: - process_info[process.id]['samples'][samp.name] = {} - process_info[process.id]['samples'][samp.name][in_art.id] = [in_art, out_art] - return process_info - - -class SampleDB(): - """Instances of this class holds a dictionary formatted for building up the - samples in the project database on status db. Source of information come - from different lims artifacts and processes.""" - def __init__(self, lims_instance , sample_id, project_name, samp_db, - application = None, AgrLibQCs = [], run_info = [], - processes_per_artifact = None): - self.lims = lims_instance - self.samp_db = samp_db - self.AgrLibQCs = AgrLibQCs - self.lims_sample = Sample(self.lims, id = sample_id) - self.name = self.lims_sample.name - self.application = application - self.run_info = run_info - self.processes_per_artifact = processes_per_artifact - self.obj = {} - self._get_sample_info() - - def _get_sample_info(self): - """ - :project/samples/[sample id]/[KEY]: - =========================== ============ =========== ================ - KEY lims_element lims_field description - =========================== ============ =========== ================ - scilife_name Sample name - well_location Fals - details Sample udf All Sample level udfs exept SAMP_UDF_EXCEPTIONS defined in lims_utils.py - sample_run_metrics - - Keys have the formate: LANE_DATE_FCID_BARCODE, where DATE and FCID: from udf ('Run ID') of the SEQUENCING step. BARCODE: from reagent-lables of output artifact from SEQSTART step. LANE: from the location of the input artifact to the SEQUENCING step. - library_prep Process date-run The keys of this dict are named A, B, etc and represent A-prep, B-prep etc. Preps are named A,B,... and are defined by the date of any PREPSTART step. First date-> prep A, second date -> prep B, etc. These are however not logged into the database until the process AGRLIBVAL has been run on the related artifact. - initial_qc Process - Dict ... - first_initial_qc_start_date Process date-run If aplication is Finished library this value is feched from the date-run of a the first INITALQCFINISHEDLIB step, otherwise from the date-run of a the first INITALQC step - first_prep_start_date .... 
- =========================== ============ =========== ================""" - self.obj['scilife_name'] = self.name - self.obj['well_location'] = self.lims_sample.artifact.location[1] - self.obj['details'] = udf_dict(self.lims_sample, SAMP_UDF_EXCEPTIONS) - self.obj.update(udf_dict(self.lims_sample, SAMP_UDF_EXCEPTIONS, False)) - preps = self._get_preps_and_libval() - if preps: - runs = self._get_sample_run_metrics(self.run_info, preps) - for prep_id in runs.keys(): - if preps.has_key(prep_id): - preps[prep_id]['sample_run_metrics'] = runs[prep_id] - self.obj['library_prep'] = self._get_prep_leter(preps) - initqc = InitialQC(self.lims, self.name, self.processes_per_artifact, - self.application) - self.obj['initial_qc'] = initqc.set_initialqc_info() - if self.application in ['Finished library', 'Amplicon']: - chategory = INITALQCFINISHEDLIB.values() - else: - chategory = INITALQC.values() - self.obj['first_initial_qc_start_date'] = self._get_firts_day(self.name, - chategory) - self.obj['first_prep_start_date'] = self._get_firts_day(self.name, - PREPSTART.values() + PREPREPSTART.values()) - self.obj = delete_Nones(self.obj) - - def _get_firts_day(self, sample_name ,process_list, last_day = False): - """process_list is a list of process type names, sample_name is a - sample name :)""" - arts = self.lims.get_artifacts(sample_name = sample_name, - process_type = process_list) - index = -1 if last_day else 0 - uniqueDates=set([a.parent_process.date_run for a in arts]) - try: - return sorted(uniqueDates)[index] - except IndexError: - return None - - def _get_barcode(self, reagent_label): - """Extracts barcode from list of artifact.reagent_labels""" - if reagent_label: - try: - index = reagent_label.split('(')[1].strip(')') - except: - index = reagent_label - else: - return None - return index - - def _get_sample_run_metrics(self, demux_info, preps): - """Input: demux_info - instance of the ProcessInfo class with - DEMULTIPLEX processes as argument - For each SEQUENCING process run on the sample, this function steps - bacward in the artifact history of the input artifact of the SEQUENCING - process to find the folowing information: - - - :project/samples/[sample id]/library_prep/[prep id]/sample_run_metrics/[samp run id]/[KEY]: - ================================ ============ =========== ================ - KEY lims_element lims_field description - ================================ ============ =========== ================ - dillution_and_pooling_start_date Process date-run date-run of DILSTART step - sequencing_start_date Process date-run ate-run of SEQSTART step - sequencing_run_QC_finished Process date-run date-run of SEQUENCING step - sequencing_finish_date Process Finish Date udf ('Finish Date') of SEQUENCING step - sample_run_metrics_id - - The sample database (statusdb) _id for the sample_run_metrics corresponding to the run, sample, lane in question. - dem_qc_flag ... - seq_qc_flag ... 
- ================================ ============ =========== ================""" - sample_runs = {} - for id, run in demux_info.items(): - if run['samples'].has_key(self.name): - for id , arts in run['samples'][self.name].items(): - history = gent.SampleHistory(sample_name = self.name, - output_artifact = arts[1].id, - input_artifact = arts[0].id, - lims = self.lims, - pro_per_art = self.processes_per_artifact) - steps = ProcessSpec(history.history, history.history_list, - self.application) - if self.application in ['Finished library', 'Amplicon']: - key = 'Finished' - elif steps.preprepstart: - key = steps.preprepstart['id'] - elif steps.prepstart: - key = steps.prepstart['id'] - else: - key = None - if key: - lims_run = Process(lims, id = steps.lastseq['id']) - run_dict = dict(lims_run.udf.items()) - if preps[key].has_key('reagent_label') and run_dict.has_key('Finish Date'): - dem_art = Artifact(lims, id = steps.latestdem['outart']) - seq_art = Artifact(lims, id = steps.lastseq['inart']) - lims_run = Process(lims, id = steps.lastseq['id']) - samp_run_met_id = self._make_sample_run_id(seq_art, - lims_run, preps[key], - steps.lastseq['type']) - if samp_run_met_id and self.samp_db: - srmi = find_sample_run_id_from_view(self.samp_db, - samp_run_met_id) - dpsd = steps.dilstart['date'] if steps.dilstart else None - ssd = steps.seqstart['date'] if steps.seqstart else None - sfd = lims_run.udf['Finish Date'].isoformat() - d = {'sample_run_metrics_id' : srmi, - 'dillution_and_pooling_start_date' : dpsd, - 'sequencing_start_date' : ssd, - 'sequencing_run_QC_finished' : run['start_date'], - 'sequencing_finish_date' : sfd, - 'dem_qc_flag' : dem_art.qc_flag, - 'seq_qc_flag' : seq_art.qc_flag} - d = delete_Nones(d) - if not sample_runs.has_key(key): - sample_runs[key] = {} - sample_runs[key][samp_run_met_id] = d - return sample_runs - - def _make_sample_run_id(self, seq_art, lims_run, prep, run_type): - samp_run_met_id = None - barcode = self._get_barcode(prep['reagent_label']) - if run_type == "MiSeq Run (MiSeq) 4.0": - lane = seq_art.location[1].split(':')[1] - else: - lane = seq_art.location[1].split(':')[0] - if dict(lims_run.udf.items()).has_key('Run ID'): - run_id = lims_run.udf['Run ID'] - try: - date = run_id.split('_')[0] - fcid = run_id.split('_')[3] - samp_run_met_id = '_'.join([lane, date, fcid, barcode]) - except TypeError: - #happens if the History object is missing fields, barcode might be None - logging.debug(self.name+" ",prep,"-", prep['reagent_label']) - raise TypeError - return samp_run_met_id - - def _get_prep_leter(self, prep_info): - """Get preps and prep names; A,B,C... based on prep dates for - sample_name. 
- Output: A dict where keys are prep_art_id and values are prep names.""" - dates = {} - prep_info_new = {} - preps_keys = map(chr, range(65, 65+len(prep_info))) - if len(prep_info) == 1: - prep_info_new['A'] = prep_info.values()[0] - else: - for key, val in prep_info.items(): - if val['pre_prep_start_date']: - dates[key] = val['pre_prep_start_date'] - else: - dates[key] = val['prep_start_date'] - for i, key in enumerate(sorted(dates,key= lambda x : dates[x])): - prep_info_new[preps_keys[i]] = delete_Nones(prep_info[key]) - return prep_info_new - - def _get_preps_and_libval(self): - """ - :project/samples/[sample id]/library_prep/[prep id]/[KEY]: - =========================== ============ =========== ================ - KEY lims_element lims_field description - =========================== ============ =========== ================ - pre_prep_library_validation True - library_validation - prep_status True - reagent_label True - =========================== ============ =========== ================ -""" - top_level_agrlibval_steps = self._get_top_level_agrlibval_steps() - preps = {} - very_last_libval_key = {} - for AgrLibQC_id in top_level_agrlibval_steps.keys(): - AgrLibQC_info = self.AgrLibQCs[AgrLibQC_id] - if AgrLibQC_info['samples'].has_key(self.name): - for inart in AgrLibQC_info['samples'][self.name].items(): - inart, outart = inart[1] - history = gent.SampleHistory(sample_name = self.name, - output_artifact = outart.id, - input_artifact = inart.id, - lims = self.lims, - pro_per_art = self.processes_per_artifact) - steps = ProcessSpec(history.history, history.history_list, - self.application) - prep = Prep(self.name) - prep.set_prep_info(steps, self.application) - if not preps.has_key(prep.id2AB) and prep.id2AB: - preps[prep.id2AB] = prep.prep_info - if prep.pre_prep_library_validations and prep.id2AB: - preps[prep.id2AB]['pre_prep_library_validation'].update( - prep.pre_prep_library_validations) - if prep.library_validations and prep.id2AB: - preps[prep.id2AB]['library_validation'].update( - prep.library_validations) - last_libval_key = max(prep.library_validations.keys()) - last_libval = prep.library_validations[last_libval_key] - in_last = very_last_libval_key.has_key(prep.id2AB) - is_last = prep.id2AB in very_last_libval_key and ( - last_libval_key > very_last_libval_key[prep.id2AB]) - if is_last or not in_last: - very_last_libval_key[prep.id2AB] = last_libval_key - if last_libval.has_key('prep_status'): - preps[prep.id2AB]['prep_status'] = last_libval['prep_status'] - preps[prep.id2AB]['reagent_label'] = self._pars_reagent_labels(steps, last_libval) - if preps.has_key('Finished'): - try: - preps['Finished']['reagent_label'] = self.lims_sample.artifact.reagent_labels[0] - except IndexError: - #P821 has nothing here - logging.warn("No reagent label for artifact {} in sample {}".format(self.lims_sample.artifact.id, self.name)) - preps['Finished']['reagent_label'] = None - - preps['Finished'] = delete_Nones(preps['Finished']) - - return preps - - - def _pars_reagent_labels(self, steps, last_libval): - if steps.firstpoolstep: - inart = Artifact(lims, id = steps.firstpoolstep['inart']) - if len(inart.reagent_labels) == 1: - return inart.reagent_labels[0] - if last_libval.has_key('reagent_labels'): - if len(last_libval['reagent_labels']) == 1: - return last_libval['reagent_labels'][0] - return None - return None - - def _get_top_level_agrlibval_steps(self): - topLevel_AgrLibQC={} - for AgrLibQC_id, AgrLibQC_info in self.AgrLibQCs.items(): - if AgrLibQC_info['samples'].has_key(self.name): 
- topLevel_AgrLibQC[AgrLibQC_id]=[] - inart, outart = AgrLibQC_info['samples'][self.name].items()[0][1] - history = gent.SampleHistory(sample_name = self.name, - output_artifact = outart.id, - input_artifact = inart.id, - lims = self.lims, - pro_per_art = self.processes_per_artifact) - for inart in history.history_list: - proc_info =history.history[inart] - proc_info = filter(lambda p : - (p['type'] in AGRLIBVAL.keys()),proc_info.values()) - - proc_ids = map(lambda p : p['id'], proc_info) - topLevel_AgrLibQC[AgrLibQC_id] = topLevel_AgrLibQC[AgrLibQC_id] + proc_ids - for AgrLibQC, LibQC in topLevel_AgrLibQC.items(): - LibQC=set(LibQC) - if LibQC: - for AgrLibQC_comp, LibQC_comp in topLevel_AgrLibQC.items(): - if AgrLibQC_comp != AgrLibQC: - LibQC_comp=set(LibQC_comp) - if LibQC.issubset(LibQC_comp) and topLevel_AgrLibQC.has_key(AgrLibQC): - topLevel_AgrLibQC.pop(AgrLibQC) - return topLevel_AgrLibQC - -class InitialQC(): - """""" - def __init__(self, lims_inst ,sample, procs_per_art, application): - self.lims = lims_inst - self.processes_per_artifact = procs_per_art - self.sample_name = sample - self.initialqc_info = {} - self.steps = None - self.application = application - - def set_initialqc_info(self): - """ - :project/samples/[sample id]/initial_qc/[KEY]: - =================== ============ ================ ================ - KEY lims_element lims_field description - =================== ============ ================ ================ - start_date Process date-run First of all (INITALQCFINISHEDLIB if application in FINLIB else INITALQC) steps found for in the artifact history of the output artifact of one of the AGRINITQC stepst - finish_date Process date-run One of the AGRINITQC steps - initials Technician initials technician.initials of the last of all (AGRLIBVAL if application in FINLIB else AGRINITQC) steps - initial_qc_status Artifact qc-flag qc-flag of thre input artifact to the last of all (AGRLIBVAL if application in FINLIB else AGRINITQC) steps - caliper_image Artifact content-location content-location of output Result files of the last of all CALIPER steps in the artifact history of the output artifact of one of the AGRINITQC steps - =================== ============ ================ ================""" - - self._get_initialqc_processes() - if self.steps: - if self.steps.initialqstart: - self.initialqc_info['start_date'] = self.steps.initialqstart['date'] - if self.steps.initialqcend: - inart = Artifact(lims, id = self.steps.initialqcend['inart']) - process = Process(lims,id = self.steps.initialqcend['id']) - self.initialqc_info.update(udf_dict(inart)) - initials = process.technician.initials - self.initialqc_info['initials'] = initials - self.initialqc_info['finish_date'] = self.steps.initialqcend['date'] - self.initialqc_info['initial_qc_status'] = inart.qc_flag - if self.steps.latestCaliper: - self.initialqc_info['caliper_image'] = get_caliper_img( - self.sample_name, - self.steps.latestCaliper['id']) - return delete_Nones(self.initialqc_info) - - def _get_initialqc_processes(self): - """""" - outarts = self.lims.get_artifacts(sample_name = self.sample_name, - process_type = AGRINITQC.values()) - if outarts: - outart = Artifact(lims, id = max(map(lambda a: a.id, outarts))) - latestInitQc = outart.parent_process - inart = latestInitQc.input_per_sample(self.sample_name)[0].id - history = gent.SampleHistory(sample_name = self.sample_name, - output_artifact = outart.id, - input_artifact = inart, lims = self.lims, - pro_per_art = self.processes_per_artifact) - if 
history.history_list: - self.steps = ProcessSpec(history.history, history.history_list, - self.application) - -class ProcessSpec(): - def __init__(self, hist_sort, hist_list, application): - self.application = application - self.init_qc = INITALQCFINISHEDLIB if application in FINLIB else INITALQC - self.agr_qc = AGRLIBVAL if application in FINLIB else AGRINITQC - self.libvalends = [] - self.libvalend = None - self.libvals = [] - self.libvalstart = None - self.prepend = None - self.prepstarts = [] - self.prepstart = None - self.prepreplibvalends = [] - self.prepreplibvalend = None - self.prepreplibvals = [] - self.prepreplibvalstart = None - self.preprepstarts = [] - self.prepends = [] - self.preprepstart = None - self.workset = None - self.worksets = [] - self.seqstarts = [] - self.seqstart = None - self.dilstart = None - self.dilstarts = [] - self.poolingsteps = [] - self.firstpoolstep = None - self.demproc = [] - self.latestdem = None - self.seq = [] - self.lastseq = None - self.caliper_procs = [] - self.latestCaliper = None - self.initialqcends = [] - self.initialqcs = [] - self.initialqcend = None - self.initialqcs = [] - self.initialqstart = None - - self._set_prep_processes(hist_sort, hist_list) - - def _set_prep_processes(self, hist_sort, hist_list): - hist_list.reverse() - for inart in hist_list: - prepreplibvalends = [] - art_steps = hist_sort[inart] - # INITALQCEND - get last agr initialqc val step after prepreplibval - self.initialqcends += filter(lambda pro: pro['type'] in self.agr_qc, - art_steps.values()) - # INITALQCSTART - get all lib val step after prepreplibval - self.initialqcs += filter(lambda pro: pro['type'] in self.init_qc, - art_steps.values()) - #1) PREPREPSTART - self.preprepstarts += filter(lambda pro: (pro['type'] in - PREPREPSTART and pro['outart']), art_steps.values()) - if self.preprepstarts and not self.prepends: - # 2)PREPREPLIBVALSTART PREPREPLIBVALEND - self.prepreplibvals += filter(lambda pro: (pro['type'] in - LIBVAL), art_steps.values()) - self.prepreplibvalends += filter(lambda pro: pro['type'] in - AGRLIBVAL, art_steps.values()) - elif self.application in FINLIB: - # 6) LIBVALSTART LIBVALEND - self.libvals += filter(lambda pro: pro['type'] in - LIBVALFINISHEDLIB, art_steps.values()) - self.libvalends += filter(lambda pro: pro['type'] in - AGRLIBVAL, art_steps.values()) - elif self.prepends: - # 6) LIBVALSTART LIBVALEND - self.libvals += filter(lambda pro: pro['type'] in - LIBVAL, art_steps.values()) - self.libvalends += filter(lambda pro: pro['type'] in - AGRLIBVAL, art_steps.values()) - # 4) PREPSTART - self.prepstarts += filter(lambda pro: (pro['type'] in - PREPSTART) and pro['outart'], art_steps.values()) - # 5) PREPEND - get latest prep end - self.prepends += filter(lambda pro: (pro['type'] in - PREPEND) and pro['outart'] , art_steps.values()) - # 8) WORKSET - get latest workset - self.worksets += filter(lambda pro: (pro['type'] in - WORKSET) and pro['outart'], art_steps.values()) - # 9) SEQSTART dubbelkolla - if not self.seqstarts: - self.seqstarts = filter(lambda pro: (pro['type'] in SEQSTART) - and pro['outart'], art_steps.values()) - # 10) DILSTART dubbelkolla - if not self.dilstarts: - self.dilstarts = filter(lambda pro: (pro['type'] in DILSTART) - and pro['outart'], art_steps.values()) - # 11) POOLING STEPS - self.poolingsteps += filter(lambda pro: (pro['type'] in - POOLING), art_steps.values()) - # 12) DEMULTIPLEXING - self.demproc += filter(lambda pro: (pro['type'] in - DEMULTIPLEX), art_steps.values()) - # 13) SEQUENCING - self.seq += 
filter(lambda pro: (pro['type'] in - SEQUENCING), art_steps.values()) - # 14) CALIPER - self.caliper_procs += filter(lambda pro: (pro['type'] in - CALIPER), art_steps.values()) - self.latestCaliper = get_last_first(self.caliper_procs, last = True) - self.initialqcend = get_last_first(self.initialqcends, last = True) - self.initialqstart = get_last_first(self.initialqcs, last = False) - self.lastseq = get_last_first(self.seq) - self.latestdem = get_last_first(self.demproc) - self.workset = get_last_first(self.worksets) - self.libvalstart = get_last_first(self.libvals, last = False) - self.libvalend = get_last_first(self.libvalends) - self.prepreplibvalend = get_last_first(self.prepreplibvalends) - self.prepstart = get_last_first(self.prepstarts, last = False) - self.prepend = get_last_first(self.prepends) - self.prepreplibvalstart = get_last_first(self.prepreplibvals, - last = False) - self.preprepstart = get_last_first(self.preprepstarts, last = False) - self.firstpoolstep = get_last_first(self.poolingsteps, last = False) - self.dilstart = get_last_first(self.dilstarts, last = False) - self.seqstart = get_last_first(self.seqstarts, last = False) - -class Prep(): - def __init__(self, sample_name): - self.sample_name=sample_name - self.prep_info = { - 'reagent_label': None, - 'library_validation':{}, - 'pre_prep_library_validation':{}, - 'prep_start_date': None, - 'prep_finished_date': None, - 'prep_id': None, - 'workset_setup': None, - 'pre_prep_start_date' : None} - self.id2AB = None - self.library_validations = {} - self.pre_prep_library_validations = {} - self.lib_val_templ = { - 'start_date' : None, - 'finish_date' : None, - 'well_location' : None, - 'prep_status' : None, - 'reagent_labels' : None, - 'average_size_bp' : None, - 'initials' : None, - 'caliper_image' : None} - - def set_prep_info(self, steps, aplication): - """ - :project/samples/[sample id]/library_prep/[lib prep id]/[KEY]: - =================== ============ =========== ================ - KEY lims_element lims_field description - =================== ============ =========== ================ - prep_start_date Process date-run The date-run of a PREPSTART step - prep_finished_date Process date-run The date-run of a PREPEND step - prep_id Process id The lims id of a PREPEND step - workset_setup False - pre_prep_start_date Process date-run The date-run of process 'Shear DNA (SS XT) 4.0'. 
Only for 'Exome capture' projects - =================== ============ =========== ================""" - - if aplication in ['Amplicon', 'Finished library']: - self.id2AB = 'Finished' - else: - if steps.prepstart: - self.prep_info['prep_start_date'] = steps.prepstart['date'] - if steps.prepend: - self.prep_info['prep_finished_date'] = steps.prepend['date'] - self.prep_info['prep_id'] = steps.prepend['id'] - if steps.workset: - self.prep_info['workset_setup'] = steps.workset['id'] - if steps.preprepstart: - self.prep_info['pre_prep_start_date'] = steps.preprepstart['date'] - self.id2AB = steps.preprepstart['id'] - if steps.preprepstart['outart']: - art = Artifact(lims, id = steps.preprepstart['outart']) - self.prep_info.update(udf_dict(art)) - elif steps.prepstart: - self.id2AB = steps.prepstart['id'] - if steps.prepstart['outart']: - art = Artifact(lims, id = steps.prepstart['outart']) - self.prep_info.update(udf_dict(art)) - if steps.libvalend: - self.library_validations = self._get_lib_val_info(steps.libvalends, - steps.libvalstart, steps.latestCaliper) - if steps.prepreplibvalend: - self.pre_prep_library_validations = self._get_lib_val_info( - steps.prepreplibvalends, steps.prepreplibvalstart) - - - def _get_lib_val_info(self, agrlibQCsteps, libvalstart, latest_caliper_id = None): - """ - :project/samples/[sample id]/library_prep/[lib prep id]/library_validation/[libval id]/[KEY]: - =============== ============ =========== ================ - KEY lims_element lims_field description - =============== ============ =========== ================ - finish_date Process date-run date-run of AGRLIBVAL step - start_date Process date-run First of all LIBVAL steps found for in the artifact history of the output artifact of one of the AGRLIBVAL step - well_location True - prep_status True - reagent_labels True - initials True - average_size_bp Artifact Size (bp) udf ('Size (bp)') of the input artifact to the process AGRLIBVAL - caliper_image True - =============== ============ =========== ================ """ - library_validations = {} - start_date = libvalstart['date'] if (libvalstart and - libvalstart.has_key('date')) else None - for agrlibQCstep in agrlibQCsteps: - library_validation = self.lib_val_templ - inart = Artifact(lims, id = agrlibQCstep['inart']) - if agrlibQCstep.has_key('date'): - library_validation['finish_date'] = agrlibQCstep['date'] - library_validation['start_date'] = start_date - library_validation['well_location'] = inart.location[1] - library_validation['prep_status'] = inart.qc_flag - library_validation['reagent_labels'] = inart.reagent_labels - library_validation.update(udf_dict(inart)) - initials = Process(lims, id = agrlibQCstep['id']).technician.initials - if initials: - library_validation['initials'] = initials - if library_validation.has_key("size_(bp)"): - average_size_bp = library_validation.pop("size_(bp)") - library_validation["average_size_bp"] = average_size_bp - if latest_caliper_id: - library_validation["caliper_image"] = get_caliper_img(self.sample_name, - latest_caliper_id['id']) - library_validations[agrlibQCstep['id']] = delete_Nones(library_validation) - return delete_Nones(library_validations) diff --git a/post_process_dev.yaml b/post_process_dev.yaml deleted file mode 100644 index e6c66cd..0000000 --- a/post_process_dev.yaml +++ /dev/null @@ -1,222 +0,0 @@ -galaxy_config: universe_wsgi.ini -program: - bowtie: bowtie - samtools: samtools - bwa: bwa - maq: maq - ucsc_bigwig: echo - picard: $PICARD_HOME - gatk: $GATK_HOME - fastqc: fastqc - pdflatex: pdflatex - 
ps2pdf: ps2pdf - barcode: barcode_sort_trim.py - snpEff: $SNPEFF_HOME -algorithm: - aligner: bwa - max_errors: 2 -# Use 2 concurrent processes - num_cores: 2 - stringency: high -# Quality format is standard - quality_format: Standard - platform: illumina - recalibrate: false - snpcall: false -# Do duplicate removal by default - mark_duplicates: true - java_memory: 6g - save_diskspace: true -# Screen using fastq_screen - screen_contaminants: true -# In general, filtering phix is no longer necessary since it is not indexed - filter_phix: false - upload_fastq: true -# We have pre-demultiplexed fastq input files - demultiplexed: true -# Do not merge same sample files - merge_samples: false -# reference file paths relative to reference genome indexes parent folder - dbsnp: variation/dbsnp_132.vcf - train_hapmap: variation/hapmap_3.3.vcf - train_1000g_omni: variation/1000G_omni2.5.vcf - train_indels: variation/Mills_Devine_2hit.indels.vcf -# set to false to bypas snpeff variation effect annotation - variation_effects: true - - -log_dir: /proj/a2010002/data/log -log_dir_tools: ~/. -store_dir: /proj/a2010002/archive -store_user: funk_001 -store_host: biologin.uppmax.uu.se -transfer_protocol: rsync -smtp_host: smtp.uu.se -smtp_port: 25 - -distributed: - run_process_program_locally: true - cluster_platform: slurm - num_workers: 1 - cores_per_host: 8 - rabbitmq_vhost: bionextgen_production -# Only notify mailing list upon failure. Samples are run as core jobs - platform_args: -p core -n 4 -N 1 -A a2010002 -t 3-00:00:00 --qos=seqver --mail-user=seqmaster@scilifelab.se --mail-type=FAIL - -project: - INBOX: /proj/a2012043/INBOX - analysis: /proj/a2012043/private/nobackup/projects - -analysis: - log: /proj/a2010002/data/log - store_dir: /proj/a2010002/archive - base_dir: /proj/a2010002/nobackup/illumina - towig_script: echo - distributed_process_program: run_bcbb_pipeline.py - process_program: run_bcbb_pipeline.py - worker_program: nextgen_analysis_server.py -# Don't do any upload to galaxy by default - upload_program: echo - copy_keyfile: $HOME/private/id_rsa - -# configuration algorithm changes for custom post-processing steps -custom_algorithms: - 'RNA-seq': - aligner: tophat - transcript_assemble: true - transcripts: rnaseq/ref-transcripts.gtf - transcripts_mask: rnaseq/ref-transcripts-mask.gtf - 'RNA-seq analysis': - aligner: Tophat - aligner_version: 2.0.4 - aligner_libtype: '--library-type fr-firststrand' - dup_remover: picard-tools - dup_remover_version: '1.29' - counts: htseq - counts_version: 0.5.1 - counts_stranded: 'yes' - quantifyer: cufflinks - quantifyer_version: 2.1.1 - sam: samtools - sam_version: 0.1.12 - picard_version: 1.85 - picard_tools: /bubo/sw/apps/bioinfo/picard/1.85/kalkyl/ - bedGraphToBigWig: /proj/a2012043/software/bedGraphToBigWig - BEDTools_version: 2.11.2 - bamtools: bamtools - bamtools_version: 2.2.3 - Preseq: ~/preseq-0.0.3/lc_extrap - preseq: preseq/0.0.3 - rseqc_version: 2.3.6 - hg19: - genomepath: /pica/data/uppnex/reference/biodata/genomes/Hsapiens/GRCh37/bowtie2/concat - gtfpath: /pica/data/uppnex/reference/biodata/genomes/Hsapiens/GRCh37/annotation/Homo_sapiens.GRCh37.73.gtf - bedpath: /pica/data/uppnex/reference/biodata/genomes/Hsapiens/GRCh37/annotation/Homo_sapiens.GRCh37.73.bed - name: GRCh37 - annotation_release: 73 - mm9: - genomepath: /pica/data/uppnex/reference/biodata/genomes/Mmusculus/GRCm38/bowtie2/concat - gtfpath: /pica/data/uppnex/reference/biodata/genomes/Mmusculus/GRCm38/Mus_musculus.GRCm38.73.gtf - bedpath: 
/pica/data/uppnex/reference/biodata/genomes/Mmusculus/GRCm38/Mus_musculus.GRCm38.73.bed - name: GRCm38 - annotation_release: 73 - wg_qc: - aligner: bwa - recalibrate: false - snpcall: false - merge_samples: true - wg_bp: - aligner: bwa - recalibrate: true - snpcall: true - merge_samples: true - # genome, exome or regional. regional uses hard filtering of variations - coverage_interval: genome - seqcap_qc: - aligner: bwa - recalibrate: false - snpcall: false - merge_samples: true - hybrid_target: /proj/a2010002/private/metadata/seqcap/agilent/SureSelect_Human_All_Exon_V5_targets.interval_list - hybrid_bait: /proj/a2010002/private/metadata/seqcap/agilent/SureSelect_Human_All_Exon_V5_baits.interval_list - seqcap_bp: - aligner: bwa - recalibrate: true - snpcall: true - merge_samples: true - hybrid_target: /proj/a2010002/private/metadata/seqcap/agilent/SureSelect_Human_All_Exon_V5_targets.interval_list - hybrid_bait: /proj/a2010002/private/metadata/seqcap/agilent/SureSelect_Human_All_Exon_V5_baits.interval_list - coverage_interval: exome - coverage_depth: high - customcap_qc: - aligner: bwa - recalibrate: false - snpcall: false - merge_samples: true - hybrid_target: /path/to/project_name/targets.interval_list - hybrid_bait: /path/to/project_name/baits.interval_list - # regional, genome or exome, depending on coverage and size of capture region - coverage_interval: regional - coverage_depth: high - customcap_bp: - aligner: bwa - recalibrate: true - snpcall: true - merge_samples: true - hybrid_target: /path/to/project_name/targets.interval_list - hybrid_bait: /path/to/project_name/baits.interval_list - # regional, genome or exome, depending on coverage and size of capture region - coverage_interval: regional - coverage_depth: high - -statusdb: - url: tools-dev.scilifelab.se - username: production - password: MGVmNjVkNzJlOTU1NmFh - port: 5984 - -sbatch: - extra_arg: --qos=seqver - -# 2 parallell processes for each job means that each process can use 2 cores -resources: - ucsc_bigwig: - memory: 6g - gatk: - cores: 2 - bwa: - cores: 2 - mosaik: - cores: 2 - bowtie: - cores: 2 - tophat: - cores: 2 - cufflinks: - cores: 2 - snpEff: - genome_remap: - GRCh37: - - GRCh37.64 - - GRCh37.63 - hg19: - - GRCh37.64 - - GRCh37.63 - mm9: - - NCBIM37.64 - - NCBIM37.63 - araTha_tair9: - - athalianaTair10 - araTha_tair10: - - athalianaTair10 - sacCer2: - - sacCer2 - canFam3: - - canFam3.1.73 - -gdocs_upload: - gdocs_credentials: $HOME/opt/config/gdocs_credentials - gdocs_projects_folder: Project read counts - projects_spreadsheet: Genomics project list - projects_worksheet: Ongoing, Finished 2013, Finished 2012, Finished 2011 - gdocs_email_notification: seqmaster@scilifelab.se diff --git a/post_process_tools.yaml b/post_process_tools.yaml deleted file mode 100644 index 5185f9e..0000000 --- a/post_process_tools.yaml +++ /dev/null @@ -1,222 +0,0 @@ -galaxy_config: universe_wsgi.ini -program: - bowtie: bowtie - samtools: samtools - bwa: bwa - maq: maq - ucsc_bigwig: echo - picard: $PICARD_HOME - gatk: $GATK_HOME - fastqc: fastqc - pdflatex: pdflatex - ps2pdf: ps2pdf - barcode: barcode_sort_trim.py - snpEff: $SNPEFF_HOME -algorithm: - aligner: bwa - max_errors: 2 -# Use 2 concurrent processes - num_cores: 2 - stringency: high -# Quality format is standard - quality_format: Standard - platform: illumina - recalibrate: false - snpcall: false -# Do duplicate removal by default - mark_duplicates: true - java_memory: 6g - save_diskspace: true -# Screen using fastq_screen - screen_contaminants: true -# In general, filtering 
phix is no longer necessary since it is not indexed - filter_phix: false - upload_fastq: true -# We have pre-demultiplexed fastq input files - demultiplexed: true -# Do not merge same sample files - merge_samples: false -# reference file paths relative to reference genome indexes parent folder - dbsnp: variation/dbsnp_132.vcf - train_hapmap: variation/hapmap_3.3.vcf - train_1000g_omni: variation/1000G_omni2.5.vcf - train_indels: variation/Mills_Devine_2hit.indels.vcf -# set to false to bypas snpeff variation effect annotation - variation_effects: true - - -log_dir: /proj/a2010002/data/log -log_dir_tools: ~/. -store_dir: /proj/a2010002/archive -store_user: funk_001 -store_host: biologin.uppmax.uu.se -transfer_protocol: rsync -smtp_host: smtp.uu.se -smtp_port: 25 - -distributed: - run_process_program_locally: true - cluster_platform: slurm - num_workers: 1 - cores_per_host: 8 - rabbitmq_vhost: bionextgen_production -# Only notify mailing list upon failure. Samples are run as core jobs - platform_args: -p core -n 4 -N 1 -A a2010002 -t 3-00:00:00 --qos=seqver --mail-user=seqmaster@scilifelab.se --mail-type=FAIL - -project: - INBOX: /proj/a2012043/INBOX - analysis: /proj/a2012043/private/nobackup/projects - -analysis: - log: /proj/a2010002/data/log - store_dir: /proj/a2010002/archive - base_dir: /proj/a2010002/nobackup/illumina - towig_script: echo - distributed_process_program: run_bcbb_pipeline.py - process_program: run_bcbb_pipeline.py - worker_program: nextgen_analysis_server.py -# Don't do any upload to galaxy by default - upload_program: echo - copy_keyfile: $HOME/private/id_rsa - -# configuration algorithm changes for custom post-processing steps -custom_algorithms: - 'RNA-seq': - aligner: tophat - transcript_assemble: true - transcripts: rnaseq/ref-transcripts.gtf - transcripts_mask: rnaseq/ref-transcripts-mask.gtf - 'RNA-seq analysis': - aligner: Tophat - aligner_version: 2.0.4 - aligner_libtype: '--library-type fr-firststrand' - dup_remover: picard-tools - dup_remover_version: '1.29' - counts: htseq - counts_version: 0.5.1 - counts_stranded: 'yes' - quantifyer: cufflinks - quantifyer_version: 2.1.1 - sam: samtools - sam_version: 0.1.12 - picard_version: 1.85 - picard_tools: /bubo/sw/apps/bioinfo/picard/1.85/kalkyl/ - bedGraphToBigWig: /proj/a2012043/software/bedGraphToBigWig - BEDTools_version: 2.11.2 - bamtools: bamtools - bamtools_version: 2.2.3 - Preseq: ~/preseq-0.0.3/lc_extrap - preseq: preseq/0.0.3 - rseqc_version: 2.3.6 - hg19: - genomepath: /pica/data/uppnex/reference/biodata/genomes/Hsapiens/GRCh37/bowtie2/concat - gtfpath: /pica/data/uppnex/reference/biodata/genomes/Hsapiens/GRCh37/annotation/Homo_sapiens.GRCh37.73.gtf - bedpath: /pica/data/uppnex/reference/biodata/genomes/Hsapiens/GRCh37/annotation/Homo_sapiens.GRCh37.73.bed - name: GRCh37 - annotation_release: 73 - mm9: - genomepath: /pica/data/uppnex/reference/biodata/genomes/Mmusculus/GRCm38/bowtie2/concat - gtfpath: /pica/data/uppnex/reference/biodata/genomes/Mmusculus/GRCm38/Mus_musculus.GRCm38.73.gtf - bedpath: /pica/data/uppnex/reference/biodata/genomes/Mmusculus/GRCm38/Mus_musculus.GRCm38.73.bed - name: GRCm38 - annotation_release: 73 - wg_qc: - aligner: bwa - recalibrate: false - snpcall: false - merge_samples: true - wg_bp: - aligner: bwa - recalibrate: true - snpcall: true - merge_samples: true - # genome, exome or regional. 
regional uses hard filtering of variations - coverage_interval: genome - seqcap_qc: - aligner: bwa - recalibrate: false - snpcall: false - merge_samples: true - hybrid_target: /proj/a2010002/private/metadata/seqcap/agilent/SureSelect_Human_All_Exon_V5_targets.interval_list - hybrid_bait: /proj/a2010002/private/metadata/seqcap/agilent/SureSelect_Human_All_Exon_V5_baits.interval_list - seqcap_bp: - aligner: bwa - recalibrate: true - snpcall: true - merge_samples: true - hybrid_target: /proj/a2010002/private/metadata/seqcap/agilent/SureSelect_Human_All_Exon_V5_targets.interval_list - hybrid_bait: /proj/a2010002/private/metadata/seqcap/agilent/SureSelect_Human_All_Exon_V5_baits.interval_list - coverage_interval: exome - coverage_depth: high - customcap_qc: - aligner: bwa - recalibrate: false - snpcall: false - merge_samples: true - hybrid_target: /path/to/project_name/targets.interval_list - hybrid_bait: /path/to/project_name/baits.interval_list - # regional, genome or exome, depending on coverage and size of capture region - coverage_interval: regional - coverage_depth: high - customcap_bp: - aligner: bwa - recalibrate: true - snpcall: true - merge_samples: true - hybrid_target: /path/to/project_name/targets.interval_list - hybrid_bait: /path/to/project_name/baits.interval_list - # regional, genome or exome, depending on coverage and size of capture region - coverage_interval: regional - coverage_depth: high - -statusdb: - url: tools.scilifelab.se - username: production - password: MGVmNjVkNzJlOTU1NmFh - port: 5984 - -sbatch: - extra_arg: --qos=seqver - -# 2 parallell processes for each job means that each process can use 2 cores -resources: - ucsc_bigwig: - memory: 6g - gatk: - cores: 2 - bwa: - cores: 2 - mosaik: - cores: 2 - bowtie: - cores: 2 - tophat: - cores: 2 - cufflinks: - cores: 2 - snpEff: - genome_remap: - GRCh37: - - GRCh37.64 - - GRCh37.63 - hg19: - - GRCh37.64 - - GRCh37.63 - mm9: - - NCBIM37.64 - - NCBIM37.63 - araTha_tair9: - - athalianaTair10 - araTha_tair10: - - athalianaTair10 - sacCer2: - - sacCer2 - canFam3: - - canFam3.1.73 - -gdocs_upload: - gdocs_credentials: $HOME/opt/config/gdocs_credentials - gdocs_projects_folder: Project read counts - projects_spreadsheet: Genomics project list - projects_worksheet: Ongoing, Finished 2013, Finished 2012, Finished 2011 - gdocs_email_notification: seqmaster@scilifelab.se diff --git a/project_summary_upload_LIMS.py b/project_summary_upload_LIMS.py deleted file mode 100755 index f8a7fea..0000000 --- a/project_summary_upload_LIMS.py +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -"""Script to load project info from Lims into the project database in statusdb. - -Maya Brandi, Science for Life Laboratory, Stockholm, Sweden. 
-""" - -import sys -import os -import codecs -from optparse import OptionParser -import load_status_from_google_docs -from scilifelab.db.statusDB_utils import * -from helpers import * -from pprint import pprint -from genologics.lims import * -from genologics.config import BASEURI, USERNAME, PASSWORD -import objectsDB as DB -from datetime import date -import time -import scilifelab.log -import threading -import Queue -lims = Lims(BASEURI, USERNAME, PASSWORD) -LOG = scilifelab.log.minimal_logger('LOG') -projectsQueue=Queue.Queue() - -class PSUL(): - def __init__(self, proj, samp_db, proj_db, upload_data, days, man_name, output_f): - self.proj = proj - self.id = proj.id - self.udfs = proj.udf - self.name = proj.name - self.open_date = proj.open_date - self.close_date = proj.close_date - self.samp_db = samp_db - self.proj_db = proj_db - self.upload_data = upload_data - self.man_name = man_name - self.days = days - self.output_f = output_f - self.ordered_opened = None - - def print_couchdb_obj_to_file(self, obj): - if self.output_f is not None: - with open(self.output_f, 'w') as f: - print(obj, file = f) - else: - print(obj, file = sys.stdout) - - def get_ordered_opened(self): - """Is project registered as opened or ordered?""" - - if self.open_date: - self.ordered_opened = self.open_date - elif 'Order received' in dict(self.udfs.items()).keys(): - self.ordered_opened = self.udfs['Order received'].isoformat() - else: - LOG.info("Project is not updated because 'Order received' date and " - "'open date' is missing for project {name}".format( - name = self.name)) - - def get_days_closed(self): - """Project registered as closed?""" - - if self.close_date: - closed = date(*map(int, self.close_date.split('-'))) - return (date.today() - closed).days - else: - return 0 - - def determine_update(self): - """Determine wether to and how to update project""" - days_closed = self.get_days_closed() - opended_after_130630 = comp_dates('2013-06-30', self.ordered_opened) - closed_for_a_while = (days_closed > self.days) - log_info = '' - if (not opended_after_130630) or closed_for_a_while: - if self.man_name: ## Ask wether to update - start_update = raw_input(""" - Project {name} was ordered or opended at {ord_op} and has been - closed for {days} days. Do you still want to load the data from - lims into statusdb? - Press enter for No, any other key for Yes! """.format( - name = self.name, ord_op = self.ordered_opened, days = days_closed)) - else: ## Do not update - start_update = False - log_info = ('Project is not updated because: ') - if closed_for_a_while: - log_info += ('It has been closed for {days} days. 
'.format( - days = days_closed)) - if not opended_after_130630: - log_info += ('It was opened or ordered before 2013-06-30 ' - '({ord_op})'.format(ord_op = self.ordered_opened)) - else: - start_update = True - - if start_update: - log_info = self.update_project(DB) - return log_info - - def update_project(self, database): - """Fetch project info and update project in the database.""" - opended_after_140630 = comp_dates('2014-06-30', self.ordered_opened) - try: - LOG.info('Handeling {proj}'.format(proj = self.name)) - project = database.ProjectDB(lims, self.id, self.samp_db) - key = find_proj_from_view(self.proj_db, self.name) - project.obj['_id'] = find_or_make_key(key) - if not opended_after_140630: - project.obj = load_status_from_google_docs.get(self.name, project.obj) - if self.upload_data: - info = save_couchdb_obj(self.proj_db, project.obj) - else: - info = self.print_couchdb_obj_to_file(project.obj) - return "project {name} is handled and {info}: _id = {id}".format( - name=self.name, info=info, id=project.obj['_id']) - except: - return ('Issues geting info for {name}. The "Application" udf might' - ' be missing'.format(name = self.name)) - - def project_update_and_logging(self): - start_time = time.time() - self.get_ordered_opened() - if self.ordered_opened: - log_info = self.determine_update() - else: - log_info = ('No open date or order date found for project {name}. ' - 'Project not updated.'.format(name = self.name)) - elapsed = time.time() - start_time - LOG.info('Time - {elapsed} : Proj Name - ' - '{name}'.format(elapsed = elapsed, name = self.name)) - LOG.info(log_info) - -def main(options): - man_name=options.project_name - all_projects=options.all_projects - days=options.days - conf=options.conf - upload_data=options.upload - output_f = options.output_f - couch = load_couch_server(conf) - proj_db = couch['projects'] - samp_db = couch['samples'] - - if all_projects: - projects = lims.get_projects() - masterThread(options,projects) - elif man_name: - proj = lims.get_projects(name = man_name) - if not proj: - LOG.warning('No project named {man_name} in Lims'.format( - man_name = man_name)) - else: - P = PSUL(proj[0], samp_db, proj_db, upload_data, days, man_name, output_f) - P.project_update_and_logging() - -class ThreadPSUL(threading.Thread): - def __init__(self, options,queue): - threading.Thread.__init__(self) - self.options=options - self.queue = queue - couch = load_couch_server(options.conf) - self.proj_db = couch['projects'] - self.samp_db = couch['samples'] - def run(self): - while True: - #grabs project from queue - proj = self.queue.get(block=True, timeout=2) - P = PSUL(proj, self.samp_db, self.proj_db, self.options.upload, self.options.days, self.options.project_name, self.options.output_f) - P.project_update_and_logging() - #signals to queue job is done - self.queue.task_done() - if self.queue.empty() : - break - - -def masterThread(options,projectList): -#spawn a pool of threads, and pass them queue instance - for i in range(options.threads): - t = ThreadPSUL(options,projectsQueue) - t.start() -#populate queue with data - for proj in projectList: - projectsQueue.put(proj) - -#wait on the queue until everything has been processed - projectsQueue.join() - - -if __name__ == '__main__': - usage = "Usage: python project_summary_upload_LIMS.py [options]" - parser = OptionParser(usage=usage) - parser.add_option("-p", "--project", dest = "project_name", default = None, - help = "eg: M.Uhlen_13_01. 
Dont use with -a flagg.") - parser.add_option("-a", "--all_projects", dest = "all_projects", action = - "store_true", default = False, help = ("Upload all Lims ", - "projects into couchDB. Don't use with -f flagg.")) - parser.add_option("-d", "--days", dest = "days", default = 60, help = ( - "Projects with a close_date older than DAYS days are not", - " updated. Default is 60 days. Use with -a flagg")) - parser.add_option("-c", "--conf", dest = "conf", default = os.path.join( - os.environ['HOME'],'opt/config/post_process.yaml'), help = - "Config file. Default: ~/opt/config/post_process.yaml") - parser.add_option("--no_upload", dest = "upload", default = True, action = - "store_false", help = ("Use this tag if project objects ", - "should not be uploaded, but printed to output_f, or to ", - "stdout")) - parser.add_option("--output_f", dest = "output_f", help = ("Output file", - " that will be used only if --no_upload tag is used"), default=None) - parser.add_option("-t", "--threads", type='int', dest = "threads", default = 4, - help = "How many threads will be spawned. Will only work with -a") - - (options, args) = parser.parse_args() - LOG = scilifelab.log.file_logger('LOG', options.conf, 'lims2db_projects.log' - ,'log_dir_tools') - - main(options) - diff --git a/flowcell_summary_uppload_LIMS.py b/scripts/flowcell_summary_uppload_LIMS.py similarity index 100% rename from flowcell_summary_uppload_LIMS.py rename to scripts/flowcell_summary_uppload_LIMS.py diff --git a/reference_uppload_LIMS.py b/scripts/reference_uppload_LIMS.py similarity index 100% rename from reference_uppload_LIMS.py rename to scripts/reference_uppload_LIMS.py diff --git a/separate_load_status_from_google.py b/scripts/separate_load_status_from_google.py similarity index 100% rename from separate_load_status_from_google.py rename to scripts/separate_load_status_from_google.py diff --git a/valitadion_of_LIMS_upgrade.py b/scripts/valitadion_of_LIMS_upgrade.py similarity index 100% rename from valitadion_of_LIMS_upgrade.py rename to scripts/valitadion_of_LIMS_upgrade.py diff --git a/valitadion_of_project_objects_on_statusdb.py b/scripts/valitadion_of_project_objects_on_statusdb.py similarity index 100% rename from valitadion_of_project_objects_on_statusdb.py rename to scripts/valitadion_of_project_objects_on_statusdb.py
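
Note on the removed project_summary_upload_LIMS.py: in -a mode it drives one PSUL per project through a shared Queue consumed by a pool of ThreadPSUL workers, but the workers poll with get(block=True, timeout=2) and exit when the queue looks empty, which can race with the producer that fills the queue after the threads have started. The sketch below is only an illustration of the same worker-pool pattern, written in Python 3 and using sentinel values instead of the empty() check; handle_project, worker and run_pool are hypothetical placeholder names standing in for PSUL.project_update_and_logging and masterThread, not functions from this repository.

# Minimal worker-pool sketch (Python 3), assuming a per-project function handle_project().
import queue
import threading

def handle_project(project):
    # Hypothetical placeholder for the real per-project work
    # (fetch from LIMS, build the project object, upload to statusdb).
    print("handled", project)

def worker(q):
    while True:
        project = q.get()
        if project is None:          # sentinel: no more work for this thread
            q.task_done()
            break
        try:
            handle_project(project)
        finally:
            q.task_done()            # always mark the item as processed

def run_pool(projects, num_threads=4):
    q = queue.Queue()
    threads = [threading.Thread(target=worker, args=(q,)) for _ in range(num_threads)]
    for t in threads:
        t.start()
    for project in projects:         # safe to fill after start: workers block on get()
        q.put(project)
    for _ in threads:                # one sentinel per worker so every thread exits
        q.put(None)
    q.join()                         # wait until every queued item is done
    for t in threads:
        t.join()

if __name__ == "__main__":
    # Example project names only, for illustration.
    run_pool(["M.Uhlen_13_01", "J.Doe_13_02"], num_threads=2)

Using explicit sentinels (or a bounded list filled before the threads start) avoids the situation where a worker sees an empty queue and exits while the producer still has projects left to enqueue.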