From f7b8421f51b7a7da3879c6ccf95fd226cadad584 Mon Sep 17 00:00:00 2001 From: SooLee Date: Sat, 16 Sep 2017 09:47:14 -0400 Subject: [PATCH] benchmark for partb --- Benchmark/Benchmark.py | 42 +++++++++++++++++++++++++++++++++++++++- tests/tests_benchmark.py | 12 ++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/Benchmark/Benchmark.py b/Benchmark/Benchmark.py index 10d3f33..97a1eb0 100644 --- a/Benchmark/Benchmark.py +++ b/Benchmark/Benchmark.py @@ -16,6 +16,7 @@ GB_IN_BYTES = 1073741824 MB_IN_BYTES = 1048576 +GB_IN_MB = 1024 class BenchmarkResult(object): @@ -24,7 +25,7 @@ def __init__(self, size, mem, cpu): self.total_size_in_GB = size self.total_mem_in_MB = mem self.min_CPU = cpu - self.aws = get_optimal_instance_type(cpu=cpu, mem_in_gb=mem / 1024) + self.aws = get_optimal_instance_type(cpu=cpu, mem_in_gb=mem / GB_IN_MB) def as_dict(self): return self.__dict__ @@ -47,6 +48,8 @@ def benchmark(app_name, input_json, raise_error=False): return(pairsam_filter(input_json)) elif app_name == 'addfragtopairs': return(addfragtopairs(input_json)) + elif app_name == 'hi-c-processing-partb': + return(hi_c_processing_partb(input_json)) else: if raise_error: raise AppNameUnavailableException @@ -227,6 +230,43 @@ def addfragtopairs(input_json): return(r.as_dict()) +def hi_c_processing_partb(input_json): + assert 'input_size_in_bytes' in input_json + assert 'input_pairs' in input_json.get('input_size_in_bytes') + in_size = input_json['input_size_in_bytes'] + assert isinstance(in_size['input_pairs'], list) + + # cpu + nthreads = 8 # default from cwl + if 'parameters' in input_json: + if 'ncores' in input_json.get('parameters'): + nthreads = input_json.get('parameters').get('ncores') + + # space + input_size = sum(in_size['input_pairs']) / GB_IN_BYTES + out_pairs_size = input_size + out_cool_size = input_size + out_hic_size = input_size + out_size = out_pairs_size + out_cool_size + out_hic_size + total_size = input_size + out_size + total_safe_size = total_size * 2 + + # mem + mem = 14 * GB_IN_MB # default from cwl + if 'parameters' in input_json: + if 'maxmem' in input_json.get('parameters'): + maxmem = input_json.get('parameters').get('maxmem') + if 'g' in maxmem: + mem = int(maxmem.replace('g', '')) * GB_IN_MB + elif 'm' in maxmem: + mem = int(maxmem.replace('m', '')) + else: + raise Exception("proper maxmem string?") + + r = BenchmarkResult(size=total_safe_size, mem=mem, cpu=nthreads) + return(r.as_dict()) + + def get_aws_ec2_info_file(): this_dir, _ = os.path.split(__file__) return(os.path.join(this_dir, "aws", "Amazon EC2 Instance Comparison.csv")) diff --git a/tests/tests_benchmark.py b/tests/tests_benchmark.py index 5b9700c..5ee5b46 100644 --- a/tests/tests_benchmark.py +++ b/tests/tests_benchmark.py @@ -89,6 +89,18 @@ def test_benchmark8(self): assert res['aws']['recommended_instance_type'] == 't2.micro' print(res) + def test_benchmark9(self): + input_json = {'input_size_in_bytes': {'input_pairs': [1000000000, + 2000000000, + 3000000000]}, + 'parameters': {'ncores': 1, + 'maxmem': '1900g'}} + res = B.benchmark('hi-c-processing-partb', input_json) + print(res) + assert 'aws' in res + assert 'recommended_instance_type' in res['aws'] + assert res['aws']['recommended_instance_type'] == 'x1.32xlarge' + def test_benchmark_none1(self): input_json = {'input_size_in_bytes': {'fastq1': 93520, 'fastq2': 97604,