diff --git a/README.md b/README.md
index ece50f3..c14a82d 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,7 @@ and define the **details** level of the analysis (useful when analyzing large so
- [nomos](https://github.com/fossology/fossology/tree/master/src/nomos)
- [scancode](https://github.com/nexB/scancode-toolkit)
- [github-linguist](https://github.com/github/linguist)
+- [cqmetrics](https://github.com/dspinellis/cqmetrics)
### How to install/create the executables:
- **Cloc**
@@ -86,6 +87,13 @@ After successfully executing the above mentioned steps, (if required) we have to
pip install simplejson execnet
+- **CQMetrics**
+ ```
+ $> git clone https://github.com/dspinellis/cqmetrics
+ $> cd cqmetrics/src
+ $> make && make install
+ ```
## How to install/uninstall
Graal is being developed and tested mainly on GNU/Linux platforms. Thus it is very likely it will work out of the box
@@ -109,7 +117,7 @@ Several backends have been developed to assess the genericity of Graal. Those ba
tools, where executions are triggered via system calls or their Python interfaces. In the current status, the backends
mostly target Python code, however other backends can be easily developed to cover other programming languages. The
currently available backends are:
-- **CoCom** gathers data about code complexity (e.g., cyclomatic complexity, LOC) from projects written in popular programming languages such as: C/C++, Java, Scala, JavaScript, Ruby, Python, Lua and Golang. It leverages on [Cloc](http://cloc.sourceforge.net/), [Lizard](https://github.com/terryyin/lizard) and [scc](https://github.com/boyter/scc). The tool can be exectued at `file` and `repository` levels activated with the help of category: `code_complexity_lizard_file` or `code_complexity_lizard_repository`.
+- **CoCom** gathers data about code complexity (e.g., cyclomatic complexity, LOC) from projects written in popular programming languages such as: C/C++, Java, Scala, JavaScript, Ruby, Python, Lua and Golang. It leverages [Cloc](http://cloc.sourceforge.net/), [Lizard](https://github.com/terryyin/lizard), [scc](https://github.com/boyter/scc), and [CQMetrics](https://github.com/dspinellis/cqmetrics). The tool can be executed at `file` and `repository` levels activated with the help of category: `code_complexity_lizard_file` or `code_complexity_lizard_repository`.
- **CoDep** extracts package and class dependencies of a Python module and serialized them as JSON structures, composed of edges and nodes, thus easing the bridging with front-end technologies for graph visualizations. It combines [PyReverse](https://pypi.org/project/pyreverse/) and [NetworkX](https://networkx.github.io/).
- **CoQua** retrieves code quality insights, such as checks about line-code’s length, well-formed variable names, unused imported modules and code clones. It uses [PyLint](https://www.pylint.org/) and [Flake8](http://flake8.pycqa.org/en/latest/index.html). The tools can be activated by passing the corresponding category: `code_quality_pylint` or `code_quality_flake8`.
- **CoVuln** scans the code to identify security vulnerabilities such as potential SQL and Shell injections, hard-coded passwords and weak cryptographic key size. It relies on [Bandit](https://github.com/PyCQA/bandit).
diff --git a/graal/backends/core/analyzers/cqmetrics-names.tsv b/graal/backends/core/analyzers/cqmetrics-names.tsv
new file mode 100644
index 0000000..8eab60b
--- /dev/null
+++ b/graal/backends/core/analyzers/cqmetrics-names.tsv
@@ -0,0 +1 @@
+nchar nline line_length_min line_length_mean line_length_median line_length_max line_length_sd nempty_line nfunction nstatement statement_nesting_min statement_nesting_mean statement_nesting_median statement_nesting_max statement_nesting_sd ninternal nconst nenum ngoto ninline nnoalias nregister nrestrict nsigned nstruct nunion nunsigned nvoid nvolatile ntypedef ncomment ncomment_char nboilerplate_comment_char ndox_comment ndox_comment_char nfun_comment ncpp_directive ncpp_include ncpp_conditional nfun_cpp_directive nfun_cpp_conditional style_inconsistency nfunction2 halstead_min halstead_mean halstead_median halstead_max halstead_sd nfunction3 cyclomatic_min cyclomatic_mean cyclomatic_median cyclomatic_max cyclomatic_sd nidentifier identifier_length_min identifier_length_mean identifier_length_median identifier_length_max identifier_length_sd unique_nidentifier unique_identifier_length_min unique_identifier_length_mean unique_identifier_length_median unique_identifier_length_max unique_identifier_length_sd indentation_spacing_count indentation_spacing_min indentation_spacing_mean indentation_spacing_median indentation_spacing_max indentation_spacing_sd nno_space_after_binary_op nno_space_after_closing_brace nno_space_after_comma nno_space_after_keyword nno_space_after_opening_brace nno_space_after_semicolon nno_space_before_binary_op nno_space_before_closing_brace nno_space_before_keyword nno_space_before_opening_brace nspace_after_opening_square_bracket nspace_after_struct_op nspace_after_unary_op nspace_at_end_of_line nspace_before_closing_bracket nspace_before_closing_square_bracket nspace_before_comma nspace_before_opening_square_bracket nspace_before_semicolon nspace_before_struct_op nspace_after_binary_op nspace_after_closing_brace nspace_after_comma nspace_after_keyword nspace_after_opening_brace nspace_after_semicolon nno_space_after_struct_op nspace_before_binary_op nspace_before_closing_brace nspace_before_keyword nspace_before_opening_brace 
nno_space_before_struct_op nno_space_after_opening_square_bracket nno_space_after_unary_op nno_space_before_closing_bracket nno_space_before_closing_square_bracket nno_space_before_comma nno_space_before_opening_square_bracket nno_space_before_semicolon
diff --git a/graal/backends/core/analyzers/qmcalc.py b/graal/backends/core/analyzers/qmcalc.py
new file mode 100644
index 0000000..63e6f21
--- /dev/null
+++ b/graal/backends/core/analyzers/qmcalc.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (C) 2015-2020 Bitergia
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see http://www.gnu.org/licenses/.
+# Authors:
+# James Walden
+# Valerio Cosentino
+# inishchith
+import subprocess
+from pathlib import Path
+from statistics import mean, median, stdev
+from graal.graal import (GraalError,
+ GraalRepository)
+from .analyzer import Analyzer
+class QMCalc(Analyzer):
+    """A wrapper for QMCalc (cqmetrics)
+
+    This class calls the `qmcalc` executable on a single file, or on every
+    `*.c`/`*.h` file found under a repository path, parses the tab-separated
+    output and returns it as a dict.
+    """
+    version = '0.0.1'
+    metrics_names_file = 'cqmetrics-names.tsv'
+    metrics_names_path = Path(__file__).parent.absolute().joinpath(metrics_names_file)
+
+    def __init__(self):
+        """Load the ordered list of metric names from the bundled TSV file.
+
+        :raises GraalError: when the metric names file cannot be read
+        """
+        try:
+            with open(QMCalc.metrics_names_path) as f:
+                name_string = f.read().rstrip()
+        except (OSError, UnicodeError) as e:
+            # The class attribute must be qualified here: a bare
+            # `metrics_names_path` is not in scope inside the method.
+            raise GraalError(cause="Error on reading cqmetrics metric names from %s"
+                                   % QMCalc.metrics_names_path) from e
+
+        self.metrics_names = name_string.split("\t")
+
+    def __analyze_file(self, message, file_path, relative_path):
+        """Convert tab-separated metrics values from qmcalc into a dictionary
+
+        :param message: qmcalc standard output for one file (a single line)
+        :param file_path: path of the analyzed file
+        :param relative_path: path that the reported `file_path` is made relative to
+
+        :returns result: dict of the results of qmcalc analysis of a file
+        """
+        int_suffixes = ("_length_min", "_length_max", "_nesting_min", "_nesting_max")
+
+        # Strip only the line terminator: trailing tabs delimit empty metric
+        # values, which must be kept so they are reported as 'NA'.
+        value_strings = message.rstrip("\r\n").split("\t")
+        results = dict(zip(self.metrics_names, value_strings))
+
+        # Coerce each metric value to correct type or NA
+        for metric, value in results.items():
+            if value == '':
+                results[metric] = 'NA'
+            elif metric.startswith('n') or metric.endswith(int_suffixes):
+                results[metric] = int(value)
+            else:
+                results[metric] = float(value)
+
+        path = Path(file_path)
+        results['file_path'] = path.relative_to(relative_path).as_posix()
+        results['file_extension'] = path.suffix
+
+        return results
+
+    def __analyze_repository(self, message, file_paths, relative_path):
+        """Return metrics for all files in repository.
+
+        :param message: message from standard output after execution of qmcalc
+        :param file_paths: array of paths to C source and header files
+        :param relative_path: path to repository containing source files
+
+        :returns result: dict of the results of the analysis over a repository
+        """
+        # Output lines are paired with the input paths by position.
+        file_metrics = [
+            self.__analyze_file(line, file_paths[index], relative_path)
+            for index, line in enumerate(message.splitlines())
+        ]
+
+        # Build results dictionary with summary data and file_metrics
+        results = {
+            'nfiles': len(file_metrics),
+            'files': file_metrics
+        }
+
+        for metric_name in self.metrics_names:
+            if metric_name == 'filename':
+                continue
+
+            values = [metrics[metric_name] for metrics in file_metrics
+                      if metrics[metric_name] != 'NA']
+
+            if metric_name.endswith('sd'):
+                # The repository-level deviation is computed over the per-file
+                # means; stdev() needs at least two data points.
+                mean_name = metric_name[:-len('sd')] + 'mean'
+                means = [metrics[mean_name] for metrics in file_metrics
+                         if metrics[mean_name] != 'NA']
+                results[metric_name] = stdev(means) if len(means) > 1 else 'NA'
+            elif metric_name.endswith('min'):
+                results[metric_name] = min(values) if values else 'NA'
+            elif metric_name.endswith('max'):
+                results[metric_name] = max(values) if values else 'NA'
+            elif metric_name.endswith('mean'):
+                results[metric_name] = mean(values) if values else 'NA'
+            elif metric_name.endswith('median'):
+                results[metric_name] = median(values) if values else 'NA'
+            else:
+                results[metric_name] = sum(values)
+
+        return results
+
+    def analyze(self, **kwargs):
+        """Add information using qmcalc
+
+        :param file_path: path of a single C source or header file to analyze
+        :param repository_path: path of the repository to analyze (repository level only)
+        :param repository_level: set to True if analysis has to be performed on a repository
+
+        :returns result: dict of the results of the analysis, or an empty
+            list when there is no file to analyze
+        :raises GraalError: when the qmcalc command exits with an error
+        """
+        repository_level = kwargs.get('repository_level', False)
+        if repository_level:
+            target = kwargs['repository_path']
+            file_paths = list(Path(target).glob('**/*.[ch]'))
+        else:
+            target = kwargs['file_path']
+            file_paths = [target]
+
+        # If no C source/header files exist, return empty array for results
+        if not file_paths:
+            return []
+
+        # Run qmcalc to compute metrics for all file paths
+        qmcalc_command = ['qmcalc'] + [str(path) for path in file_paths]
+        try:
+            message = subprocess.check_output(qmcalc_command).decode('utf-8')
+        except subprocess.CalledProcessError as e:
+            raise GraalError(cause="QMCalc failed at %s, %s"
+                                   % (target, e.output.decode('utf-8'))) from e
+        finally:
+            # NOTE(review): `_cleanup` is a private subprocess helper; kept
+            # because the original relied on it.
+            subprocess._cleanup()
+
+        if repository_level:
+            results = self.__analyze_repository(message, file_paths, target)
+        else:
+            # The file is made relative to itself, so the reported
+            # 'file_path' is '.' at this level.
+            results = self.__analyze_file(message, file_paths[0], target)
+
+        return results
diff --git a/graal/backends/core/cocom.py b/graal/backends/core/cocom.py
index 640532a..9e17fae 100644
--- a/graal/backends/core/cocom.py
+++ b/graal/backends/core/cocom.py
@@ -30,6 +30,7 @@
from graal.backends.core.analyzers.cloc import Cloc
from graal.backends.core.analyzers.lizard import Lizard
+from graal.backends.core.analyzers.qmcalc import QMCalc
from graal.backends.core.analyzers.scc import SCC
@@ -39,12 +40,18 @@
LIZARD_FILE = 'lizard_file'
LIZARD_REPOSITORY = 'lizard_repository'
+QMCALC_FILE = 'qmcalc_file'
+QMCALC_REPOSITORY = 'qmcalc_repository'
CATEGORY_COCOM_SCC_FILE = 'code_complexity_' + SCC_FILE
logger = logging.getLogger(__name__)
@@ -84,10 +91,12 @@ class CoCom(Graal):
:raises RepositoryError: raised when there was an error cloning or
updating the repository.
- version = '0.5.1'
+ version = '0.6.0'
@@ -113,6 +122,10 @@ def fetch(self, category=CATEGORY_COCOM_LIZARD_FILE, paths=None,
self.analyzer_kind = LIZARD_FILE
self.analyzer_kind = LIZARD_REPOSITORY
+ elif category == CATEGORY_COCOM_QMCALC_FILE:
+ self.analyzer_kind = QMCALC_FILE
+ self.analyzer_kind = QMCALC_REPOSITORY
elif category == CATEGORY_COCOM_SCC_FILE:
self.analyzer_kind = SCC_FILE
@@ -141,6 +154,10 @@ def metadata_category(item):
elif item['analyzer'] == LIZARD_REPOSITORY:
+ elif item['analyzer'] == QMCALC_FILE:
+ elif item['analyzer'] == QMCALC_REPOSITORY:
elif item['analyzer'] == SCC_FILE:
elif item['analyzer'] == SCC_REPOSITORY:
@@ -173,7 +190,7 @@ def _analyze(self, commit):
analysis = []
- if self.analyzer_kind in [LIZARD_FILE, SCC_FILE]:
+ if self.analyzer_kind in [LIZARD_FILE, QMCALC_FILE, SCC_FILE]:
for committed_file in commit['files']:
file_path = committed_file['file']
@@ -211,6 +228,7 @@ def _analyze(self, commit):
file_info = self.analyzer.analyze(local_path)
file_info.update({'file_path': file_path})
files_affected = [file_info['file'] for file_info in commit['files']]
@@ -234,6 +252,7 @@ class FileAnalyzer:
"""Class to analyse the content of files"""
ALLOWED_EXTENSIONS = ['java', 'py', 'php', 'scala', 'js', 'rb', 'cs', 'cpp', 'c', 'lua', 'go', 'swift']
FORBIDDEN_EXTENSIONS = ['tar', 'bz2', "gz", "lz", "apk", "tbz2",
"lzma", "tlz", "war", "xar", "zip", "zipx"]
@@ -244,6 +263,9 @@ def __init__(self, details=False, kind=LIZARD_FILE):
if self.kind == LIZARD_FILE:
self.cloc = Cloc()
self.lizard = Lizard()
+ elif self.kind == QMCALC_FILE:
+ self.cloc = Cloc()
+ self.qmcalc = QMCalc()
self.scc = SCC()
@@ -281,6 +303,11 @@ def analyze(self, file_path):
file_analysis['blanks'] = cloc_analysis['blanks']
file_analysis['comments'] = cloc_analysis['comments']
+ elif self.kind == QMCALC_FILE:
+ if GraalRepository.extension(file_path) in self.QMC_ALLOWED_EXTENSIONS:
+ file_analysis = self.qmcalc.analyze(**kwargs)
+ else:
+ file_analysis = self.cloc.analyze(**kwargs)
file_analysis = self.scc.analyze(**kwargs)
@@ -299,6 +326,8 @@ def __init__(self, details=False, kind=LIZARD_REPOSITORY):
self.analyzer = Lizard()
+ elif self.kind == QMCALC_REPOSITORY:
+ self.analyzer = QMCalc()
self.analyzer = SCC()
@@ -327,6 +356,9 @@ def analyze(self, repository_path, files_affected):
'files_affected': files_affected,
'details': self.details
+ repository_path = kwargs.get('repository_path', False)
+ if not repository_path:
+ raise GraalError(cause="The 'repository_path' argument is not set for a repository level analysis.")
repository_analysis = self.analyzer.analyze(**kwargs)
diff --git a/tests/base_analyzer.py b/tests/base_analyzer.py
index 4141613..6ba11a2 100644
--- a/tests/base_analyzer.py
+++ b/tests/base_analyzer.py
@@ -29,6 +29,7 @@
ANALYZER_TEST_FILE = "sample_code.py"
+ANALYZER_TEST_C_FILE = "sample_code.c"
DOCKERFILE_TEST = "Dockerfile"
diff --git a/tests/base_repo.py b/tests/base_repo.py
index db2f2c5..4ffb52f 100644
--- a/tests/base_repo.py
+++ b/tests/base_repo.py
@@ -52,7 +52,7 @@ def setUp(self):
zip_path = os.path.join(data_path, self.repo_name + '.zip')
subprocess.check_call(['unzip', '-qq', zip_path, '-d', self.tmp_repo_path])
- origin_path = os.path.join(self.tmp_repo_path, 'graaltest')
+ origin_path = os.path.join(self.tmp_repo_path, self.repo_name)
subprocess.check_call(['git', 'clone', '-q', '--bare', origin_path, self.git_path],
diff --git a/tests/data/BSDCoreUtils.zip b/tests/data/BSDCoreUtils.zip
new file mode 100644
index 0000000..f4e6902
Binary files /dev/null and b/tests/data/BSDCoreUtils.zip differ
diff --git a/tests/data/sample_code.c b/tests/data/sample_code.c
new file mode 100644
index 0000000..89f9df2
--- /dev/null
+++ b/tests/data/sample_code.c
@@ -0,0 +1,44 @@
+ * Example file based on BSD-licensed c2rust project example files:
+ * https://github.com/immunant/c2rust/blob/master/examples/qsort/qsort.c
+ */
+#define DOES_EXIST
+static const unsigned int testvar=0;
+void swap(int* a, int* b)
+ int t = *a;
+ *a = *b;
+ *b = t;
+int partition (int arr[], int low, int high)
+ int pivot = arr[high];
+ int i = low - 1;
+ for (int j = low; j <= high - 1; j++) {
+ if (arr[j] <= pivot) {
+ i++;
+ swap(&arr[i], &arr[j]);
+ }
+ }
+ swap(&arr[i + 1], &arr[high]);
+ return i + 1;
+void quickSort(int arr[], int low, int high)
+ if (low < high) {
+ int i = partition(arr, low, high);
+ quickSort(arr, low, i - 1);
+ quickSort(arr, i + 1, high);
+ }
diff --git a/tests/test_qmcalc.py b/tests/test_qmcalc.py
new file mode 100644
index 0000000..1c11b98
--- /dev/null
+++ b/tests/test_qmcalc.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (C) 2015-2020 Bitergia
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see http://www.gnu.org/licenses/.
+# Authors:
+# James Walden
+# Valerio Cosentino
+# inishchith
+import os
+import subprocess
+import unittest.mock
+from base_analyzer import (TestCaseAnalyzer,
+from base_repo import TestCaseRepo
+from graal.backends.core.analyzers.qmcalc import QMCalc
+from graal.graal import GraalError
+class TestQMCalc(TestCaseAnalyzer):
+    """QMCalc tests"""
+
+    def _analyze_sample(self, analyzer):
+        """Run `analyzer` on the sample C file and return its result"""
+        sample_path = os.path.join(self.tmp_data_path, ANALYZER_TEST_C_FILE)
+        return analyzer.analyze(file_path=sample_path)
+
+    def test_initialization(self):
+        """Test whether attributes are initialized"""
+
+        self.assertEqual(len(QMCalc().metrics_names), 111)
+
+    def test_analyze_fields_present(self):
+        """Test whether qmcalc returns the expected fields"""
+
+        qmc = QMCalc()
+        result = self._analyze_sample(qmc)
+
+        for metric_name in qmc.metrics_names:
+            self.assertIn(metric_name, result)
+
+    def test_analyze_c(self):
+        """Test whether qmcalc returns expected code metric values"""
+
+        result = self._analyze_sample(QMCalc())
+
+        size_metrics = (
+            ('nchar', 839),
+            ('nline', 44),
+            ('nfunction', 3),
+        )
+        for metric, expected in size_metrics:
+            self.assertEqual(result[metric], expected)
+
+        # Both function counters must agree
+        self.assertEqual(result['nfunction'], result['nfunction2'])
+
+        exact_metrics = (
+            ('identifier_length_max', 14),
+            ('identifier_length_min', 1),
+            ('line_length_min', 0),
+            ('line_length_median', 15.5),
+            ('line_length_max', 73),
+            ('ncpp_directive', 5),
+            ('ncpp_conditional', 1),
+            ('ncpp_include', 2),
+            ('ncomment', 1),
+            ('nconst', 1),
+            ('nenum', 0),
+            ('ngoto', 0),
+            ('nsigned', 0),
+            ('nstruct', 0),
+            ('nunion', 0),
+            ('nunsigned', 1),
+            ('nvoid', 2),
+        )
+        for metric, expected in exact_metrics:
+            self.assertEqual(result[metric], expected)
+
+        approximate_metrics = (
+            ('halstead_mean', 124.599),
+            ('halstead_median', 114.714),
+            ('halstead_max', 228.898),
+            ('halstead_min', 30.185),
+        )
+        for metric, expected in approximate_metrics:
+            self.assertAlmostEqual(result[metric], expected, places=2)
+
+        self.assertEqual(result['statement_nesting_max'], 2)
+class TestQMCalcRepo(TestCaseRepo):
+    """QMCalc repository-level tests"""
+
+    repo_name = 'BSDCoreUtils'
+
+    def _analyze_repository(self):
+        """Run QMCalc at repository level on the unpacked test repository"""
+        origin_path = os.path.join(self.tmp_repo_path, self.repo_name)
+        return QMCalc().analyze(repository_path=origin_path,
+                                repository_level=True)
+
+    def test_analyze_repository_level_summary(self):
+        """Test metric summary values for repository."""
+
+        results = self._analyze_repository()
+
+        exact_metrics = (
+            ('nfiles', 186),
+            ('nline', 43440),
+            ('nfunction', 892),
+            ('ngoto', 161),
+        )
+        for metric, expected in exact_metrics:
+            self.assertEqual(results[metric], expected)
+
+        approximate_metrics = (
+            ('halstead_mean', 605.45),
+            ('halstead_median', 332.97),
+            ('halstead_max', 11686.9),
+            ('halstead_min', 0),
+            ('halstead_sd', 551.99),
+        )
+        for metric, expected in approximate_metrics:
+            self.assertAlmostEqual(results[metric], expected, places=2)
+
+        self.assertEqual(results['statement_nesting_max'], 9)
+
+    def test_analyze_repository_level_files(self):
+        """Test metric values for a single file from repository."""
+
+        results = self._analyze_repository()
+
+        # NOTE(review): picks the last analyzed file, expected to be
+        # strlcpy.c; this depends on the order in which the analyzer
+        # lists the repository files.
+        result = results['files'][-1]  # strlcpy.c
+
+        for metric, expected in (('nchar', 1686), ('nline', 54), ('nfunction', 1)):
+            self.assertEqual(result[metric], expected)
+
+        cyclomatic_metrics = ('cyclomatic_mean', 'cyclomatic_median',
+                              'cyclomatic_max', 'cyclomatic_min')
+        for metric in cyclomatic_metrics:
+            self.assertAlmostEqual(result[metric], 7.0, places=1)
+
+        self.assertEqual(result['statement_nesting_max'], 5)
+# Run the tests of this module when the file is executed as a script.
+if __name__ == "__main__":
+ unittest.main()