Initial qmcalc analyzer class with support for single file analyses

chaoss · Jan 12, 2022 · a53b243 · a53b243
1 parent f187d7f
commit a53b243
Show file tree

Hide file tree

Showing 5 changed files with 306 additions and 0 deletions.
diff --git a/graal/backends/core/analyzers/cqmetrics-names.tsv b/graal/backends/core/analyzers/cqmetrics-names.tsv
@@ -0,0 +1 @@
+nchar	nline	line_length_min	line_length_mean	line_length_median	line_length_max	line_length_sd	nempty_line	nfunction	nstatement	statement_nesting_min	statement_nesting_mean	statement_nesting_median	statement_nesting_max	statement_nesting_sd	ninternal	nconst	nenum	ngoto	ninline	nnoalias	nregister	nrestrict	nsigned	nstruct	nunion	nunsigned	nvoid	nvolatile	ntypedef	ncomment	ncomment_char	nboilerplate_comment_char	ndox_comment	ndox_comment_char	nfun_comment	ncpp_directive	ncpp_include	ncpp_conditional	nfun_cpp_directive	nfun_cpp_conditional	style_inconsistency	nfunction	halstead_min	halstead_mean	halstead_median	halstead_max	halstead_sd	nfunction2	cyclomatic_min	cyclomatic_mean	cyclomatic_median	cyclomatic_max	cyclomatic_sd	nidentifier	identifier_length_min	identifier_length_mean	identifier_length_median	identifier_length_max	identifier_length_sd	unique_nidentifier	unique_identifier_length_min	unique_identifier_length_mean	unique_identifier_length_median	unique_identifier_length_max	unique_identifier_length_sd	indentation_spacing_count	indentation_spacing_min	indentation_spacing_mean	indentation_spacing_median	indentation_spacing_max	indentation_spacing_sd	nno_space_after_binary_op	nno_space_after_closing_brace	nno_space_after_comma	nno_space_after_keyword	nno_space_after_opening_brace	nno_space_after_semicolon	nno_space_before_binary_op	nno_space_before_closing_brace	nno_space_before_keyword	nno_space_before_opening_brace	nspace_after_opening_square_bracket	nspace_after_struct_op	nspace_after_unary_op	nspace_at_end_of_line	nspace_before_closing_bracket	nspace_before_closing_square_bracket	nspace_before_comma	nspace_before_opening_square_bracket	nspace_before_semicolon	nspace_before_struct_op	nspace_after_binary_op	nspace_after_closing_brace	nspace_after_comma	nspace_after_keyword	nspace_after_opening_brace	nspace_after_semicolon	nno_space_after_struct_op	nspace_before_binary_op	nspace_before_closing_brace	nspace_before_keyword	nspace_before_opening_brace	nno_space_before_struct_op	nno_space_after_opening_square_bracket	nno_space_after_unary_op	nno_space_before_closing_bracket	nno_space_before_closing_square_bracket	nno_space_before_comma	nno_space_before_opening_square_bracket	nno_space_before_semicolon
diff --git a/graal/backends/core/analyzers/qmcalc.py b/graal/backends/core/analyzers/qmcalc.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2015-2020 Bitergia
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+#     James Walden <[email protected]>
+#     Valerio Cosentino <[email protected]>
+#     inishchith <[email protected]>
+#
+
+import subprocess
+from pathlib import Path
+
+from graal.graal import (GraalError,
+                         GraalRepository)
+from .analyzer import Analyzer
+
+
+class QMCalc(Analyzer):
+    """A wrapper for QMCalc (cqmetrics)
+
+    This class allows to call QMCalc with a file, parses
+    the result of the analysis and returns it as a dict.
+
+    :param diff_timeout: max time to compute diffs of a given file
+    """
+    version = '0.0.1'
+    metrics_names_file = 'cqmetrics-names.tsv'
+    metrics_names_path = Path(__file__).parent.absolute().joinpath(metrics_names_file)
+
+    def __init__(self):
+        try:
+            with open(QMCalc.metrics_names_path) as f:
+                name_string = f.read().rstrip()
+        except:
+            raise GraalError(cause="Error on reading cqmetrics names from %" % metrics_names_path)
+
+        self.metrics_names = name_string.split("\t")
+
+    def __is_metric_int(self, metric):
+        metric[0] == 'n' or metric.endswith("_length_min") or metric.endswith("_length_max") or metric.endswith("_nesting_min") or metric.endswith("_nesting_max")
+
+    def __analyze_file(self, message):
+        """Convert tab-separated metrics values from qmcalc into a dictionary
+
+        :param message: message from standard output after execution of qmcalc
+
+        :returns result: dict of the results of qmcalc analysis of a file
+        """
+
+        value_strings = message.rstrip().split("\t")
+        results = dict(zip(self.metrics_names, value_strings))
+        for metric in results:
+            if self.__is_metric_int(metric):
+                results[metric] = int(results[metric])
+            else:
+                results[metric] = float(results[metric])
+
+        return results
+
+    def __analyze_repository(self, message):
+        # FIXME: not implemented yet
+        """Add information LOC, total files, blank and commented lines using CLOC for the entire repository
+
+        :param message: message from standard output after execution of qmcalc
+
+        :returns result: dict of the results of the analysis over a repository
+        """
+
+        results = {}
+        flag = False
+
+        for line in message.strip().split("\n"):
+            if flag:
+                if line.lower().startswith("sum"):
+                    break
+                elif not line.startswith("-----"):
+                    digested_split = line.split()
+                    langauge, files_info = digested_split[:-4], digested_split[-4:]
+                    language = " ".join(langauge)
+                    total_files, blank_lines, commented_lines, loc = map(int, files_info)
+                    language_result = {
+                        "total_files": total_files,
+                        "blanks": blank_lines,
+                        "comments": commented_lines,
+                        "loc": loc
+                    }
+                    results[language] = language_result
+
+            if line.lower().startswith("language"):
+                flag = True
+
+        return results
+
+    def analyze(self, **kwargs):
+        """Add information using qmcalc
+
+        :param file_path: file path
+        :param repository_level: set to True if analysis has to be performed on a repository
+
+        :returns result: dict of the results of the analysis
+        """
+
+        file_path = kwargs['file_path']
+        repository_level = kwargs.get('repository_level', False)
+        # FIXME: we currently only handle the single file case
+
+        try:
+            qmcalc_command = ['qmcalc', file_path]
+            message = subprocess.check_output(qmcalc_command).decode("utf-8")
+        except subprocess.CalledProcessError as e:
+            raise GraalError(cause="QMCalc failed at %s, %s" % (file_path, e.output.decode("utf-8")))
+        finally:
+            subprocess._cleanup()
+
+        if repository_level:
+            results = self.__analyze_repository(message)
+        else:
+            results = self.__analyze_file(message)
+            results['ext'] = GraalRepository.extension(file_path)
+
+        print(results) # FIXME: debug print
+        return results
diff --git a/tests/base_analyzer.py b/tests/base_analyzer.py
@@ -29,6 +29,7 @@
 
 ANALYZER_TEST_FOLDER = "data/"
 ANALYZER_TEST_FILE = "sample_code.py"
+ANALYZER_TEST_C_FILE = "sample_code.c"
 DOCKERFILE_TEST = "Dockerfile"
 
 

diff --git a/tests/data/sample_code.c b/tests/data/sample_code.c
@@ -0,0 +1,44 @@
+/* 
+ * Example file based on BSD-licensed c2rust project example files:
+ * https://github.com/immunant/c2rust/blob/master/examples/qsort/qsort.c 
+ */
+
+#ifdef DOES_NOT_EXIST
+#define DOES_EXIST
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static const unsigned int testvar=0;
+
+void swap(int* a, int* b)
+{
+    int t = *a;
+    *a = *b;
+    *b = t;
+}
+
+int partition (int arr[], int low, int high)
+{
+    int pivot = arr[high];
+    int i = low - 1;
+
+    for (int j = low; j <= high - 1; j++) {
+        if (arr[j] <= pivot) {
+            i++;
+            swap(&arr[i], &arr[j]);
+        }
+    }
+    swap(&arr[i + 1], &arr[high]);
+    return i + 1;
+}
+
+void quickSort(int arr[], int low, int high)
+{
+    if (low < high) {
+        int i = partition(arr, low, high);
+        quickSort(arr, low, i - 1);
+        quickSort(arr, i + 1, high);
+    }
+}
diff --git a/tests/test_qmcalc.py b/tests/test_qmcalc.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2015-2020 Bitergia
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+#     James Walden <[email protected]>
+#     Valerio Cosentino <[email protected]>
+#     inishchith <[email protected]>
+#
+
+import os
+import subprocess
+import unittest.mock
+
+from base_analyzer import (TestCaseAnalyzer,
+                           ANALYZER_TEST_C_FILE)
+
+from graal.backends.core.analyzers.qmcalc import QMCalc
+from graal.graal import GraalError
+
+
+class TestQMCalc(TestCaseAnalyzer):
+    """QMCalc tests"""
+
+    def test_initialization(self):
+        """Test whether attributes are initializated"""
+
+        qmc = QMCalc()
+        self.assertEqual(len(qmc.metrics_names), 111)
+
+    def test_analyze_fields_present(self):
+        """Test whether qmcalc returns the expected fields """
+
+        qmc = QMCalc()
+        # kwargs = {'file_path': 'data/sample_code.c'} FIXME
+        kwargs = {'file_path': os.path.join(self.tmp_data_path, ANALYZER_TEST_C_FILE)}
+        result = qmc.analyze(**kwargs)
+
+        for metric_name in qmc.metrics_names:
+            self.assertIn(metric_name, result)
+
+    def test_analyze_c(self):
+        """Test whether qmcalc returns expected code metric values"""
+
+        qmc = QMCalc()
+        kwargs = {'file_path': os.path.join(self.tmp_data_path, ANALYZER_TEST_C_FILE)}
+        result = qmc.analyze(**kwargs)
+
+        self.assertEqual(result['nchar'], 839)
+        self.assertEqual(result['nline'], 44)
+        self.assertEqual(result['nfunction'], 3)
+        self.assertEqual(result['nfunction2'], 3)
+        self.assertEqual(result['identifier_length_max'], 14)
+        self.assertEqual(result['identifier_length_min'], 1)
+        self.assertEqual(result['line_length_min'], 0)
+        self.assertEqual(result['line_length_median'], 15.5)
+        self.assertEqual(result['line_length_max'], 73)
+        self.assertEqual(result['ncpp_directive'], 5)
+        self.assertEqual(result['ncpp_conditional'], 1)
+        self.assertEqual(result['ncpp_include'], 2)
+        self.assertEqual(result['ncomment'], 1)
+        self.assertEqual(result['nconst'], 1)
+        self.assertEqual(result['nenum'], 0)
+        self.assertEqual(result['ngoto'], 0)
+        self.assertEqual(result['nsigned'], 0)
+        self.assertEqual(result['nstruct'], 0)
+        self.assertEqual(result['nunion'], 0)
+        self.assertEqual(result['nunsigned'], 1)
+        self.assertEqual(result['nvoid'], 2)
+        self.assertEqual(result['halstead_mean'], 124.599)
+        self.assertEqual(result['halstead_median'], 114.714)
+        self.assertEqual(result['halstead_max'], 228.898)
+        self.assertEqual(result['halstead_min'], 30.1851)
+        self.assertEqual(result['statement_nesting_max'], 2)
+
+#     def test_analyze_repository_level(self):
+#         """Test whether qmcalc returns the expected fields data for repository level"""
+
+#         qmc = QMCalc()
+#         kwargs = {
+#             'file_path': self.origin_path,
+#             'repository_level': True
+#         }
+#         results = qmc.analyze(**kwargs)
+#         result = results[next(iter(results))]
+
+#         self.assertIn('blanks', result)
+#         self.assertTrue(type(result['blanks']), int)
+#         self.assertIn('comments', result)
+#         self.assertTrue(type(result['comments']), int)
+#         self.assertIn('loc', result)
+#         self.assertTrue(type(result['loc']), int)
+#         self.assertIn('total_files', result)
+#         self.assertTrue(type(result['total_files']), int)
+
+#     @unittest.mock.patch('subprocess.check_output')
+#     def test_analyze_error(self, check_output_mock):
+#         """Test whether an exception is thrown in case of errors"""
+
+#         check_output_mock.side_effect = subprocess.CalledProcessError(-1, "command", output=b'output')
+
+#         qmc = QMCalc()
+#         kwargs = {'file_path': os.path.join(self.tmp_data_path, ANALYZER_TEST_FILE)}
+#         with self.assertRaises(GraalError):
+#             _ = qmc.analyze(**kwargs)
+
+
+if __name__ == "__main__":
+    unittest.main()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		nchar nline line_length_min line_length_mean line_length_median line_length_max line_length_sd nempty_line nfunction nstatement statement_nesting_min statement_nesting_mean statement_nesting_median statement_nesting_max statement_nesting_sd ninternal nconst nenum ngoto ninline nnoalias nregister nrestrict nsigned nstruct nunion nunsigned nvoid nvolatile ntypedef ncomment ncomment_char nboilerplate_comment_char ndox_comment ndox_comment_char nfun_comment ncpp_directive ncpp_include ncpp_conditional nfun_cpp_directive nfun_cpp_conditional style_inconsistency nfunction halstead_min halstead_mean halstead_median halstead_max halstead_sd nfunction2 cyclomatic_min cyclomatic_mean cyclomatic_median cyclomatic_max cyclomatic_sd nidentifier identifier_length_min identifier_length_mean identifier_length_median identifier_length_max identifier_length_sd unique_nidentifier unique_identifier_length_min unique_identifier_length_mean unique_identifier_length_median unique_identifier_length_max unique_identifier_length_sd indentation_spacing_count indentation_spacing_min indentation_spacing_mean indentation_spacing_median indentation_spacing_max indentation_spacing_sd nno_space_after_binary_op nno_space_after_closing_brace nno_space_after_comma nno_space_after_keyword nno_space_after_opening_brace nno_space_after_semicolon nno_space_before_binary_op nno_space_before_closing_brace nno_space_before_keyword nno_space_before_opening_brace nspace_after_opening_square_bracket nspace_after_struct_op nspace_after_unary_op nspace_at_end_of_line nspace_before_closing_bracket nspace_before_closing_square_bracket nspace_before_comma nspace_before_opening_square_bracket nspace_before_semicolon nspace_before_struct_op nspace_after_binary_op nspace_after_closing_brace nspace_after_comma nspace_after_keyword nspace_after_opening_brace nspace_after_semicolon nno_space_after_struct_op nspace_before_binary_op nspace_before_closing_brace nspace_before_keyword nspace_before_opening_brace nno_space_before_struct_op nno_space_after_opening_square_bracket nno_space_after_unary_op nno_space_before_closing_bracket nno_space_before_closing_square_bracket nno_space_before_comma nno_space_before_opening_square_bracket nno_space_before_semicolon