Skip to content

Commit

Permalink
Added compare mode one to one (#131)
Browse files Browse the repository at this point in the history
- Added support for getting repository pull request information in webparsers;
- Moved the text-coloring logic into a separate module and removed unnecessary libraries;
- Moved the initialization of some variables from the run method of the CodeplagEngine class to its constructor;
- Started splitting the functions for getting features into a separate module;
- Added support for the one-to-one check mode.
  • Loading branch information
Artanias authored Sep 11, 2022
1 parent b32a542 commit 3ac3d8b
Show file tree
Hide file tree
Showing 26 changed files with 1,202 additions and 849 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/check_n_push_image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ on:
push:
branches:
- main
tags:
- 'v*'
paths-ignore:
- 'docs/**'
- '**.md'
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
UTIL_VERSION := 0.2.3
UTIL_VERSION := 0.2.4
UTIL_NAME := codeplag

BASE_DOCKER_TAG := $(shell echo $(UTIL_NAME)-base-ubuntu20.04:$(UTIL_VERSION) | tr A-Z a-z)
Expand Down
5 changes: 0 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,6 @@
$ make docker-image
```

- Starting tests with using created image
```
$ make docker-test
```

- Run the created code-plagiarism container

```
Expand Down
2 changes: 0 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
'clang~=14.0',
'llvmlite~=0.39.0',
'libclang~=14.0.1',
'colorama~=0.4.5',
'termcolor~=1.1.0',
'python-decouple~=3.6',
'requests~=2.28.1',
]
Expand Down
2 changes: 1 addition & 1 deletion src/codeplag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def main() -> Literal[0, 1]:
except Exception:
logger.error(
"An unexpected error occurred while running the utility. "
f"For getting more information, check file '{LOG_PATH}'."
"For getting more information, check file '%s'.", LOG_PATH
)
logger.debug("Trace:", exc_info=True)
return 1
Expand Down
24 changes: 12 additions & 12 deletions src/codeplag/algorithms/featurebased.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ def op_shift_metric(ops1: List[str], ops2: List[str]) -> Tuple[int, float]:

if len(y) > 0:
return max_shift, y[max_shift]
else:
return 0, 0.0

return 0, 0.0


def get_children_indexes(tree: List[Tuple[int, int]],
Expand Down Expand Up @@ -109,12 +109,12 @@ def find_max_index(array: np.ndarray) -> np.ndarray:
@param array - matrix of compliance (np.ndarray object)
rows = array.shape[0]
columns = array.shpe[1]
columns = array.shape[1]
O(rows * columns)
'''

maximum = 0
index = np.int64([0, 0])
index: np.ndarray = np.int64([0, 0])
for i in np.arange(0, array.shape[0], 1):
for j in np.arange(0, array.shape[1], 1):
if array[i][j][1] == 0:
Expand All @@ -141,18 +141,18 @@ def matrix_value(array: np.ndarray) -> Tuple[list, list]:
same_struct_metric = [1, 1]
minimal = min(array.shape[0], array.shape[1])
indexes = []
for i in np.arange(0, minimal, 1):
for _ in np.arange(0, minimal, 1):
ind = find_max_index(array)
indexes.append(ind)
same_struct_metric[0] += array[ind[0]][ind[1]][0]
same_struct_metric[1] += array[ind[0]][ind[1]][1]

# Zeroing row
for i in np.arange(0, array.shape[1], 1):
array[ind[0]][i] = [0, 0]
for row in np.arange(0, array.shape[1], 1):
array[ind[0]][row] = [0, 0]
# Zeroing column
for j in np.arange(0, array.shape[0], 1):
array[j][ind[1]] = [0, 0]
for col in np.arange(0, array.shape[0], 1):
array[col][ind[1]] = [0, 0]

return same_struct_metric, indexes

Expand Down Expand Up @@ -201,11 +201,11 @@ def struct_compare(tree1: List[Tuple[int, int]],
count_of_nodes1 = len(tree1)
count_of_nodes2 = len(tree2)

if (count_of_nodes1 == 0 and count_of_nodes2 == 0):
if count_of_nodes1 == 0 and count_of_nodes2 == 0:
return [1, 1]
elif (count_of_nodes1 == 0):
if count_of_nodes1 == 0:
return [1, (count_of_nodes2 + 1)]
elif (count_of_nodes2 == 0):
if count_of_nodes2 == 0:
return [1, (count_of_nodes1 + 1)]

# Add counting of nodes
Expand Down
24 changes: 0 additions & 24 deletions src/codeplag/astfeatures.py

This file was deleted.

51 changes: 34 additions & 17 deletions src/codeplag/codeplagcli.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
"""
This module consist the CLI of the codeplag util and
necessary internal classes for it.
"""
import argparse
import os
from pathlib import Path
from typing import List

from codeplag.consts import UTIL_NAME, UTIL_VERSION
from webparsers.github_parser import GitHubContentUrl
from codeplag.consts import MODE_CHOICE, UTIL_NAME, UTIL_VERSION
from webparsers.types import GitHubContentUrl


class CheckUniqueStore(argparse.Action):
Expand All @@ -25,38 +29,51 @@ def __call__(
setattr(namespace, self.dest, values)


class DirPath(str):
class DirPath(Path):
"""Path that raising argparse.ArgumentTypeError when parsing CLI
arguments if directory is not exists.
"""

def __new__(cls, path: str):
if not os.path.isdir(path):
def __new__(cls, *args, **kwargs):
path = Path(*args, **kwargs)
if not path.is_dir():
raise argparse.ArgumentTypeError(
f"Directory '{path}' not found or not a directory."
)

return str.__new__(cls, os.path.normpath(path))
return Path.__new__(Path, *args, **kwargs)


class FilePath(str):
class FilePath(Path):
"""Path that raising argparse.ArgumentTypeError when parsing CLI
arguments if file is not exists.
"""

def __new__(cls, path: str):
if not os.path.isfile(path):
def __new__(cls, *args, **kwargs):
path = Path(*args, **kwargs)
if not path.is_file():
raise argparse.ArgumentTypeError(
f"File '{path}' not found or not a file."
)

return str.__new__(cls, os.path.normpath(path))
return Path.__new__(Path, *args, **kwargs)


class EnvPath(str):
class EnvPath(Path):
"""Path that returns None when parsing CLI
arguments if file is not exists.
"""

def __new__(cls, path: str):
if not os.path.isfile(path):
return str.__new__(cls, "")
def __new__(cls, *args, **kwargs):
path = Path(*args, **kwargs)
if not path.is_file():
return None

return str.__new__(cls, os.path.normpath(path))
return Path.__new__(Path, *args, **kwargs)


class CodeplagCLI(argparse.ArgumentParser):
"""The argument parser of the codeplag util."""

def __init__(self):
super(CodeplagCLI, self).__init__(
Expand All @@ -76,7 +93,7 @@ def __init__(self):
help="Choose one of the following modes of searching plagiarism. "
"The 'many_to_many' mode may require more free memory.",
type=str,
choices=["many_to_many"],
choices=MODE_CHOICE,
default="many_to_many"
)
self.add_argument(
Expand Down
18 changes: 11 additions & 7 deletions src/codeplag/consts.py.in
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import re
from typing import Dict
from pathlib import Path
from typing import List

FILE_DOWNLOAD_PATH: str = "/tmp/@UTIL_NAME@_download.out"
GET_FRAZE: str = 'Getting works features from'
LOG_PATH: str = "@CODEPLAG_LOG_PATH@"
SUPPORTED_EXTENSIONS: Dict[str, tuple] = {
from codeplag.types import Mode

FILE_DOWNLOAD_PATH = Path("/tmp/@UTIL_NAME@_download.out")
GET_FRAZE = 'Getting works features from'
LOG_PATH = Path("@CODEPLAG_LOG_PATH@")
MODE_CHOICE: List[Mode] = ["many_to_many", "one_to_one"]
SUPPORTED_EXTENSIONS = {
'py': (
re.compile(r'\.py$'),
),
Expand All @@ -17,5 +21,5 @@ SUPPORTED_EXTENSIONS: Dict[str, tuple] = {
re.compile(r'\..*$'),
)
}
UTIL_NAME: str = "@UTIL_NAME@"
UTIL_VERSION: str = "@UTIL_VERSION@"
UTIL_NAME = "@UTIL_NAME@"
UTIL_VERSION = "@UTIL_VERSION@"
4 changes: 3 additions & 1 deletion src/codeplag/cplag/const.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import List

import ccsyspath
from clang.cindex import CursorKind


def get_compile_args():
def get_compile_args() -> List[str]:
syspath = ccsyspath.system_include_paths('clang++')
incargs = [b'-I' + inc for inc in syspath]

Expand Down
9 changes: 6 additions & 3 deletions src/codeplag/cplag/tree.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
from pathlib import Path
from typing import List

from clang.cindex import Cursor, TokenKind

from codeplag.astfeatures import ASTFeatures
from codeplag.cplag.const import IGNORE, OPERATORS
from codeplag.types import ASTFeatures


def get_not_ignored(tree: Cursor, src: str) -> List[Cursor]:
def get_not_ignored(tree: Cursor, src: Path) -> List[Cursor]:
'''
Function helps to discard unnecessary nodes such as imports
'''

parsed_nodes = []
for child in tree.get_children():
loc = child.location.file
last_loc_part = str(loc).rsplit('/', maxsplit=1)[-1]
last_src_part = str(src).rsplit('/', maxsplit=1)[-1]
if (
str(loc).split('/')[-1] == src.split('/')[-1] and
last_loc_part == last_src_part and
child.kind not in IGNORE
):
parsed_nodes.append(child)
Expand Down
32 changes: 19 additions & 13 deletions src/codeplag/cplag/util.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,46 @@
import os
from pathlib import Path
from typing import List, Optional

from clang.cindex import Index, TranslationUnit
from clang.cindex import Cursor, Index, TranslationUnit

from codeplag.cplag.tree import get_features
from codeplag.types import ASTFeatures


def get_cursor_from_file(filename, args=None):
def get_cursor_from_file(filepath: Path,
args: Optional[List[str]] = None) -> Optional[Cursor]:
'''
Returns clang.cindex.Cursor object or 0 if file is undefined
Returns clang.cindex.Cursor object or None if file is undefined
@param filename - full path to source file
@param args - list of arguments for clang.cindex.Index.parse() method
'''

if args is None:
args = []

if not os.path.isfile(filename):
print(filename, "Is not a file / doesn't exist")
return 0
if not filepath.is_file():
print(filepath, "Is not a file / doesn't exist")
return

index = Index.create()
options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD

file_obj = index.parse(filename, args=args, options=options) or 0
file_obj = index.parse(filepath, args=args, options=options) or 0

return file_obj.cursor


def get_works_from_filepaths(filenames, compile_args):
if not filenames:
def get_works_from_filepaths(
filepaths: List[Path],
compile_args: List[str]
) -> List[ASTFeatures]:
if not filepaths:
return []

works = []
for filename in filenames:
cursor = get_cursor_from_file(filename, compile_args)
features = get_features(cursor, filename)
for filepath in filepaths:
cursor = get_cursor_from_file(filepath, compile_args)
features = get_features(cursor, filepath)
works.append(features)

return works
Loading

0 comments on commit 3ac3d8b

Please sign in to comment.