Skip to content

Commit

Permalink
Adds a filter for the work name in some of the check options. (#143)
Browse files Browse the repository at this point in the history
- Moved 'show progress' setting from 'check' command to the 'settings modify' command;
- Simplified decoding of content obtained from GitHub by using an option from the system library;
- Renamed the GitHub regular expression option to 'repo-regexp';
- Moved the code repeated across the features getters into an abstract class;
- Added new option 'path-regexp' for filtering files;
- Some code optimizations for future work;
- Compiling regular expressions before searching.
  • Loading branch information
Artanias authored Dec 10, 2022
1 parent fc16a1e commit 0825c33
Show file tree
Hide file tree
Showing 21 changed files with 480 additions and 315 deletions.
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
UTIL_VERSION := 0.2.6
UTIL_VERSION := 0.2.7
UTIL_NAME := codeplag
PWD := $(shell pwd)

Expand Down Expand Up @@ -187,6 +187,9 @@ docker-rmi:
@docker rmi $(BASE_DOCKER_TAG) --force 2> /dev/null || \
echo "Image $(BASE_DOCKER_TAG) is not exists"

todo-list:
@grep --color=auto -r -n 'TODO' ./* --exclude=Makefile --exclude-dir=docs

help:
@echo "Usage:"
@echo " make [command]"
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,11 @@
```
$ codeplag check --extension py --files src/codeplag/pyplag/astwalkers.py --directories src/codeplag/pyplag
$ codeplag check --extension py --directories src/codeplag/algorithms src
$ codeplag check --extension py --files src/codeplag/pyplag/astwalkers.py --github-user OSLL --regexp code- --all-branches
$ codeplag check --extension py --github-files https://github.com/OSLL/code-plagiarism/blob/main/src/codeplag/pyplag/utils.py --github-user OSLL --regexp code- --all-branches
$ codeplag check --extension py --files src/codeplag/pyplag/astwalkers.py --github-user OSLL --repo-regexp code- --all-branches
$ codeplag check --extension py --github-files https://github.com/OSLL/code-plagiarism/blob/main/src/codeplag/pyplag/utils.py --github-user OSLL --repo-regexp code- --all-branches
$ codeplag check --extension py --github-files https://github.com/OSLL/code-plagiarism/blob/main/src/codeplag/pyplag/utils.py --directories src/codeplag/pyplag/
$ codeplag check --extension py --directories src/ --github-user OSLL --regexp code-
$ codeplag check --extension py --github-project-folders https://github.com/OSLL/code-plagiarism/blob/main/src/codeplag/pyplag --github-user OSLL --regexp code-
$ codeplag check --extension py --directories src/ --github-user OSLL --repo-regexp code-
$ codeplag check --extension py --github-project-folders https://github.com/OSLL/code-plagiarism/blob/main/src/codeplag/pyplag --github-user OSLL --repo-regexp code-
$ codeplag check --extension py --github-project-folders https://github.com/OSLL/code-plagiarism/blob/main/src/codeplag/pyplag --directories src/codeplag/pyplag/
```

Expand All @@ -122,5 +122,5 @@
$ codeplag check --extension cpp --directories src/codeplag/cplag/tests/data src/ --files test/codeplag/cplag/data/sample1.cpp test/codeplag/cplag/data/sample2.cpp
$ codeplag check --extension cpp --github-files https://github.com/OSLL/code-plagiarism/blob/main/test/codeplag/cplag/data/sample3.cpp https://github.com/OSLL/code-plagiarism/blob/main/test/codeplag/cplag/data/sample4.cpp
$ codeplag check --extension cpp --github-project-folders https://github.com/OSLL/code-plagiarism/tree/main/test
$ codeplag check --extension cpp --github-user OSLL --regexp "code-plag"
$ codeplag check --extension cpp --github-user OSLL --repo-regexp "code-plag"
```
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
'argcomplete~=2.0.0',
'numpy~=1.23.1',
'pandas~=1.4.3',
'tabulate~=0.9.0',
'ccsyspath~=1.1.0',
'clang~=14.0',
'llvmlite~=0.39.0',
Expand Down
1 change: 1 addition & 0 deletions src/codeplag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def main() -> Literal[0, 1, 2]:
from codeplag.utils import CodeplagEngine

pd.set_option("display.float_format", '{:,.2%}'.format)
pd.set_option('display.max_colwidth', None)
logger = get_logger(__name__, LOG_PATH)

cli = CodeplagCLI()
Expand Down
66 changes: 37 additions & 29 deletions src/codeplag/codeplagcli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
from typing import List, Optional

from codeplag.consts import MODE_CHOICE, UTIL_NAME, UTIL_VERSION
from codeplag.consts import EXTENSION_CHOICE, MODE_CHOICE, UTIL_NAME, UTIL_VERSION
from webparsers.types import GitHubContentUrl


Expand Down Expand Up @@ -111,6 +111,13 @@ def __init__(self):
metavar="DIRECTORY",
type=DirPath,
)
settings_modify.add_argument(
"-sp",
"--show_progress",
help="Show progress of searching plagiarism.",
type=int,
choices=[0, 1],
)
settings_modify.add_argument(
"-t",
"--threshold",
Expand All @@ -130,6 +137,26 @@ def __init__(self):
)

check = subparsers.add_parser("check", help="Start searching similar works.")
check.add_argument(
"-d",
"--directories",
metavar="DIRECTORY",
type=DirPath,
help="Absolute or relative path to a local directories with project files.",
nargs="+",
action=CheckUniqueStore,
default=[],
)
check.add_argument(
"-f",
"--files",
metavar="FILE",
type=FilePath,
help="Absolute or relative path to files on a computer.",
nargs="+",
action=CheckUniqueStore,
default=[],
)
check.add_argument(
"--mode",
help="Choose one of the following modes of searching plagiarism. "
Expand All @@ -139,10 +166,12 @@ def __init__(self):
default="many_to_many",
)
check.add_argument(
"-sp",
"--show_progress",
help="Show current progress of searching plagiarism.",
action="store_true",
"-pe",
"--path-regexp",
# TODO: Check that it is used with the options listed below
help="A regular expression for filtering checked works by name. "
"Used with options 'directories', 'github-user' and 'github-project-folders'.",
type=str,
)

check_required = check.add_argument_group("required options")
Expand All @@ -151,7 +180,7 @@ def __init__(self):
"--extension",
help="Extension responsible for the analyzed programming language.",
type=str,
choices=["py", "cpp"],
choices=EXTENSION_CHOICE,
required=True,
)

Expand All @@ -163,8 +192,8 @@ def __init__(self):
action="store_true",
)
check_github.add_argument(
"-e",
"--regexp",
"-re",
"--repo-regexp",
type=str,
help="A regular expression to filter searching repositories on GitHub.",
)
Expand Down Expand Up @@ -192,27 +221,6 @@ def __init__(self):
default=[],
)

check.add_argument(
"-f",
"--files",
metavar="FILE",
type=FilePath,
help="Absolute or relative path to files on a computer.",
nargs="+",
action=CheckUniqueStore,
default=[],
)
check.add_argument(
"-d",
"--directories",
metavar="DIRECTORY",
type=DirPath,
help="Absolute or relative path to a local directories with project files.",
nargs="+",
action=CheckUniqueStore,
default=[],
)


if __name__ == "__main__":
cli = CodeplagCLI()
Expand Down
6 changes: 4 additions & 2 deletions src/codeplag/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def read_config(file: Path, safe: bool = False) -> Optional[dict]:
return config


# TODO: Handle permission denied
def write_config(file: Path, config: Union[dict, TypedDict]) -> None:
config_for_dump = dict(config)
for key in config_for_dump:
Expand All @@ -44,7 +45,7 @@ def write_config(file: Path, config: Union[dict, TypedDict]) -> None:
def read_settings_conf(logger: logging.Logger) -> Settings:
loaded_settings_config = read_config(CONFIG_PATH, safe=True)
if loaded_settings_config is None:
logger.error(
logger.warning(
"Unsuccessful attempt to read config '%s'. Returning default config.",
CONFIG_PATH
)
Expand Down Expand Up @@ -72,5 +73,6 @@ def write_settings_conf(settings: Settings) -> None:


DefaultSettingsConfig = Settings(
threshold=DEFAULT_THRESHOLD
threshold=DEFAULT_THRESHOLD,
show_progress=0
)
9 changes: 4 additions & 5 deletions src/codeplag/consts.tmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pathlib import Path
from typing import Dict, List

from codeplag.types import Extensions, Mode, Threshold
from codeplag.types import Extension, Extensions, Mode, Threshold

# Paths
CONFIG_PATH = Path("@CONFIG_PATH@")
Expand All @@ -13,7 +13,9 @@

DEFAULT_THRESHOLD: Threshold = 65
MODE_CHOICE: List[Mode] = ["many_to_many", "one_to_one"]
SUPPORTED_EXTENSIONS: Dict[str, Extensions] = {
EXTENSION_CHOICE: List[Extension] = ["py", "cpp"]
ALL_EXTENSIONS = (re.compile(r'\..*$'),)
SUPPORTED_EXTENSIONS: Dict[Extension, Extensions] = {
'py': (
re.compile(r'\.py$'),
),
Expand All @@ -22,9 +24,6 @@
re.compile(r'\.c$'),
re.compile(r'\.h$')
),
'default': (
re.compile(r'\..*$'),
)
}

UTIL_NAME = "@UTIL_NAME@"
Expand Down
54 changes: 27 additions & 27 deletions src/codeplag/cplag/util.py → src/codeplag/cplag/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import os
from pathlib import Path
from typing import List, Optional, Union
from typing import List, Optional

from clang.cindex import Cursor, Index, TranslationUnit

Expand Down Expand Up @@ -52,6 +53,23 @@ def get_works_from_filepaths(

class CFeaturesGetter(AbstractGetter):

def __init__(
    self,
    environment: Optional[Path] = None,
    all_branches: bool = False,
    logger: Optional[logging.Logger] = None,
    repo_regexp: str = '',
    path_regexp: str = ''
):
    """Initialize the getter with the extension fixed to 'cpp'.

    Every argument is forwarded unchanged, by keyword, to the parent
    initializer; this method adds no logic of its own beyond pinning
    ``extension='cpp'``.

    NOTE(review): ``repo_regexp`` and ``path_regexp`` presumably carry the
    values of the CLI options '--repo-regexp' and '--path-regexp' --
    confirm against the caller. The meaning of ``environment`` and
    ``all_branches`` is defined by the parent class, which is not visible
    here.
    """
    super().__init__(
        extension='cpp',
        environment=environment,
        all_branches=all_branches,
        logger=logger,
        repo_regexp=repo_regexp,
        path_regexp=path_regexp
    )

def get_from_content(self, file_content: str, url_to_file: str) -> Optional[ASTFeatures]:
with open(FILE_DOWNLOAD_PATH, 'w', encoding='utf-8') as out_file:
out_file.write(file_content)
Expand All @@ -76,29 +94,11 @@ def get_from_files(self, files: List[Path]) -> List[ASTFeatures]:
self.logger.info(f'{GET_FRAZE} files')
return get_works_from_filepaths(files, COMPILE_ARGS)

def get_from_dirs(
    self, directories: List[Path], independent: bool = False
) -> Union[List[ASTFeatures], List[List[ASTFeatures]]]:
    """Collect works from every directory in ``directories``.

    For each directory a log record is written, the files matching the
    patterns registered for ``self.extension`` are looked up, and the
    works are built from them with ``COMPILE_ARGS``.

    When ``independent`` is true, the works of each directory are kept
    as a separate nested list; otherwise all works are merged into one
    flat list.
    """
    collected = []
    # The accumulation strategy does not change between iterations,
    # so it is chosen once up front.
    store = collected.append if independent else collected.extend

    for directory in directories:
        self.logger.info(f'{GET_FRAZE} {directory}')
        found_paths = get_files_path_from_directory(
            directory,
            extensions=SUPPORTED_EXTENSIONS[self.extension]
        )
        store(get_works_from_filepaths(found_paths, COMPILE_ARGS))

    return collected
def get_works_from_dir(self, directory: Path) -> List[ASTFeatures]:
    """Build the works for the files found in ``directory``.

    The directory is scanned with ``get_files_path_from_directory``,
    passing the extension patterns registered for ``self.extension``
    and ``self.path_regexp``; the resulting paths are handed to
    ``get_works_from_filepaths`` together with ``COMPILE_ARGS``.
    """
    return get_works_from_filepaths(
        get_files_path_from_directory(
            directory,
            extensions=SUPPORTED_EXTENSIONS[self.extension],
            path_regexp=self.path_regexp
        ),
        COMPILE_ARGS
    )
4 changes: 2 additions & 2 deletions src/codeplag/display.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def print_compare_result(features1: ASTFeatures,
name='FastMetrics:'
)
)
print(main_metrics_df.to_markdown(tablefmt="psql"))
print(main_metrics_df)
print()

if compare_info.structure is None:
Expand All @@ -120,7 +120,7 @@ def print_compare_result(features1: ASTFeatures,
name='AdditionalMetrics:'
)
)
print(additional_metrics_df.to_markdown(tablefmt="psql"))
print(additional_metrics_df)
print()

if (compare_info.structure.similarity * 100) > threshold:
Expand Down
Loading

0 comments on commit 0825c33

Please sign in to comment.