diff --git a/.idea/.gitignore b/.idea/.gitignore
deleted file mode 100644
index d158395..0000000
--- a/.idea/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
-/*.xml
\ No newline at end of file
diff --git a/README.md b/README.md
index 84c163c..4acd3a6 100644
--- a/README.md
+++ b/README.md
@@ -12,22 +12,28 @@ pip install -r requirements.txt
 ## Running the application:
 1. Logging commits
 ```commandline
-python3 main.py [-t, --token] token (your GitHub token in place of token) [-l, --list] list (list is the path to a txt file with the list of repositories) [-o, --out] out (out is the name of the csv file that will receive all the logs) [-b, --branch] branch (branch is the name of the specific branch to take commits from, or all to log commits from every branch)
+python3 main.py [-c, --commits] [-t, --token] token (your GitHub token in place of token) [-l, --list] list (list is the path to a txt file with the list of repositories) [-o, --out] out (out is the name of the csv file that will receive all the logs) [-b, --branch] branch (branch is the name of the specific branch to take commits from, or all to log commits from every branch)
 ```
 2. Logging issues
 ```commandline
-python3 main.py -i [-t, --token] token (your GitHub token in place of token) [-l, --list] list (list is the path to a txt file with the list of repositories) [-o, --out] out (out is the name of the csv file that will receive all the logs)
+python3 main.py [-i, --issues] [-t, --token] token (your GitHub token in place of token) [-l, --list] list (list is the path to a txt file with the list of repositories) [-o, --out] out (out is the name of the csv file that will receive all the logs)
 ```
 3. Logging pull requests
 ```commandline
-python3 main.py -p [-t, --token] token (your GitHub token in place of token) [-l, --list] list (list is the path to a txt file with the list of repositories) [-o, --out] out (out is the name of the csv file that will receive all the logs)
+python3 main.py [-p, --pull_requests] [-t, --token] token (your GitHub token in place of token) [-l, --list] list (list is the path to a txt file with the list of repositories) [-o, --out] out (out is the name of the csv file that will receive all the logs)
 ```
 4. Logging pending repository invitations
 ```commandline
 python3 main.py --invites [-t, --token] token (your GitHub token in place of token) [-l, --list] list (list is the path to a txt file with the list of repositories) [-o, --out] out (out is the name of the csv file that will receive all the logs)
 ```
-
-
+5. Logging wiki repositories
+```commandline
+python3 main.py [-w, --wikis] [-t, --token] token (your GitHub token in place of token) [-l, --list] list (list is the path to a txt file with the list of repositories) --download_repos path_drepo (path_drepo is the path to the directory where the wiki repositories are saved) [-o, --out] out (out is the name of the csv file that will receive all the logs)
+```
+6. Logging contributors
+```commandline
+python3 main.py [--contributors] [-t, --token] token (your GitHub token in place of token) [-l, --list] list (list is the path to a txt file with the list of repositories) [-o, --out] out (out is the name of the csv file that will receive all the logs)
+```
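+
+For example, a typical commits run over all branches might look like this (the token and file names below are placeholders):
+```commandline
+python3 main.py -c -t <github_token> -l repos.txt -o commits.csv -b all
+```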
 ## Getting a token for Google Sheets:
 First you need to create a project at [Google Cloud](https://console.cloud.google.com/). Pick a project name and click "Create".
@@ -44,7 +50,7 @@ python3 main.py --invites [-t, --token] token (your GitHub token in place of to
 ## Exporting the table to Google Sheets:
 ``` commandline
-python3 main.py -p [-t, --token] token (your GitHub token in place of token) [-l,--list] list (list is the path to a txt file with the list of repositories) [-o, --out] out (out is the name of the csv file that will receive all the logs) [--google_token] token.json (file with the Google token) [--table_id] table_id (id of the table, found in the table's url) [--sheet_id] sheet_id (id of the specific sheet in the Google table)
+python3 main.py [-p, --pull_requests] [-t, --token] token (your GitHub token in place of token) [-l,--list] list (list is the path to a txt file with the list of repositories) [-o, --out] out (out is the name of the csv file that will receive all the logs) [--google_token] token.json (file with the Google token) [--table_id] table_id (id of the table, found in the table's url) [--sheet_id] sheet_id (id of the specific sheet in the Google table)
 ```
 
 ## Repository list file:
diff --git a/commits_parser.py b/commits_parser.py
new file mode 100644
index 0000000..23f41fb
--- /dev/null
+++ b/commits_parser.py
@@ -0,0 +1,68 @@
+import csv
+import pytz
+from time import sleep
+from github import Github, Repository, GithubException, PullRequest
+
+EMPTY_FIELD = 'Empty field'
+TIMEDELTA = 0.05
+TIMEZONE = 'Europe/Moscow'
+FIELDNAMES = ('repository name', 'author name', 'author login', 'author email', 'date and time', 'changed files', 'commit id', 'branch')
+
+
+def log_commit_to_csv(info, csv_name):
+    with open(csv_name, 'a', newline='') as file:
+        writer = csv.DictWriter(file, fieldnames=FIELDNAMES)
+        writer.writerow(info)
+
+
+def log_commit_to_stdout(info):
+    print(info)
+
+
+def log_repository_commits(repository: Repository, csv_name, start, finish, branch):
+    branches = []
+    match branch:
+        case 'all':
+            for branch in repository.get_branches():
+                branches.append(branch.name)
+        case None:
+            branches.append(repository.default_branch)
+        case _:
+            branches.append(branch)
+
+    for branch in branches:
+        print(f'Processing branch {branch}')
+        # TODO add support of since and until in https://pygithub.readthedocs.io/en/stable/github_objects/Repository.html#github.Repository.Repository.get_commits
+        for commit in repository.get_commits(sha=branch):
+            if commit.commit.author.date.astimezone(
+                    pytz.timezone(TIMEZONE)) < start or commit.commit.author.date.astimezone(
+                    pytz.timezone(TIMEZONE)) > finish:
+                continue
+            if commit.commit is not None:
+                nvl = lambda val: val or EMPTY_FIELD
+                commit_data = [repository.full_name, commit.commit.author.name,
+                               commit.author.login if commit.author is not None else EMPTY_FIELD,
+                               nvl(commit.commit.author.email),
+                               commit.commit.author.date, '; '.join([file.filename for file in commit.files]),
+                               commit.commit.sha, branch]
+                info = dict(zip(FIELDNAMES, commit_data))
+
+                log_commit_to_csv(info, csv_name)
+                log_commit_to_stdout(info)
+                sleep(TIMEDELTA)
+
+
+def log_commits(client: Github, working_repos, csv_name, start, finish, branch, fork_flag):
+    with open(csv_name, 'w', newline='') as file:
+        writer = csv.writer(file)
+        writer.writerow(FIELDNAMES)
+
+    for repo in working_repos:
+        try:
+            print('=' * 20, repo.full_name, '=' * 20)
+            log_repository_commits(repo, csv_name, start, finish, branch)
+            if fork_flag:
+                for forked_repo in repo.get_forks():
+                    print('=' * 20, "FORKED:", forked_repo.full_name, '=' * 20)
+                    log_repository_commits(forked_repo, csv_name, start, finish, branch)
+                    sleep(TIMEDELTA)
+            sleep(TIMEDELTA)
+        except Exception as e:
+            print(e)
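The new commits_parser keeps only commits whose author date falls inside the requested window. A minimal standalone sketch of that check, assuming timezone-aware start/finish values like the ones main.py's parse_time produces:

```python
# Minimal sketch of the commit time-window filter used in
# log_repository_commits; start/finish are assumed timezone-aware.
from datetime import datetime
import pytz

TIMEZONE = 'Europe/Moscow'

def in_window(commit_date: datetime, start: datetime, finish: datetime) -> bool:
    # Normalize the commit timestamp to the configured timezone, then compare.
    local_date = commit_date.astimezone(pytz.timezone(TIMEZONE))
    return start <= local_date <= finish
```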
diff --git a/contributors_parser.py b/contributors_parser.py
new file mode 100644
index 0000000..26834fd
--- /dev/null
+++ b/contributors_parser.py
@@ -0,0 +1,81 @@
+import csv
+from time import sleep
+from github import Github, Repository
+import requests
+
+EMPTY_FIELD = 'Empty field'
+TIMEDELTA = 0.05
+TIMEZONE = 'Europe/Moscow'
+FIELDNAMES = ('repository name', 'login', 'name', 'email', 'url', 'permissions', 'total_commits', 'id', 'node_id', 'type', 'bio', 'site_admin')
+
+
+def log_contributors_to_csv(info: dict, csv_name: str):
+    with open(csv_name, 'a', newline='') as file:
+        writer = csv.DictWriter(file, fieldnames=FIELDNAMES)
+        writer.writerow(info)
+
+
+def log_repository_contributors(repository: Repository, csv_name: str, token: str):
+    contributors = repository.get_contributors()
+    total_commits_dict = get_contributor_commits(repository.owner, repository.name, token)
+    nvl = lambda val: val or EMPTY_FIELD
+    for contributor in contributors:
+        contributor_permissions = repository.get_collaborator_permission(contributor)
+        contributor_total_commits = total_commits_dict.get(contributor.login, EMPTY_FIELD)
+
+        info_tmp = {
+            'repository name': repository.full_name,
+            'login': contributor.login,
+            'name': nvl(contributor.name),
+            'email': nvl(contributor.email),
+            'url': contributor.html_url,
+            'permissions': nvl(contributor_permissions),
+            'total_commits': contributor_total_commits,
+            'id': contributor.id,
+            'node_id': contributor.node_id,
+            'type': contributor.type,
+            'bio': nvl(contributor.bio),
+            'site_admin': contributor.site_admin,
+        }
+        log_contributors_to_csv(info_tmp, csv_name)
+        print(info_tmp)
+        sleep(TIMEDELTA)
+
+
+def log_contributors(client: Github, token: str, working_repos: list, csv_name: str, fork_flag: str):
+    with open(csv_name, 'w', newline='') as file:
+        writer = csv.writer(file)
+        writer.writerow(FIELDNAMES)
+
+    for repo in working_repos:
+        try:
+            print('=' * 20, repo.full_name, '=' * 20)
+            log_repository_contributors(repo, csv_name, token)
+            if fork_flag:
+                for forked_repo in repo.get_forks():
+                    print('=' * 20, "FORKED:", forked_repo.full_name, '=' * 20)
+                    log_repository_contributors(forked_repo, csv_name, token)
+        except Exception as e:
+            print(e)
+
+
+def get_contributor_commits(repo_owner, repo_name, token):
+    headers = {"Authorization": f"Bearer {token}"}
+    request_name = f"https://api.github.com/repos/{repo_owner.login}/{repo_name}/stats/contributors"
+    request = requests.get(request_name, headers=headers)
+
+    while request.status_code == 202:
+        print("Waiting for response...")
+        sleep(10)
+        request = requests.get(request_name, headers=headers)
+
+    if request.status_code != 200:
+        return {}
+
+    response_data = request.json()
+    total_commits_dict = {}
+    for contributor in response_data:
+        contributor_name = contributor["author"]["login"]
+        total_commits = contributor["total"]
+        total_commits_dict[contributor_name] = total_commits
+    return total_commits_dict
\ No newline at end of file
diff --git a/export_sheets.py b/export_sheets.py
index f1c5e09..8e5c16e 100644
--- a/export_sheets.py
+++ b/export_sheets.py
@@ -20,7 +20,7 @@ def write_data_to_table(csv_path, google_token, table_id, sheet_id):
     wk_content = sh.worksheet_by_title(sheet_id)
 
     if csv_path:
-        df = pd.read_csv(csv_path, delimiter=',', encoding='cp1251')
+        df = pd.read_csv(csv_path, delimiter=',', encoding='utf-8')
     else:
         df = pd.DataFrame(INT_MASS)
 
@@ -29,5 +29,3 @@ def write_data_to_table(csv_path, google_token, table_id, sheet_id):
 
     # Write the new data
     wk_content.set_dataframe(df, 'A1', copy_head=True)
-
-
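get_contributor_commits above depends on GitHub returning 202 Accepted while it computes /stats/contributors. A self-contained sketch of that polling pattern; the owner, repo, and token values are placeholders:

```python
# Standalone sketch of the 202-retry loop used by get_contributor_commits:
# GitHub answers 202 while the stats are being generated, so retry until 200.
from time import sleep
import requests

def fetch_contributor_stats(owner: str, repo: str, token: str) -> list:
    url = f"https://api.github.com/repos/{owner}/{repo}/stats/contributors"
    headers = {"Authorization": f"Bearer {token}"}
    response = requests.get(url, headers=headers)
    while response.status_code == 202:  # stats still being computed
        sleep(10)
        response = requests.get(url, headers=headers)
    return response.json() if response.status_code == 200 else []
```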
diff --git a/git_logger.py b/git_logger.py
index eb8fa85..aa85441 100644
--- a/git_logger.py
+++ b/git_logger.py
@@ -1,14 +1,8 @@
-import csv
-import requests
-import json
-import pytz
+from github import Github, GithubException, PullRequest
 from time import sleep
-from github import Github, Repository, GithubException, PullRequest
-
-EMPTY_FIELD = 'Empty field'
-timedelta = 0.05
-timezone = 'Europe/Moscow'
+TIMEDELTA = 0.05
+TIMEZONE = 'Europe/Moscow'
 
 def login(token):
     client = Github(login_or_token=token)
@@ -43,448 +37,13 @@ def get_assignee_story(github_object):
     events = github_object.get_issue_events() if type(
         github_object) is PullRequest.PullRequest else github_object.get_events()
     for event in events:
-        if event.event == "assigned" or event.event == "unassigned":
+        if event.event in ["assigned", "unassigned"]:
             date = event.created_at
-            if event.event == "assigned":
-                assigner = github_object.user.login
-                assignee = event.assignee.login
-                assignee_result += f"{date}: {assigner} -> {assignee}; "
-            else:
-                assigner = github_object.user.login
-                assignee = event.assignee.login
-                assignee_result += f"{date}: {assigner} -/> {assignee}; "
-            sleep(timedelta)
+            assigner = github_object.user.login
+            assignee = event.assignee.login
+            assignee_result += f"{date}: {assigner} -"
+            if event.event == "unassigned":
+                assignee_result += "/"
+            assignee_result += f"> {assignee}; "
+            sleep(TIMEDELTA)
     return assignee_result
-
-
-def log_commit_to_csv(info, csv_name):
-    fieldnames = ['repository name', 'author name', 'author login', 'author email', 'date and time',
-                  'changed files', 'commit id', 'branch' ]
-    with open(csv_name, 'a', newline='') as file:
-        writer = csv.DictWriter(file, fieldnames=fieldnames)
-        writer.writerow(info)
-
-
-def log_commit_to_stdout(info):
-    print(info)
-
-
-def log_repository_commits(repository: Repository, csv_name, start, finish, branch):
-
-
-    branches = []
-    if branch == 'all':
-        for branch in repository.get_branches():
-            branches.append(branch.name)
-    else:
-        branches.append(branch)
-
-    #print(branches)
-
-    for branch in branches:
-        print(f'Processing branch {branch}')
-        for commit in repository.get_commits():
-            if commit.commit.author.date.astimezone(
-                    pytz.timezone(timezone)) < start or commit.commit.author.date.astimezone(
-                    pytz.timezone(timezone)) > finish:
-                continue
-            if commit.commit is not None:
-                info = {'repository name': repository.full_name,
-                        'author name': commit.commit.author.name,
-                        'author login': EMPTY_FIELD,
-                        'author email': EMPTY_FIELD,
-                        'date and time': commit.commit.author.date,
-                        'changed files': '; '.join([file.filename for file in commit.files]),
-                        'commit id': commit.commit.sha,
-                        'branch': branch}
-
-                if commit.author is not None:
-                    info['author login'] = commit.author.login
-
-                if commit.commit.author is not None:
-                    info['author email'] = commit.commit.author.email
-
-                log_commit_to_csv(info, csv_name)
-                log_commit_to_stdout(info)
-                sleep(timedelta)
-
-
-def log_issue_to_csv(info, csv_name):
-    fieldnames = ['repository name', 'number', 'title', 'state', 'task', 'created at', 'creator name', 'creator login',
-                  'creator email', 'closer name', 'closer login', 'closer email', 'closed at', 'comment body',
-                  'comment created at', 'comment author name', 'comment author login', 'comment author email',
-                  'assignee story', 'connected pull requests', 'labels', 'milestone']
-
-    with open(csv_name, 'a', newline='') as file:
-        writer = csv.DictWriter(file, fieldnames=fieldnames)
-        writer.writerow(info)
-
-
-def log_issue_to_stdout(info):
-    print(info)
-
-
-def get_connected_pulls(issue_number, repo_owner, repo_name, token):
-    access_token = token
-    repo_owner = repo_owner.login
-    # Build the GraphQL query
-    query = """
-    {
-      repository(owner: "%s", name: "%s") {
-        issue(number: %d) {
-          timelineItems(first: 50, itemTypes:[CONNECTED_EVENT,CROSS_REFERENCED_EVENT]) {
-            filteredCount
-            nodes {
-              ... on ConnectedEvent {
-                ConnectedEvent: subject {
-                  ... on PullRequest {
-                    number
-                    title
-                    url
-                  }
-                }
-              }
-              ... on CrossReferencedEvent {
-                CrossReferencedEvent: source {
-                  ... on PullRequest {
-                    number
-                    title
-                    url
-                  }
-                }
-              }
-            }
-          }
-        }
-      }
-    }""" % (repo_owner, repo_name, issue_number)
-
-    # Build the request headers
-    headers = {
-        "Authorization": f"Bearer {access_token}",
-        "Content-Type": "application/json"
-    }
-
-    # Send the GraphQL request
-    response = requests.post("https://api.github.com/graphql", headers=headers, data=json.dumps({"query": query}))
-    response_data = response.json()
-    # Process the returned data
-    pull_request_data = response_data["data"]["repository"]["issue"]
-    list_url = []
-    if (pull_request_data is not None):
-        issues_data = pull_request_data["timelineItems"]["nodes"]
-        for pulls in issues_data:
-            if (pulls.get("CrossReferencedEvent") != None and pulls.get("CrossReferencedEvent").get("url") not in list_url):
-                list_url.append(pulls.get("CrossReferencedEvent").get("url"))
-            if (pulls.get("ConnectedEvent") != None and pulls.get("ConnectedEvent").get("url") not in list_url):
-                list_url.append(pulls.get("ConnectedEvent").get("url"))
-        if (list_url == []):
-            return 'Empty field'
-        else:
-            return ';'.join(list_url)
-    return 'Empty field'
-
-
-def log_repository_issues(repository: Repository, csv_name, token, start, finish):
-    for issue in repository.get_issues(state='all'):
-        if issue.created_at.astimezone(pytz.timezone(timezone)) < start or issue.created_at.astimezone(
-                pytz.timezone(timezone)) > finish:
-            continue
-        info_tmp = {
-            'repository name': repository.full_name, 'number': issue.number, 'title': issue.title,
-            'state': issue.state, 'task': issue.body,
-            'created at': issue.created_at,
-            'creator name': EMPTY_FIELD,
-            'creator login': EMPTY_FIELD,
-            'creator email': EMPTY_FIELD if issue.user.email is None else issue.user.email,
-            'closed at': EMPTY_FIELD,
-            'closer name': EMPTY_FIELD,
-            'closer login': EMPTY_FIELD,
-            'closer email': EMPTY_FIELD if issue.closed_by is None else issue.closed_by.email,
-            'comment body': EMPTY_FIELD,
-            'comment created at': EMPTY_FIELD,
-            'comment author name': EMPTY_FIELD,
-            'comment author login': EMPTY_FIELD,
-            'comment author email': EMPTY_FIELD,
-            'assignee story': EMPTY_FIELD,
-            'connected pull requests': EMPTY_FIELD,
-            'labels': EMPTY_FIELD if issue.labels is None else ';'.join([label.name for label in issue.labels]),
-            'milestone': EMPTY_FIELD if issue.milestone is None else issue.milestone.title
-        }
-
-        if issue.number is not None:
-            info_tmp['connected pull requests'] = get_connected_pulls(issue.number, repository.owner, repository.name,
-                                                                      token)
-
-        info_tmp['assignee story'] = get_assignee_story(issue)
-
-        if issue.user is not None:
-            info_tmp['creator name'] = issue.user.name
-            info_tmp['creator login'] = issue.user.login
-
-        if issue.closed_by is not None:
-            info_tmp['closed at'] = issue.closed_at
-            info_tmp['closer name'] = issue.closed_by.name
-            info_tmp['closer login'] = issue.user.login
-
-        if issue.get_comments().totalCount > 0:
-            for comment in issue.get_comments():
-                info = info_tmp
-                info['comment body'] = comment.body
-                info['comment created at'] = comment.created_at
-                info['comment author name'] = comment.user.name
-                info['comment author login'] = comment.user.login
-                info['comment author email'] = comment.user.email
-                log_issue_to_csv(info, csv_name)
-                log_issue_to_stdout(info)
-        else:
-            log_issue_to_csv(info_tmp, csv_name)
-            log_issue_to_stdout(info_tmp)
-        sleep(timedelta)
-
-
-def log_pr_to_csv(info, csv_name):
-    fieldnames = ['repository name', 'title', 'id', 'state', 'commit into', 'commit from', 'created at', 'creator name',
-                  'creator login', 'creator email',
-                  'changed files', 'comment body', 'comment created at', 'comment author name', 'comment author login',
-                  'comment author email', 'merger name', 'merger login', 'merger email', 'source branch',
-                  'target branch', 'assignee story', 'related issues', 'labels', 'milestone']
-    with open(csv_name, 'a', newline='') as file:
-        writer = csv.DictWriter(file, fieldnames=fieldnames)
-        writer.writerow(info)
-
-
-def log_pr_to_stdout(info):
-    print(info)
-
-
-def get_related_issues(pull_request_number, repo_owner, repo_name, token):
-    access_token = token
-    repo_owner = repo_owner.login
-
-    # Build the GraphQL query
-    query = """
-    {
-      repository(owner: "%s", name: "%s") {
-        pullRequest(number: %d) {
-          id
-          closingIssuesReferences(first: 50) {
-            edges {
-              node {
-                id
-                body
-                number
-                title
-                url
-              }
-            }
-          }
-        }
-      }
-    }
-    """ % (repo_owner, repo_name, pull_request_number)
-
-    # Build the request headers
-    headers = {
-        "Authorization": f"Bearer {access_token}",
-        "Content-Type": "application/json"
-    }
-
-    # Send the GraphQL request
-    response = requests.post("https://api.github.com/graphql", headers=headers, data=json.dumps({"query": query}))
-    response_data = response.json()
-    # Process the returned data
-    pull_request_data = response_data["data"]["repository"]["pullRequest"]
-    issues_data = pull_request_data["closingIssuesReferences"]["edges"]
-    list_issues_url = []
-    # Save the issue information
-    for issue in issues_data:
-        issue_node = issue["node"]
-        list_issues_url.append(issue_node["url"])
-    return ';'.join(list_issues_url)
-
-
-def log_repositories_pr(repository: Repository, csv_name, token, start, finish):
-    for pull in repository.get_pulls(state='all'):
-        if pull.created_at.astimezone(pytz.timezone(timezone)) < start or pull.created_at.astimezone(
-                pytz.timezone(timezone)) > finish:
-            continue
-        info_tmp = {
-            'repository name': repository.full_name,
-            'title': pull.title,
-            'id': pull.number,
-            'state': pull.state,
-            'commit into': pull.base.label,
-            'commit from': pull.head.label,
-            'created at': pull.created_at,
-            'creator name': EMPTY_FIELD if pull.user.name is None else pull.user.name,
-            'creator login': pull.user.login,
-            'creator email': pull.user.email,
-            'changed files': '; '.join([file.filename for file in pull.get_files()]),
-            'comment body': EMPTY_FIELD,
-            'comment created at': EMPTY_FIELD,
-            'comment author name': EMPTY_FIELD,
-            'comment author login': EMPTY_FIELD,
-            'comment author email': EMPTY_FIELD,
-            'merger name': EMPTY_FIELD,
-            'merger login': EMPTY_FIELD,
-            'merger email': EMPTY_FIELD,
-            'source branch': pull.head.ref,
-            'target branch': pull.base.ref,
-            'assignee story': EMPTY_FIELD,
-            'related issues': EMPTY_FIELD,
-            'labels': EMPTY_FIELD if pull.labels is None else ';'.join([label.name for label in pull.labels]),
-            'milestone': EMPTY_FIELD if pull.milestone is None else pull.milestone.title
-        }
-        if pull.issue_url is not None:
-            info_tmp['related issues'] = get_related_issues(pull.number, repository.owner, repository.name, token)
-
-        if pull.merged_by is not None:
-            info_tmp['merger name'] = pull.merged_by.name
-            info_tmp['merger login'] = pull.merged_by.login
-            info_tmp['merger email'] = pull.merged_by.email
-
-        info_tmp['assignee story'] = get_assignee_story(pull)
-
-        if pull.get_comments().totalCount > 0:
-            for comment in pull.get_comments():
-                info = info_tmp
-                info['comment body'] = comment.body
-                info['comment created at'] = comment.created_at
-                info['comment author name'] = comment.user.name
-                info['comment author login'] = comment.user.login
-                info['comment author email'] = EMPTY_FIELD if comment.user.email is None else comment.user.email
-                log_pr_to_csv(info, csv_name)
-                log_pr_to_stdout(info)
-        else:
-            log_pr_to_csv(info_tmp, csv_name)
-            log_pr_to_stdout(info_tmp)
-        sleep(timedelta)
-
-
-def log_pull_requests(client: Github, repositories, csv_name, token, start, finish):
-    with open(csv_name, 'w', newline='') as file:
-        writer = csv.writer(file)
-        writer.writerow(
-            (
-                'repository name',
-                'title',
-                'id',
-                'state',
-                'commit into',
-                'commit from',
-                'created at',
-                'creator name',
-                'creator login',
-                'creator email',
-                'changed files',
-                'comment body',
-                'comment created at',
-                'comment author name',
-                'comment author login',
-                'comment author email',
-                'merger name',
-                'merger login',
-                'merger email',
-                'source branch',
-                'target branch',
-                'assignee story',
-                'related issues',
-                'labels',
-                'milestone'
-            )
-        )
-
-    for repo in get_next_repo(client, repositories):
-
-        try:
-            log_repositories_pr(repo, csv_name, token, start, finish)
-            sleep(timedelta)
-        except Exception as e:
-            print(e)
-
-
-def log_issues(client: Github, repositories, csv_name, token, start, finish):
-    with open(csv_name, 'w', newline='') as file:
-        writer = csv.writer(file)
-        writer.writerow(
-            (
-                'repository name',
-                'number',
-                'title',
-                'state',
-                'task',
-                'created at',
-                'creator name',
-                'creator login',
-                'creator email',
-                'closer name',
-                'closer login',
-                'closer email',
-                'closed at',
-                'comment body',
-                'comment created at',
-                'comment author name',
-                'comment author login',
-                'comment author email',
-                'assignee story',
-                'connected pull requests',
-                'labels',
-                'milestone'
-            )
-        )
-
-    for repo in get_next_repo(client, repositories):
-
-        try:
-            log_repository_issues(repo, csv_name, token, start, finish)
-            sleep(timedelta)
-        except Exception as e:
-            print(e)
-
-
-def log_invitations(client: Github, repositories, csv_name):
-    with open(csv_name, 'w', newline='') as file:
-        writer = csv.writer(file)
-        writer.writerow(
-            (
-                'repository name',
-                'invited login',
-                'invite creation date',
-                'invitation url'
-            )
-        )
-        for repo in get_next_repo(client, repositories):
-            invitations = repo.get_pending_invitations()
-            for invite in invitations:
-                try:
-                    invite_info = [repo.full_name, invite.invitee.login, invite.created_at.strftime("%d/%m/%Y, %H:%M:%S"), invite.html_url]
-                    writer.writerow(invite_info)
-                    print(invite_info)
-                    sleep(timedelta)
-                except Exception as e:
-                    print(e)
-
-
-def log_commits(client: Github, repositories, csv_name, start, finish, branch):
-    with open(csv_name, 'w', newline='') as file:
-        writer = csv.writer(file)
-        writer.writerow(
-            (
-                'repository name',
-                'author name',
-                'author login',
-                'author email',
-                'date and time',
-                'changed files',
-                'commit id',
-                'branch'
-            )
-        )
-
-    for repo in get_next_repo(client, repositories):
-
-        try:
-            log_repository_commits(repo, csv_name, start, finish, branch)
-            sleep(timedelta)
-        except Exception as e:
-            print(e)
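The refactored get_assignee_story above folds the "assigned" and "unassigned" branches into one code path. An illustration of the string it produces, with invented dates and logins:

```python
# Illustration of the assignee-story format: "assigned" renders as "->",
# "unassigned" as "-/>". All values below are made up.
events = [("2023-05-01 10:00:00", "alice", "bob", "assigned"),
          ("2023-05-02 12:30:00", "alice", "bob", "unassigned")]
story = ""
for date, assigner, assignee, kind in events:
    story += f"{date}: {assigner} -"
    if kind == "unassigned":
        story += "/"
    story += f"> {assignee}; "
print(story)  # 2023-05-01 10:00:00: alice -> bob; 2023-05-02 12:30:00: alice -/> bob;
```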
diff --git a/invites_parser.py b/invites_parser.py
new file mode 100644
index 0000000..9540c52
--- /dev/null
+++ b/invites_parser.py
@@ -0,0 +1,28 @@
+import csv
+import pytz
+import requests
+import json
+from time import sleep
+from github import Github, Repository, GithubException, PullRequest
+
+TIMEDELTA = 0.05
+FIELDNAMES = ('repository name', 'invited login', 'invite creation date', 'invitation url')
+
+
+def log_inviter(repo, invite, writer):
+    invite_info = [repo.full_name, invite.invitee.login, invite.created_at.strftime("%d/%m/%Y, %H:%M:%S"), invite.html_url]
+    writer.writerow(invite_info)
+    print(invite_info)
+
+
+def log_invitations(client: Github, working_repos, csv_name):
+    with open(csv_name, 'w', newline='') as file:
+        writer = csv.writer(file)
+        writer.writerow(FIELDNAMES)
+        for repo in working_repos:
+            print('=' * 20, repo.full_name, '=' * 20)
+            invitations = repo.get_pending_invitations()
+            for invite in invitations:
+                try:
+                    log_inviter(repo, invite, writer)
+                    sleep(TIMEDELTA)
+                except Exception as e:
+                    print(e)
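For reference, a minimal PyGithub sketch of the invitations API this module wraps; the token and repository name are placeholders:

```python
# List pending invitations for one repository via PyGithub.
from github import Github

client = Github(login_or_token="<github_token>")
repo = client.get_repo("owner/example-repo")  # placeholder repository
for invite in repo.get_pending_invitations():
    print(invite.invitee.login, invite.created_at, invite.html_url)
```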
diff --git a/issues_parser.py b/issues_parser.py
new file mode 100644
index 0000000..df19b45
--- /dev/null
+++ b/issues_parser.py
@@ -0,0 +1,150 @@
+import csv
+import pytz
+import requests
+import json
+from time import sleep
+from git_logger import get_assignee_story
+from github import Github, Repository, GithubException, PullRequest
+
+EMPTY_FIELD = 'Empty field'
+TIMEDELTA = 0.05
+TIMEZONE = 'Europe/Moscow'
+FIELDNAMES = ('repository name', 'number', 'title', 'state', 'task', 'created at', 'creator name', 'creator login',
+              'creator email', 'closer name', 'closer login', 'closer email', 'closed at', 'comment body',
+              'comment created at', 'comment author name', 'comment author login', 'comment author email',
+              'assignee story', 'connected pull requests', 'labels', 'milestone')
+
+
+def log_issue_to_csv(info, csv_name):
+    with open(csv_name, 'a', newline='') as file:
+        writer = csv.DictWriter(file, fieldnames=FIELDNAMES)
+        writer.writerow(info)
+
+
+def log_issue_to_stdout(info):
+    print(info)
+
+
+def get_connected_pulls(issue_number, repo_owner, repo_name, token):
+    access_token = token
+    repo_owner = repo_owner.login
+    # Build the GraphQL query
+    query = """
+    {
+      repository(owner: "%s", name: "%s") {
+        issue(number: %d) {
+          timelineItems(first: 50, itemTypes:[CONNECTED_EVENT,CROSS_REFERENCED_EVENT]) {
+            filteredCount
+            nodes {
+              ... on ConnectedEvent {
+                ConnectedEvent: subject {
+                  ... on PullRequest {
+                    number
+                    title
+                    url
+                  }
+                }
+              }
+              ... on CrossReferencedEvent {
+                CrossReferencedEvent: source {
+                  ... on PullRequest {
+                    number
+                    title
+                    url
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }""" % (repo_owner, repo_name, issue_number)
+
+    # Build the request headers
+    headers = {
+        "Authorization": f"Bearer {access_token}",
+        "Content-Type": "application/json"
+    }
+
+    # Send the GraphQL request
+    response = requests.post("https://api.github.com/graphql", headers=headers, data=json.dumps({"query": query}))
+    response_data = response.json()
+    # Process the returned data
+    pull_request_data = response_data["data"]["repository"]["issue"]
+    list_url = []
+    if pull_request_data is not None:
+        issues_data = pull_request_data["timelineItems"]["nodes"]
+        for pulls in issues_data:
+            if pulls.get("CrossReferencedEvent") is not None and pulls.get("CrossReferencedEvent").get(
+                    "url") not in list_url:
+                list_url.append(pulls.get("CrossReferencedEvent").get("url"))
+            if pulls.get("ConnectedEvent") is not None and pulls.get("ConnectedEvent").get("url") not in list_url:
+                list_url.append(pulls.get("ConnectedEvent").get("url"))
+        if not list_url:
+            return EMPTY_FIELD
+        else:
+            return ';'.join(list_url)
+    return EMPTY_FIELD
+
+
+def log_repository_issues(repository: Repository, csv_name, token, start, finish):
+    for issue in repository.get_issues(state='all'):
+        if issue.created_at.astimezone(pytz.timezone(TIMEZONE)) < start or issue.created_at.astimezone(
+                pytz.timezone(TIMEZONE)) > finish:
+            continue
+        nvl = lambda val: val or EMPTY_FIELD
+        get_info = lambda obj, attr: EMPTY_FIELD if obj is None else getattr(obj, attr)
+        info_tmp = {
+            'repository name': repository.full_name, 'number': issue.number, 'title': issue.title,
+            'state': issue.state, 'task': issue.body,
+            'created at': issue.created_at,
+            'creator name': get_info(issue.user, 'name'),
+            'creator login': get_info(issue.user, 'login'),
+            'creator email': get_info(issue.user, 'email'),
+            'closed at': nvl(issue.closed_at),
+            'closer name': get_info(issue.closed_by, 'name'),
+            'closer login': get_info(issue.closed_by, 'login'),
+            'closer email': get_info(issue.closed_by, 'email'),
+            'comment body': EMPTY_FIELD,
+            'comment created at': EMPTY_FIELD,
+            'comment author name': EMPTY_FIELD,
+            'comment author login': EMPTY_FIELD,
+            'comment author email': EMPTY_FIELD,
+            'assignee story': get_assignee_story(issue),
+            'connected pull requests': EMPTY_FIELD if issue.number is None else get_connected_pulls(issue.number, repository.owner, repository.name, token),
+            'labels': EMPTY_FIELD if issue.labels is None else ';'.join([label.name for label in issue.labels]),
+            'milestone': get_info(issue.milestone, 'title')
+        }
+
+        if issue.get_comments().totalCount > 0:
+            for comment in issue.get_comments():
+                info = info_tmp.copy()
+                info['comment body'] = comment.body
+                info['comment created at'] = comment.created_at
+                info['comment author name'] = comment.user.name
+                info['comment author login'] = comment.user.login
+                info['comment author email'] = comment.user.email
+                log_issue_to_csv(info, csv_name)
+                log_issue_to_stdout(info)
+        else:
+            log_issue_to_csv(info_tmp, csv_name)
+            log_issue_to_stdout(info_tmp)
+        sleep(TIMEDELTA)
+
+
+def log_issues(client: Github, working_repo, csv_name, token, start, finish, fork_flag):
+    with open(csv_name, 'w', newline='') as file:
+        writer = csv.writer(file)
+        writer.writerow(FIELDNAMES)
+
+    for repo in working_repo:
+        try:
+            print('=' * 20, repo.full_name, '=' * 20)
+            log_repository_issues(repo, csv_name, token, start, finish)
+            if fork_flag:
+                for forked_repo in repo.get_forks():
+                    print('=' * 20, "FORKED:", forked_repo.full_name, '=' * 20)
+                    log_repository_issues(forked_repo, csv_name, token, start, finish)
+                    sleep(TIMEDELTA)
+            sleep(TIMEDELTA)
+        except Exception as e:
+            print(e)
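For context, an invented example of the GraphQL response shape get_connected_pulls walks; the ConnectedEvent/CrossReferencedEvent keys are the aliases declared in the query above, and all URLs and numbers are made up:

```python
# Invented response shape plus the same extraction logic as above.
response_data = {"data": {"repository": {"issue": {"timelineItems": {"nodes": [
    {"CrossReferencedEvent": {"number": 7, "title": "Fix crash",
                              "url": "https://github.com/owner/repo/pull/7"}},
    {"ConnectedEvent": {"number": 9, "title": "Add parser",
                        "url": "https://github.com/owner/repo/pull/9"}},
]}}}}}

urls = []
for node in response_data["data"]["repository"]["issue"]["timelineItems"]["nodes"]:
    for key in ("CrossReferencedEvent", "ConnectedEvent"):
        event = node.get(key)
        if event is not None and event.get("url") not in urls:
            urls.append(event["url"])
print(';'.join(urls))
```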
diff --git a/main.py b/main.py
index ee5d06b..64dac9a 100644
--- a/main.py
+++ b/main.py
@@ -4,19 +4,30 @@ import git_logger
 import export_sheets
+import commits_parser
+import pull_requests_parser
+import issues_parser
+import invites_parser
+import wikipars
+import contributors_parser
 
 def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("--invites", help="print pending invites", action="store_true")
-    parser.add_argument("-p", help="log pull requests", action="store_true")
-    parser.add_argument("-i", help="log issues", action="store_true")
-    parser.add_argument("-e", help="export table to google sheets", action="store_true")
+    parser.add_argument("-c", "--commits", help="log commits", action="store_true")
+    parser.add_argument("-p", "--pull_requests", help="log pull requests", action="store_true")
+    parser.add_argument("-i", "--issues", help="log issues", action="store_true")
+    parser.add_argument("-w", "--wikis", help="log wikis", action="store_true")
+    parser.add_argument("--contributors", help="log contributors", action="store_true")
+    parser.add_argument("--forks_include", help="also log data from forks", action="store_true")
+    parser.add_argument("-e", "--export_google_sheets", help="export table to google sheets", action="store_true")
     parser.add_argument('-t', '--token', type=str, required=True, help='GitHub account token')
     parser.add_argument('-l', '--list', type=str, required=True, help='repos names file')
+    parser.add_argument("--download_repos", type=str, help="path to downloaded repositories", default='./')
    parser.add_argument('-o', '--out', type=str, required=True, help='output filename')
     parser.add_argument('-s', '--start', type=str, required=False, help='start time', default='2000/01/01-00:00:00')
     parser.add_argument('-f', '--finish', type=str, required=False, help='finish time', default='2400/01/01-00:00:00')
-    parser.add_argument('-b', '--branch', type=str, required=False, help='branch to select commits, default "main", use "all" to get all commits from all branches', default='main')
+    parser.add_argument('-b', '--branch', type=str, required=False, help='branch to select commits from; defaults to the repository default branch; use "all" to get commits from all branches', default=None)
     parser.add_argument('--google_token', type=str, required=False, help='Specify path to google token file')
     parser.add_argument('--table_id', type=str, required=False,
                         help='Specify Google sheet document id (can find in url)')
@@ -24,7 +35,7 @@ def parse_args():
                         help='Specify title for a sheet in a document in which data will be printed')
 
     args = parser.parse_args()
-    if args.e:
+    if args.export_google_sheets:
         for action in parser._actions:
             if action.dest == 'google_token':
                 action.required = True
@@ -41,7 +52,7 @@ def parse_time(datetime_str):
     start = [int(i) for i in start]
     start_datetime = datetime(year=start[0], month=start[1], day=start[2], hour=start[3], minute=start[4],
                               second=start[5])
-    return start_datetime.astimezone(pytz.timezone(git_logger.timezone))
+    return start_datetime.astimezone(pytz.timezone(git_logger.TIMEZONE))
 
 
 def main():
@@ -49,31 +60,33 @@ def main():
     token = args.token
     repositories = args.list
     csv_name = args.out
+    path_drepo = args.download_repos
+    fork_flag = args.forks_include
 
     try:
         client = git_logger.login(token=token)
     except Exception as e:
         print(e)
     else:
+        working_repos = git_logger.get_next_repo(client, repositories)
         if args.start:
             start = parse_time(args.start.split('-'))
         if args.finish:
             finish = parse_time(args.finish.split('-'))
-        if not args.p and not args.i and not args.invites:
-            git_logger.log_commits(client, repositories, csv_name, start, finish, args.branch)
-            if (args.e):
-                export_sheets.write_data_to_table(csv_name, args.google_token, args.table_id, args.sheet_id)
-        if args.p:
-            git_logger.log_pull_requests(client, repositories, csv_name, token, start, finish)
-            if (args.e):
-                export_sheets.write_data_to_table(csv_name, args.google_token, args.table_id, args.sheet_id)
-        if args.i:
-            git_logger.log_issues(client, repositories, csv_name, token, start, finish)
-            if (args.e):
-                export_sheets.write_data_to_table(csv_name, args.google_token, args.table_id, args.sheet_id)
+        if args.commits:
+            commits_parser.log_commits(client, working_repos, csv_name, start, finish, args.branch, fork_flag)
+        if args.pull_requests:
+            pull_requests_parser.log_pull_requests(client, working_repos, csv_name, token, start, finish, fork_flag)
+        if args.issues:
+            issues_parser.log_issues(client, working_repos, csv_name, token, start, finish, fork_flag)
         if args.invites:
-            git_logger.log_invitations(client, repositories, csv_name)
-
+            invites_parser.log_invitations(client, working_repos, csv_name)
+        if args.wikis:
+            wikipars.wikiparser(client, repositories, path_drepo, csv_name)
+        if args.contributors:
+            contributors_parser.log_contributors(client, token, working_repos, csv_name, fork_flag)
+        if args.export_google_sheets:
+            export_sheets.write_data_to_table(csv_name, args.google_token, args.table_id, args.sheet_id)
 
 if __name__ == '__main__':
-    main()
+    main()
\ No newline at end of file
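The hunks above show only the tail of parse_time, so the following is an assumption-labeled reconstruction of how the list produced by `args.start.split('-')` is likely turned into an aware datetime; only the last three lines are visible in the diff:

```python
# Hypothetical reconstruction of parse_time; the argument is the list
# ['YYYY/MM/DD', 'HH:MM:SS'] that main() passes in.
from datetime import datetime
import pytz

TIMEZONE = 'Europe/Moscow'

def parse_time(datetime_str):
    date_part, time_part = datetime_str          # assumed elided step
    start = date_part.split('/') + time_part.split(':')  # assumed elided step
    start = [int(i) for i in start]
    start_datetime = datetime(year=start[0], month=start[1], day=start[2],
                              hour=start[3], minute=start[4], second=start[5])
    # Matches the visible tail: interpret locally, convert to the project timezone.
    return start_datetime.astimezone(pytz.timezone(TIMEZONE))

print(parse_time('2000/01/01-00:00:00'.split('-')))
```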
diff --git a/pull_requests_parser.py b/pull_requests_parser.py
new file mode 100644
index 0000000..5cb85bc
--- /dev/null
+++ b/pull_requests_parser.py
@@ -0,0 +1,141 @@
+import csv
+import pytz
+import requests
+import json
+from time import sleep
+from git_logger import get_assignee_story
+from github import Github, Repository, GithubException, PullRequest
+
+EMPTY_FIELD = 'Empty field'
+TIMEDELTA = 0.05
+TIMEZONE = 'Europe/Moscow'
+FIELDNAMES = ('repository name', 'title', 'id', 'state', 'commit into', 'commit from', 'created at', 'creator name',
+              'creator login', 'creator email', 'changed files', 'comment body',
+              'comment created at', 'comment author name', 'comment author login',
+              'comment author email', 'merger name', 'merger login', 'merger email', 'source branch',
+              'target branch', 'assignee story', 'related issues', 'labels', 'milestone')
+
+
+def log_pr_to_stdout(info):
+    print(info)
+
+
+def log_pr_to_csv(info, csv_name):
+    with open(csv_name, 'a', newline='') as file:
+        writer = csv.DictWriter(file, fieldnames=FIELDNAMES)
+        writer.writerow(info)
+
+
+def get_related_issues(pull_request_number, repo_owner, repo_name, token):
+    access_token = token
+    repo_owner = repo_owner.login
+
+    # Build the GraphQL query
+    query = """
+    {
+      repository(owner: "%s", name: "%s") {
+        pullRequest(number: %d) {
+          id
+          closingIssuesReferences(first: 50) {
+            edges {
+              node {
+                id
+                body
+                number
+                title
+                url
+              }
+            }
+          }
+        }
+      }
+    }
+    """ % (repo_owner, repo_name, pull_request_number)
+
+    # Build the request headers
+    headers = {
+        "Authorization": f"Bearer {access_token}",
+        "Content-Type": "application/json"
+    }
+
+    # Send the GraphQL request
+    response = requests.post("https://api.github.com/graphql", headers=headers, data=json.dumps({"query": query}))
+    response_data = response.json()
+    # Process the returned data
+    pull_request_data = response_data["data"]["repository"]["pullRequest"]
+    issues_data = pull_request_data["closingIssuesReferences"]["edges"]
+    list_issues_url = []
+    # Save the issue information
+    for issue in issues_data:
+        issue_node = issue["node"]
+        list_issues_url.append(issue_node["url"])
+    return ';'.join(list_issues_url)
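As a design note, the query above interpolates values with `%`; a hedged sketch of the same call using GraphQL variables instead, which sidesteps quoting issues (owner, name, number, and token are placeholders):

```python
# Alternative sketch: the closingIssuesReferences lookup with GraphQL variables.
import requests

QUERY = """
query($owner: String!, $name: String!, $number: Int!) {
  repository(owner: $owner, name: $name) {
    pullRequest(number: $number) {
      closingIssuesReferences(first: 50) { edges { node { url } } }
    }
  }
}
"""

def related_issue_urls(owner, name, number, token):
    response = requests.post(
        "https://api.github.com/graphql",
        headers={"Authorization": f"Bearer {token}"},
        json={"query": QUERY, "variables": {"owner": owner, "name": name, "number": number}},
    )
    edges = response.json()["data"]["repository"]["pullRequest"]["closingIssuesReferences"]["edges"]
    return ";".join(edge["node"]["url"] for edge in edges)
```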
+
+
+def log_repositories_pr(repository: Repository, csv_name, token, start, finish):
+    for pull in repository.get_pulls(state='all'):
+        if pull.created_at.astimezone(pytz.timezone(TIMEZONE)) < start or pull.created_at.astimezone(
+                pytz.timezone(TIMEZONE)) > finish:
+            continue
+        nvl = lambda val: val or EMPTY_FIELD
+        get_info = lambda obj, attr: EMPTY_FIELD if obj is None else getattr(obj, attr)
+        info_tmp = {
+            'repository name': repository.full_name,
+            'title': pull.title,
+            'id': pull.number,
+            'state': pull.state,
+            'commit into': pull.base.label,
+            'commit from': pull.head.label,
+            'created at': pull.created_at,
+            'creator name': nvl(pull.user.name),
+            'creator login': pull.user.login,
+            'creator email': pull.user.email,
+            'changed files': '; '.join([file.filename for file in pull.get_files()]),
+            'comment body': EMPTY_FIELD,
+            'comment created at': EMPTY_FIELD,
+            'comment author name': EMPTY_FIELD,
+            'comment author login': EMPTY_FIELD,
+            'comment author email': EMPTY_FIELD,
+            'merger name': get_info(pull.merged_by, 'name'),
+            'merger login': get_info(pull.merged_by, 'login'),
+            'merger email': get_info(pull.merged_by, 'email'),
+            'source branch': pull.head.ref,
+            'target branch': pull.base.ref,
+            'assignee story': get_assignee_story(pull),
+            'related issues': EMPTY_FIELD if pull.issue_url is None else get_related_issues(pull.number, repository.owner, repository.name, token),
+            'labels': EMPTY_FIELD if pull.labels is None else ';'.join([label.name for label in pull.labels]),
+            'milestone': get_info(pull.milestone, 'title')
+        }
+
+        if pull.get_comments().totalCount > 0:
+            for comment in pull.get_comments():
+                info = info_tmp.copy()
+                info['comment body'] = comment.body
+                info['comment created at'] = comment.created_at
+                info['comment author name'] = comment.user.name
+                info['comment author login'] = comment.user.login
+                info['comment author email'] = nvl(comment.user.email)
+                log_pr_to_csv(info, csv_name)
+                log_pr_to_stdout(info)
+        else:
+            log_pr_to_csv(info_tmp, csv_name)
+            log_pr_to_stdout(info_tmp)
+        sleep(TIMEDELTA)
+
+
+def log_pull_requests(client: Github, working_repos, csv_name, token, start, finish, fork_flag):
+    with open(csv_name, 'w', newline='') as file:
+        writer = csv.writer(file)
+        writer.writerow(FIELDNAMES)
+
+    for repo in working_repos:
+        try:
+            print('=' * 20, repo.full_name, '=' * 20)
+            log_repositories_pr(repo, csv_name, token, start, finish)
+            if fork_flag:
+                for forked_repo in repo.get_forks():
+                    print('=' * 20, "FORKED:", forked_repo.full_name, '=' * 20)
+                    log_repositories_pr(forked_repo, csv_name, token, start, finish)
+                    sleep(TIMEDELTA)
+            sleep(TIMEDELTA)
+        except Exception as e:
+            print(e)
diff --git a/requirements.txt b/requirements.txt
index 4121bd5..bf00414 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
+GitPython==3.1.41
 PyGithub~=1.55
 pygsheets==2.0.5
 pandas==1.4.3
 pytz~=2023.3
 requests~=2.31.0
-
diff --git a/wikipars.py b/wikipars.py
new file mode 100644
index 0000000..0b8c5a0
--- /dev/null
+++ b/wikipars.py
@@ -0,0 +1,87 @@
+from git import Repo, exc
+import os
+import time
+import csv
+
+WIKI_FIELDNAMES = ['repository name', 'author name', 'author login', 'datetime', 'page', 'action', 'revision id',
+                   'added lines', 'deleted lines']
+
+
+def log_wiki_to_csv(info, csv_name):
+    with open(csv_name, 'a', newline='') as file:
+        writer = csv.DictWriter(file, fieldnames=WIKI_FIELDNAMES)
+        writer.writerow(info)
+
+
+def wikiparser(client, repositories, path_drepo, csv_name):
+    with open(csv_name, 'w', newline='') as file:
+        writer = csv.writer(file)
+        writer.writerow(WIKI_FIELDNAMES)
+
+    # Build the list of repositories from the file
+    with open(repositories, 'r') as file:
+        list_repos = [x for x in file.read().split('\n') if x]
+    error_repos = []
+
+    data_changes = []
+    for name_rep in list_repos:
+        # Check whether the repository is already in the folder
+        dir_path = path_drepo + "/" + name_rep
+        if os.path.exists(dir_path):
+            # Update the repository
+            if len(os.listdir(dir_path)) > 0:
+                repo = Repo(dir_path)
+                repo.remotes.origin.pull()
+            else:
+                os.rmdir(dir_path)
+                error_repos.append(name_rep)
+                continue
+        else:
+            # Clone the repository into the folder
+            dir_path = path_drepo + "/" + name_rep
+            os.makedirs(dir_path, exist_ok=True)
+            repo_url = f"git@github.com:{name_rep}.wiki.git"
+            try:
+                repo = Repo.clone_from(repo_url, dir_path)
+            except exc.GitCommandError:
+                os.rmdir(dir_path)
+                error_repos.append(name_rep)
+                continue
+
+        print("=" * 20, name_rep, "=" * 20)
+        # Print the changes
+        EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"  # Hash of the empty tree, used to diff the first commit. The approach was found here: https://stackoverflow.com/questions/33916648/get-the-diff-details-of-first-commit-in-gitpython
+        wiki_commits = repo.iter_commits(all=True)
+        activity = {"A": "Page added", "M": "Page modified", "D": "Page deleted", "R": "Page renamed"}
+        for commit in wiki_commits:
+            data_commit = dict()
+            parent = commit.parents
+            data_commit["repository name"] = name_rep
+            data_commit["author name"] = commit.author.name
+            if commit.author.email and len(commit.author.email.split('+')) > 1:
+                data_commit["author login"] = commit.author.email.split('+')[1].split('@users')[0]
+            else:
+                data_commit["author login"] = "empty login"
+            data_commit["datetime"] = time.strftime("%Y-%m-%d %H:%M:%S%z", time.gmtime(commit.committed_date))
+            if parent:
+                data_commit["page"] = ';'.join([diff.b_path for diff in parent[0].diff(commit)])
+                data_commit["action"] = ';'.join([activity[diff.change_type] for diff in parent[0].diff(commit)])
+            else:
+                # First commit
+                data_commit["page"] = ';'.join([diff.b_path for diff in commit.diff(EMPTY_TREE_SHA)])
+                data_commit["action"] = ';'.join([activity["A"]])
+            data_commit["revision id"] = commit.hexsha
+            data_commit["added lines"] = commit.stats.total["insertions"]
+            data_commit["deleted lines"] = commit.stats.total["deletions"]
+            for fieldname in data_commit:
+                print(fieldname, data_commit[fieldname], sep=': ')
+            print("-" * 40)
+            log_wiki_to_csv(data_commit, csv_name)
+            data_changes.append(data_commit)
+
+    # Print the repositories that caused errors
+    if error_repos:
+        print("!=====Problem repositories=====!")
+        for rep in error_repos:
+            print(rep)
+
+    return data_changes
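Finally, a minimal GitPython sketch of the empty-tree trick wikipars uses for the first commit; the checkout path is a placeholder, and a single linear history is assumed:

```python
# Diffing a commit against the well-known empty-tree hash lists the files it
# introduced, which is how wikipars handles the parentless first commit.
from git import Repo

EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"

repo = Repo("./some-wiki-checkout")  # placeholder path to a cloned wiki
commits = list(repo.iter_commits(all=True))
root_commit = commits[-1]  # iter_commits yields newest first
for diff in root_commit.diff(EMPTY_TREE_SHA):
    print(diff.b_path)  # pages present in the first commit, as in wikipars
```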