-
Notifications
You must be signed in to change notification settings - Fork 92
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Scraper github.ts divided into different files for better understanding
- Loading branch information
1 parent
57e9445
commit 0bb6d00
Showing
14 changed files
with
1,824 additions
and
881 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,4 +21,4 @@ const octokit = new Octokit({ | |
auth: getGitHubAccessToken(), | ||
}); | ||
|
||
export default octokit; | ||
export default octokit; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,24 @@ | ||
{ | ||
"name": "scraper", | ||
"version": "1.0.0", | ||
"description": "", | ||
"main": "index.js", | ||
"scripts": { | ||
"test": "echo \"Error: no test specified\" && exit 1" | ||
}, | ||
"keywords": [], | ||
"author": "", | ||
"license": "ISC", | ||
"dependencies": { | ||
"@octokit/graphql": "^8.1.1", | ||
"@octokit/types": "^13.5.0", | ||
"date-fns": "^3.6.0", | ||
"octokit": "^4.0.2", | ||
"yargs": "^17.7.2" | ||
}, | ||
"devDependencies": { | ||
"typescript": "^5.4.5" | ||
} | ||
{ | ||
"name": "scraper", | ||
"version": "1.0.0", | ||
"description": "", | ||
"main": "index.js", | ||
"type": "module", | ||
"scripts": { | ||
"test": "echo \"Error: no test specified\" && exit 1", | ||
"scraper": "node --loader ts-node/esm src/github-scraper/index.ts coronasafe ../../data-repo/github" | ||
}, | ||
"keywords": [], | ||
"author": "", | ||
"license": "ISC", | ||
"dependencies": { | ||
"date-fns": "^3.6.0", | ||
"octokit": "^4.0.2", | ||
"yargs": "^17.7.2" | ||
}, | ||
"devDependencies": { | ||
"@types/node": "^16.11.18", | ||
"ts-node": "^10.9.2", | ||
"typescript": "^4.9.5" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import { Octokit } from "octokit"; | ||
|
||
/** GitHub API token, read once from the `GITHUB_TOKEN` environment variable. */
export const GITHUB_TOKEN = process.env.GITHUB_TOKEN;

// Without a token there is nothing useful this module can do, so the process
// is terminated with a non-zero status as soon as the module is loaded.
if (!GITHUB_TOKEN) {
  console.error("GITHUB_TOKEN not found in environment");
  process.exit(1);
}

/** Shared Octokit client authenticated with `GITHUB_TOKEN`. */
export const octokit = new Octokit({ auth: GITHUB_TOKEN });
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import { octokit } from "./config.js"; | ||
import { IGitHubEvent } from "./types.js"; | ||
|
||
/**
 * Fetches the event feed of an organisation and returns the events that were
 * created inside the window `(startDate, endDate]`, were not produced by a
 * blacklisted bot account, and are of a type the scraper cares about.
 *
 * The scan stops at the first event created at or before `startDate`; this
 * assumes the feed is ordered newest-first (TODO confirm against the API).
 *
 * @param org - Organisation login whose events are requested.
 * @param startDate - Exclusive lower bound of the window.
 * @param endDate - Inclusive upper bound of the window.
 * @returns The matching events, in the order they were received.
 */
export const fetchEvents = async (
  org: string,
  startDate: Date,
  endDate: Date,
) => {
  const events = await octokit.paginate(
    "GET /orgs/{org}/events",
    {
      org: org,
      // 100 is the largest page size the REST API accepts.
      per_page: 100,
    },
    (response: { data: IGitHubEvent[] }) => {
      return response.data;
    },
  );

  // Built once instead of on every loop iteration.
  const blacklistedActors: string[] = [
    "dependabot",
    "snyk-bot",
    "codecov-commenter",
    "github-actions[bot]",
  ];
  const requiredEventTypes: string[] = [
    "IssueCommentEvent",
    "IssuesEvent",
    "PullRequestEvent",
    "PullRequestReviewEvent",
  ];

  // Counts every event inside the window, including the ones filtered out.
  let eventsCount = 0;
  const filteredEvents: typeof events = [];

  for (const event of events) {
    // An event without a usable timestamp cannot be placed in the window.
    // Skip it rather than treating it as the epoch, which would end the scan.
    if (event.created_at == null) {
      continue;
    }
    const eventTime = new Date(event.created_at);
    if (Number.isNaN(eventTime.getTime())) {
      continue;
    }

    if (eventTime > endDate) {
      continue;
    }
    if (eventTime <= startDate) {
      // Leave the loop (instead of returning) so the summary is still logged.
      break;
    }

    const isBlacklisted = blacklistedActors.includes(event.actor.login);
    const isRequiredEventType = requiredEventTypes.includes(event.type ?? "");

    if (!isBlacklisted && isRequiredEventType) {
      console.log(event.type);
      filteredEvents.push(event);
    }
    eventsCount++;
  }

  console.log(`Fetched ${eventsCount} events`);

  return filteredEvents;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import { octokit } from "./config.js"; | ||
import { OpenPr } from "./types.js"; | ||
import { resolve_autonomy_responsibility } from "./utils.js"; | ||
|
||
/**
 * Collects merged pull requests that are linked from the timelines of closed
 * issues authored by `user` inside `org`.
 *
 * For every closed issue the timeline is downloaded, each timeline event is
 * passed to `resolve_autonomy_responsibility`, and for the events it accepts
 * the pull request found at `event.source.issue.pull_request` is recorded when
 * it carries a `merged_at` value.
 *
 * @param user - Login of the issue author.
 * @param org - Organisation the search is restricted to.
 * @returns One `{ issue_link, pr_link }` entry per merged pull request found.
 */
export const fetch_merge_events = async (user: string, org: string) => {
  console.log("Merge events for : ", user);

  // Paginate the search so authors with more closed issues than fit on one
  // page are not silently truncated.
  const issues = await octokit.paginate("GET /search/issues", {
    q: `is:issue is:closed org:${org} author:${user}`,
    per_page: 100,
  });

  const merged_prs: { issue_link: string; pr_link: string }[] = [];

  for (const issue of issues) {
    // Without a timeline URL there is nothing to inspect for this issue.
    if (!issue.timeline_url) {
      continue;
    }

    // NOTE(review): only the first page (up to 100 events) of each timeline is
    // inspected; very long timelines would need pagination as well.
    const { data: timeline_events } = await octokit.request(
      "GET " + issue.timeline_url,
      { per_page: 100 },
    );

    for (const event of timeline_events) {
      if (!(await resolve_autonomy_responsibility(event, user))) {
        continue;
      }
      // Not every timeline event carries a `source`, so walk the chain
      // defensively instead of assuming the shape.
      const pull_request = event.source?.issue?.pull_request;
      if (pull_request?.merged_at) {
        merged_prs.push({
          issue_link: issue.html_url,
          pr_link: pull_request.html_url,
        });
      }
    }
  }

  return merged_prs;
};
|
||
/**
 * Lists the open pull requests authored by `user` inside `org` and reports,
 * for each one, how many whole days have passed since it was last updated.
 *
 * @param user - Login of the pull request author.
 * @param org - Organisation the search is restricted to.
 * @returns One `OpenPr` entry per open pull request, in search-result order.
 */
export const fetchOpenPulls = async (user: string, org: string) => {
  console.log(`Fetching open pull requests for ${user}`);

  // Paginate the search so authors with more open pull requests than fit on
  // one page are not silently truncated.
  const pulls = await octokit.paginate("GET /search/issues", {
    q: `is:pr is:open org:${org} author:${user}`,
    per_page: 100,
  });

  const msPerDay = 1000 * 60 * 60 * 24;
  // Taken once so every pull request is measured against the same instant.
  const now = Date.now();

  const open_prs: OpenPr[] = pulls.map((pr) => ({
    link: pr.html_url,
    title: pr.title,
    stale_for: Math.floor((now - new Date(pr.updated_at).getTime()) / msPerDay),
    // Labels without a name are dropped so the list only contains strings.
    labels: pr.labels
      .map((label) => label.name)
      .filter((name): name is string => typeof name === "string"),
  }));

  console.log(`Fetched ${pulls.length} open pull requests for ${user}`);
  return open_prs;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
import { formatISO, parseISO, startOfDay, subDays } from "date-fns"; | ||
import { IGitHubEvent, ProcessData } from "./types.js"; | ||
import { fetch_merge_events, fetchOpenPulls } from "./fetchUserData.js"; | ||
import { fetchEvents } from "./fetchEvents.js"; | ||
import { parseEvents } from "./parseEvents.js"; | ||
import { merged_data } from "./saveData.js"; | ||
|
||
// Per-user results of the current run; replaced by the output of
// `parseEvents` and then extended in place by `scrapeGitHub`.
let processedData: ProcessData = {};

/**
 * Scrapes activity for `org` over the `numDays` days that end at the start of
 * `date`, and stores the result in the module-level `processedData`.
 *
 * Events in the window are fetched and parsed first; afterwards, for every
 * user present in the parsed data, merged pull requests and open pull requests
 * are fetched and appended to that user's entry. A failure while fetching
 * either list for one user is logged and does not abort the run.
 *
 * @param org - Organisation login to scrape.
 * @param date - ISO 8601 date string; the window ends at the start of this day.
 * @param numDays - Length of the window in days (defaults to 1).
 * @throws Error when `date` cannot be parsed or `numDays` is not a finite,
 *   non-negative number.
 */
const scrapeGitHub = async (
  org: string,
  date: string,
  numDays: number = 1,
): Promise<void> => {
  const endDate: Date = startOfDay(parseISO(date));
  // Fail early with a clear message; otherwise an invalid value only surfaces
  // later as a RangeError when the window is formatted for logging.
  if (Number.isNaN(endDate.getTime())) {
    throw new Error(
      `Invalid date "${date}": expected an ISO 8601 date such as 2024-05-01`,
    );
  }
  if (!Number.isFinite(numDays) || numDays < 0) {
    throw new Error(
      `Invalid numDays "${numDays}": expected a non-negative number`,
    );
  }

  const startDate: Date = startOfDay(subDays(endDate, numDays));
  console.log(
    `Scraping GitHub data for ${org} from ${formatISO(startDate)} to ${formatISO(endDate)}`,
  );

  const events: IGitHubEvent[] = (await fetchEvents(
    org,
    startDate,
    endDate,
  )) as IGitHubEvent[];
  processedData = await parseEvents(events);

  for (const user of Object.keys(processedData)) {
    // Defensive default in case the parsed entry for this user is empty.
    if (!processedData[user]) {
      processedData[user] = {
        authored_issue_and_pr: [],
        last_updated: "",
        activity: [],
        open_prs: [],
      };
    }

    try {
      const merged_prs = await fetch_merge_events(user, org);
      for (const pr of merged_prs) {
        processedData[user].authored_issue_and_pr.push(pr);
      }
    } catch (e) {
      console.error(`Error fetching merge events for ${user}: ${e}`);
    }

    try {
      const open_prs = await fetchOpenPulls(user, org);
      for (const pr of open_prs) {
        processedData[user].open_prs.push(pr);
      }
    } catch (e) {
      console.error(`Error fetching open pulls for ${user}: ${e}`);
    }
  }

  console.log("Scraping completed");
};
|
||
// Type Done and check done | ||
/**
 * Command-line entry point.
 *
 * Reads `<org> <dataDir> [date] [numDays]` from the process arguments, runs
 * the scraper, and hands the collected data to `merged_data` together with
 * `dataDir`. `date` defaults to yesterday's date and `numDays` defaults to 1.
 * Exits with status 1 when a required argument is missing or `numDays` is not
 * a finite, non-negative number.
 */
const main = async () => {
  // Extract command line arguments (skip the first two default arguments)
  const args: string[] = process.argv.slice(2);

  // Destructure arguments with default values
  const [
    orgName,
    dataDir,
    date = formatISO(subDays(new Date(), 1), { representation: "date" }),
    numDaysArg = "1",
  ] = args;

  if (!orgName || !dataDir) {
    console.error("Usage: node script.js <org> <dataDir> [date] [numDays]");
    process.exit(1);
  }

  // Reject unusable values here instead of letting NaN reach the scraper.
  const numDays = Number(numDaysArg);
  if (!Number.isFinite(numDays) || numDays < 0) {
    console.error(
      `Invalid numDays "${numDaysArg}": expected a non-negative number`,
    );
    console.error("Usage: node script.js <org> <dataDir> [date] [numDays]");
    process.exit(1);
  }

  await scrapeGitHub(orgName, date, numDays);
  await merged_data(dataDir, processedData);
  console.log("Done");
};

// Report failures explicitly rather than leaving the promise unhandled.
main().catch((error: unknown) => {
  console.error("Scraper failed:", error);
  process.exitCode = 1;
});
Oops, something went wrong.