
Commit

Implement logic to fetch discussion within daterange (updated or created) and merge with old data
dgparmar14 committed Jul 22, 2024
1 parent cc6993c commit 355b5ad
Showing 4 changed files with 126 additions and 72 deletions.
1 change: 0 additions & 1 deletion components/Markdown.tsx
@@ -4,7 +4,6 @@ import remarkGfm from "remark-gfm";
import remarkRehype from "remark-rehype";
import rehypeStringify from "rehype-stringify";
import clsx from "clsx";
import { useMemo } from "react";

export default function Markdown(props: {
children: string;
126 changes: 77 additions & 49 deletions scraper/src/github-scraper/discussion.ts
@@ -2,9 +2,7 @@ import { octokit } from "./config.js";
import { Discussion, ParsedDiscussion } from "./types.js";
import { saveDiscussionData } from "./utils.js";

// Query to fetch discussions from GitHub
const query = `
query($org: String!, $cursor: String) {
const query = `query($org: String!, $cursor: String) {
organization(login: $org) {
repositories(first: 100, after: $cursor) {
pageInfo {
@@ -14,7 +12,7 @@ query($org: String!, $cursor: String) {
edges {
node {
name
discussions(first: 100) {
discussions(first: 100, orderBy: {field: UPDATED_AT, direction: DESC}) {
edges {
node {
title
@@ -24,9 +22,9 @@
}
url
isAnswered
category{
category {
name
emojiHTML
emojiHTML
}
comments(first: 10) {
edges {
@@ -38,67 +36,93 @@ query($org: String!, $cursor: String) {
}
}
createdAt
updatedAt
}
}
}
}
}
}
}
}
`;

async function fetchGitHubDiscussions(org: string, cursor = null) {
const variables = { org, cursor };
const response = await octokit.graphql.paginate(query, variables);
}`;

type Edge = typeof response.organization.repositories.edges;
async function fetchGitHubDiscussions(
org: string,
endDate: Date,
startDate: Date,
cursor = null,
) {
const variables = {
org,
cursor,
};
for await (const response of octokit.graphql.paginate.iterator(
query,
variables,
)) {
const repositories = await response.organization.repositories.edges;
type repo = (typeof repositories)[0];
for (const repo of repositories) {
const discussions = await repo.node.discussions.edges.map(
(discussion: repo) => ({
repoName: repo.node.name,
discussion: discussion.node,
}),
);
const discussionsWithinDateRange = await discussions.find((d: repo) => {
const createdAt = new Date(d.discussion.createdAt);
const updatedAt = new Date(d.discussion.updatedAt);

const discussions = response.organization.repositories.edges.map(
(edge: Edge) => ({
repoName: edge.node.name,
discussions: edge.node.discussions.edges,
}),
);
return (
createdAt >= new Date(startDate) || updatedAt >= new Date(startDate)
);
});
if (discussionsWithinDateRange) {
return discussions;
}
}
}

return discussions;
return null;
}

async function parseDiscussionData(
allDiscussions: { repoName: string; discussions: Discussion[] }[],
allDiscussions: { repoName: string; discussion: Discussion }[],
endDate: Date,
startDate: Date,
) {
const parsedDiscussions: ParsedDiscussion[] = allDiscussions.flatMap(
(repo) => {
const filteredDiscussions = repo.discussions.filter((d) => {
const discussionTime: Date = new Date(d.node.createdAt);
return discussionTime > startDate && discussionTime <= endDate;
});
const discussionsWithinDateRange = allDiscussions.filter((d) => {
const createdAt = new Date(d.discussion.createdAt);
const updatedAt = new Date(d.discussion.updatedAt);

return filteredDiscussions.map((d) => {
const participants = Array.from(
new Set(d.node.comments.edges.map((c) => c.node.author.login)),
);
return {
source: "github",
title: d.node.title,
text: d.node.body,
author: d.node.author.login,
link: d.node.url,
isAnswered: d.node.isAnswered,
time: d.node.createdAt,
category: {
name: d.node.category.name,
emoji: d.node.category.emojiHTML.replace(/<\/?div>/g, ""),
},
participants: participants,
repoName: repo.repoName,
};
});
return (
(createdAt >= new Date(startDate) && createdAt <= new Date(endDate)) ||
(updatedAt >= new Date(startDate) && updatedAt <= new Date(endDate))
);
});
const parsedDiscussions: ParsedDiscussion[] = discussionsWithinDateRange.map(
(d) => {
const participants = d.discussion.comments.edges.map(
(comment) => comment.node.author.login,
);
return {
source: "github",
title: d.discussion.title,
text: d.discussion.body,
author: d.discussion.author.login,
link: d.discussion.url,
isAnswered: d.discussion.isAnswered,
time: d.discussion.createdAt,
updateTime: d.discussion.updatedAt,
category: {
name: d.discussion.category.name,
emoji: d.discussion.category.emojiHTML.replace(/<\/?div>/g, ""),
},
participants: participants || [],
repoName: d.repoName,
};
},
);

return parsedDiscussions;
}

@@ -109,7 +133,11 @@ export async function scrapeDiscussions(
startDate: Date,
) {
try {
const allDiscussions = await fetchGitHubDiscussions(organizationName);
const allDiscussions = await fetchGitHubDiscussions(
organizationName,
endDate,
startDate,
);
const parsedDiscussions = await parseDiscussionData(
allDiscussions,
endDate,
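
For context on the for await loop above: fetchGitHubDiscussions depends on Octokit's GraphQL pagination support. The sketch below is not part of this commit; it shows one way the octokit client exported by config.js could be wired up, assuming the @octokit/plugin-paginate-graphql plugin and a GITHUB_TOKEN environment variable. The repository's actual config.js may differ.

// Hypothetical sketch of the octokit client assumed by discussion.ts.
// Assumes @octokit/plugin-paginate-graphql is installed; the real
// config.js may construct the client differently.
import { Octokit } from "@octokit/core";
import { paginateGraphQL } from "@octokit/plugin-paginate-graphql";

const PaginatedOctokit = Octokit.plugin(paginateGraphQL);

export const octokit = new PaginatedOctokit({
  auth: process.env.GITHUB_TOKEN,
});

// The paginated query must declare a $cursor variable and select
// pageInfo { hasNextPage endCursor }, as the discussions query does.
// octokit.graphql.paginate.iterator(query, { org }) then yields one
// page of repositories per iteration until hasNextPage is false.
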
42 changes: 21 additions & 21 deletions scraper/src/github-scraper/types.ts
@@ -173,29 +173,28 @@ export interface AuthoredIssueAndPr {
}

export type Discussion = {
node: {
isAnswered: Boolean;
title: string;
body: string;
author: {
login: string;
};
url: string;
category: {
name: string;
emojiHTML: string;
};
comments: {
edges: {
node: {
author: {
login: string;
};
isAnswered: Boolean;
title: string;
body: string;
author: {
login: string;
};
url: string;
category: {
name: string;
emojiHTML: string;
};
comments: {
edges: {
node: {
author: {
login: string;
};
}[];
};
createdAt: string;
};
}[];
};
createdAt: string;
updatedAt: string;
};

export type ParsedDiscussion = {
@@ -206,6 +205,7 @@ export type ParsedDiscussion = {
link: string;
isAnswered: Boolean;
time: string;
updateTime: string;
category?: {
name: string;
emoji: string;
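
To make the reshaped types concrete, here is a hypothetical ParsedDiscussion value with invented data; only the shape, including the new updateTime field, reflects the type above.

import { ParsedDiscussion } from "./types.js";

// Invented example values; only the shape reflects the type above.
const example: ParsedDiscussion = {
  source: "github",
  title: "How do I run the scraper locally?",
  text: "I cloned the repo but the scraper exits immediately...",
  author: "octocat",
  link: "https://github.com/example-org/example-repo/discussions/42",
  isAnswered: false,
  time: "2024-07-20T10:15:00Z",       // createdAt
  updateTime: "2024-07-21T08:00:00Z", // updatedAt
  category: { name: "Q&A", emoji: "🙏" },
  participants: ["octocat", "hubber"],
  repoName: "example-repo",
};
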
29 changes: 28 additions & 1 deletion scraper/src/github-scraper/utils.ts
@@ -172,6 +172,32 @@ export async function saveUserData(
}
}

export async function mergeDiscussions(
oldData: ParsedDiscussion[],
newDiscussions: ParsedDiscussion[],
) {
const mergedDiscussions = [...oldData];

newDiscussions.forEach((newDiscussion) => {
const oldIndex = oldData.findIndex(
(oldDiscussion) => oldDiscussion.link === newDiscussion.link,
);

if (oldIndex !== -1) {
if (
oldData[oldIndex].updateTime !== newDiscussion.updateTime ||
oldData[oldIndex].participants !== newDiscussion.participants
) {
mergedDiscussions[oldIndex] = newDiscussion;
}
} else {
mergedDiscussions.push(newDiscussion);
}
});

return mergedDiscussions;
}

export async function saveDiscussionData(
discussions: ParsedDiscussion[],
dataDir: string,
@@ -184,7 +210,8 @@ export async function saveDiscussionData(
// Try reading the file
const response = await readFile(file);
const oldData = JSON.parse(response.toString());
const newData = oldData.concat(discussions);

const newData = await mergeDiscussions(oldData, discussions);
const jsonData = JSON.stringify(newData, null, 2);
await writeFile(file, jsonData);
} catch (err) {
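
A usage sketch for mergeDiscussions, which keeps one entry per discussion link: a stored entry is replaced when the freshly scraped copy differs (for example, a newer updateTime), and unseen links are appended. The fixtures below are invented, and the casts keep them short.

import { mergeDiscussions } from "./utils.js";
import { ParsedDiscussion } from "./types.js";

// Invented fixtures; cast because only link/updateTime matter here.
const oldData = [
  { link: "https://github.com/org/repo/discussions/1", updateTime: "2024-07-01T00:00:00Z" },
] as ParsedDiscussion[];

const scraped = [
  // Same link with a newer updateTime, so it replaces the stored entry.
  { link: "https://github.com/org/repo/discussions/1", updateTime: "2024-07-21T00:00:00Z" },
  // New link, so it is appended.
  { link: "https://github.com/org/repo/discussions/2", updateTime: "2024-07-20T00:00:00Z" },
] as ParsedDiscussion[];

// Inside an async context:
const merged = await mergeDiscussions(oldData, scraped);
// merged now contains two entries: discussions/1 (updated copy) and discussions/2.
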
