-
Notifications
You must be signed in to change notification settings - Fork 94
/
Copy pathlabel_data_utils.ts
192 lines (177 loc) · 5.86 KB
/
label_data_utils.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import { readdirSync, statSync } from 'fs';
import path from 'path';
import { readFileAsObj } from './utils';
/** Names of the known code hosting platforms (the same values listed in `checkKeysAndTypes.platformsNames`). */
export type PlatformNames = 'GitHub' | 'Gitee' | 'AtomGit' | 'GitLab.com' | 'GitLab.cn' | 'Gitea';
// Location of the label files, relative to the directory containing this module.
const labelInputDir = '../labeled_data';
// Label directory resolved against this module's directory; getLabelData walks it for `.yml` files.
const labelInputPath = path.join(__dirname, labelInputDir);
/**
 * Allow-lists used when validating label files.
 *
 * Only `labelTypes` and `labelKeys` are consulted in this file (by `parseItem`);
 * the platform-related sets are declared here but not referenced elsewhere in
 * this file.
 */
const checkKeysAndTypes = {
  // Accepted values for a label's `type` field.
  labelTypes: new Set<string>([
    'Region-0',
    'Region-1',
    'Company',
    'Community',
    'Project',
    'Foundation',
    'University-0',
    'Agency-0',
    'Institution',
    'Tech-0',
    'Tech-1',
    'Tech-2',
    'Tech-3',
    'Domain-0',
    'Bot',
  ]),
  // Accepted keys inside a label's `data` section.
  labelKeys: new Set<string>(['labels', 'platforms']),
  // Known platform names.
  platformsNames: new Set<string>([
    'GitHub',
    'Gitee',
    'AtomGit',
    'GitLab.com',
    'GitLab.cn',
    'Gitea',
  ]),
  // Known platform types.
  platformTypes: new Set<string>(['Code Hosting']),
  // Names of the per-platform entry lists.
  platformKeys: new Set<string>(['repos', 'orgs', 'users']),
};
/** A single entry of a platform's `orgs`, `repos` or `users` list. */
interface CodeHostingPlatformItem {
// Used by mergePlatforms to detect duplicate entries within a list.
id: number;
name: string;
}
/**
 * The entries a label references on one platform.
 * mergePlatforms treats two records with the same `name` and `type` as the same platform.
 */
interface CodeHostingPlatformData {
name: string;
type: string;
orgs: CodeHostingPlatformItem[],
repos: CodeHostingPlatformItem[],
users: CodeHostingPlatformItem[],
}
/** Working record for one label file while the label tree is being resolved. */
interface LabelItem {
// `:`-prefixed, `/`-separated identifier derived from the file's path inside the label directory.
identifier: string;
// Object returned by readFileAsObj for the label file.
content: {
name: string;
// When set, must be one of checkKeysAndTypes.labelTypes.
type: string;
meta?: any;
// Only the keys in checkKeysAndTypes.labelKeys (`labels`, `platforms`) are accepted by parseItem.
data: any;
},
// Identifiers of the labels that list this label under their `labels` key.
parents: string[];
// Identifiers of the labels this label lists under its `labels` key.
children: string[];
// Set to true once parseItem has finished processing this label.
parsed: boolean;
// This label's own platforms merged with the platforms of the labels it references.
platforms: CodeHostingPlatformData[];
}
/** Flattened label record produced by processLabelItems and returned by getLabelData. */
interface ParsedLabelItem {
identifier: string;
meta?: any;
type: string;
name: string;
parents: string[];
children: string[];
platforms: CodeHostingPlatformData[];
}
/**
 * Loads every label file under the label directory and resolves the
 * relationships between the labels.
 *
 * Each `.yml` file becomes one label. Its identifier is `:` followed by the
 * file's path relative to the label directory, without the `.yml` extension
 * and with `/` as separator. A file named `index.yml` inside a sub-directory
 * is identified by that directory instead (`foo/index.yml` -> `:foo`).
 *
 * @param injectLabelData Optional extra entries appended, unmodified, after the parsed labels.
 * @returns The parsed labels followed by any injected entries.
 * @throws Error if the label path is not a directory, or if a label file fails
 *   validation in `parseItem`. A missing label path surfaces as the error
 *   thrown by `statSync`.
 */
export function getLabelData(injectLabelData?: any[]): ParsedLabelItem[] {
  if (!statSync(labelInputPath).isDirectory()) {
    throw new Error(`${labelInputPath} input path is not a directory.`);
  }
  const labelFileSuffix = '.yml';
  const indexFileName = `${path.sep}index${labelFileSuffix}`;
  const labelMap = new Map<string, LabelItem>();
  readPath(labelInputPath, '', f => {
    if (!f.endsWith(labelFileSuffix)) return;
    // Strip the matched suffix from the END of the path. Locating it with
    // indexOf() would cut at its first occurrence and truncate paths such as
    // `a.yml.b.yml` or `dir.yml/label.yml`.
    const suffixLength = f.endsWith(indexFileName) ? indexFileName.length : labelFileSuffix.length;
    const relativeName = f.slice(0, f.length - suffixLength);
    // Identifiers always use `/`, so convert Windows-style separators.
    const identifier = processLabelIdentifier(`:${relativeName}`);
    const content = readFileAsObj(path.join(labelInputPath, f));
    labelMap.set(identifier, {
      identifier,
      content,
      platforms: [],
      parents: [],
      children: [],
      parsed: false,
    });
  });
  const data = processLabelItems(labelMap);
  if (injectLabelData) {
    for (const injected of injectLabelData) data.push(injected);
  }
  return data;
}
/**
 * Recursively walks `p` and reports every non-directory entry found.
 *
 * @param p File system path to inspect.
 * @param base Path of `p` relative to the walk's starting point; this is the
 *   value handed to `fileProcessor`.
 * @param fileProcessor Called once per non-directory entry with its relative path.
 */
function readPath(p: string, base: string, fileProcessor: (f: string) => void) {
  const isDirectory = statSync(p).isDirectory();
  if (isDirectory) {
    // Descend into each entry, extending both the real and the relative path.
    readdirSync(p).forEach(entry => {
      readPath(path.join(p, entry), path.join(base, entry), fileProcessor);
    });
    return;
  }
  fileProcessor(base);
}
/**
 * Resolves every label in `map` and converts the working records into the
 * flattened shape handed to callers.
 *
 * @param map All labels keyed by identifier; the items are updated in place by `parseItem`.
 * @returns One flattened record per label, in the map's iteration order.
 */
function processLabelItems(map: Map<string, LabelItem>): ParsedLabelItem[] {
  // First pass: resolve platforms and parent/child links for every label.
  map.forEach(item => {
    parseItem(item, map);
  });
  // Second pass: keep only the fields exposed to callers.
  const parsed: ParsedLabelItem[] = [];
  map.forEach(({ identifier, content, platforms, parents, children }) => {
    parsed.push({
      identifier,
      meta: content.meta,
      type: content.type,
      name: content.name,
      platforms,
      parents,
      children,
    });
  });
  return parsed;
}
/**
 * Merges several platform lists into a single list with one record per
 * platform, where a platform is identified by its `name` and `type`.
 *
 * The first record seen for a platform is deep-copied into the result. Entries
 * of later records for the same platform are appended to its `orgs`, `repos`
 * and `users` lists unless an entry with the same `id` is already present.
 * The inputs are never modified and the result shares no objects with them.
 *
 * @param platformsArray Platform lists to merge; missing (null/undefined) lists are skipped.
 * @returns A new list of merged platform records, in first-seen order.
 */
function mergePlatforms(...platformsArray: CodeHostingPlatformData[][]): CodeHostingPlatformData[] {
  const listKeys = ['orgs', 'repos', 'users'] as const;
  const merged: CodeHostingPlatformData[] = [];
  for (const group of platformsArray) {
    if (!group) continue;
    for (const incoming of group) {
      const existing = merged.find(m => m.name === incoming.name && m.type === incoming.type);
      if (!existing) {
        merged.push(JSON.parse(JSON.stringify(incoming)));
        continue;
      }
      for (const key of listKeys) {
        const additions = incoming[key];
        if (!additions) continue;
        // The first record of a platform may have omitted this list; create it
        // on demand instead of failing on an undefined list.
        const target = existing[key] ?? [];
        existing[key] = target;
        // Track known ids in a Set so de-duplication is linear rather than a
        // scan of the target list for every incoming entry.
        const knownIds = new Set(target.map(entry => entry.id));
        for (const entry of additions) {
          if (knownIds.has(entry.id)) continue;
          knownIds.add(entry.id);
          // Copy the entry so the result does not alias the input, matching
          // the deep copy made for first-seen platforms.
          target.push({ ...entry });
        }
      }
    }
  }
  return merged;
}
/**
 * Resolves one label in place: validates it, collects its platforms, and
 * links it to the labels it references (recursively resolving those first).
 *
 * Entries under `labels` that start with `:` are used as identifiers as-is;
 * any other entry is joined onto the current label's identifier.
 *
 * @param item The label to resolve; a label whose `parsed` flag is set is left untouched.
 * @param map All labels keyed by identifier, used to look up referenced labels.
 * @param inProgress Identifiers of the labels currently being resolved further
 *   up the call chain. Callers normally omit it; it exists so that circular
 *   references are reported instead of recursing until the stack overflows.
 * @throws Error if the label type or a `data` key is not supported, if a
 *   referenced label does not exist, or if labels reference each other in a cycle.
 */
function parseItem(item: LabelItem, map: Map<string, LabelItem>, inProgress: Set<string> = new Set<string>()): void {
  if (item.parsed) return;
  if (inProgress.has(item.identifier)) {
    const chain = [...Array.from(inProgress), item.identifier].join(' -> ');
    throw new Error(`Circular label reference detected: ${chain}`);
  }
  if (item.content.type && !checkKeysAndTypes.labelTypes.has(item.content.type)) {
    throw new Error(`Not supported type ${item.content.type}`)
  }
  inProgress.add(item.identifier);
  for (const key in item.content.data) {
    if (!checkKeysAndTypes.labelKeys.has(key)) {
      throw new Error(`Not supported element=${key}, identifier=${item.identifier}`);
    }
    switch (key) {
      case 'platforms': {
        // Work on a copy so the loaded file content is never modified.
        const platforms = JSON.parse(JSON.stringify(item.content.data[key]));
        item.platforms = mergePlatforms(item.platforms, platforms);
        // Make sure every platform record carries all three lists.
        for (const platform of item.platforms) {
          for (const listKey of ['orgs', 'repos', 'users'] as const) {
            platform[listKey] = platform[listKey] ?? [];
          }
        }
        break;
      }
      case 'labels': {
        const labels: string[] = item.content.data[key];
        for (const label of labels) {
          const identifier = label.startsWith(':') ? label : processLabelIdentifier(path.join(item.identifier, label));
          const innerItem = map.get(identifier);
          if (!innerItem) {
            throw new Error(`Can not find nest identifier ${identifier} for ${item.identifier}`);
          }
          if (!innerItem.parsed) {
            parseItem(innerItem, map, inProgress);
          }
          // set parents and children relationships
          innerItem.parents.push(item.identifier);
          item.children.push(innerItem.identifier);
          // merge platforms
          item.platforms = mergePlatforms(item.platforms, innerItem.platforms);
        }
        break;
      }
      default:
        // Not reachable: unsupported keys are rejected by the check above.
        break;
    }
  }
  inProgress.delete(item.identifier);
  item.parsed = true;
}
/**
 * Rewrites an identifier built with the host's path separator so that it uses
 * the POSIX separator instead.
 *
 * @param identifier Identifier that may contain `path.sep` characters.
 * @returns The identifier with every `path.sep` replaced by `path.posix.sep`.
 */
function processLabelIdentifier(identifier: string): string {
  const nativeSeparator = path.sep;
  const posixSeparator = path.posix.sep;
  const segments = identifier.split(nativeSeparator);
  return segments.join(posixSeparator);
}
/**
 * Collects the merged platform data of every label whose type or identifier
 * is listed in `typeOrIds`.
 *
 * @param typeOrIds Label types and/or label identifiers to select; an empty
 *   list returns an empty result without loading any label data.
 * @param injectLabelData Optional extra label entries, forwarded to `getLabelData`.
 * @returns The platforms of all selected labels, merged by `mergePlatforms`.
 */
export function getPlatformData(typeOrIds: string[], injectLabelData?: any[]): CodeHostingPlatformData[] {
  if (typeOrIds.length === 0) return [];
  const labels = getLabelData(injectLabelData);
  if (!labels) return [];
  const selectedPlatforms: CodeHostingPlatformData[][] = [];
  for (const label of labels) {
    const matchesType = typeOrIds.includes(label.type);
    if (matchesType || typeOrIds.includes(label.identifier)) {
      selectedPlatforms.push(label.platforms);
    }
  }
  return mergePlatforms(...selectedPlatforms);
}