-
Notifications
You must be signed in to change notification settings - Fork 2
/
index.js
162 lines (144 loc) · 4.7 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
const fs = require('fs')
const yaml = require('js-yaml')
const { fetchTracks, fetchCollections, fetchUsers } = require('./lib/queries')
const {
getLatestSiteMapNumber,
createRootSiteIndex,
createOrAppendSiteMap,
createOrAppendSiteIndex,
createSiteDefaults
} = require('./lib/sitemap')
const argv = require('minimist')(process.argv.slice(2))
console.log('Invoked with args: ', argv)
// Root output directory; every generated artifact lives beneath it
const ROOT = './sitemaps'

// Per-entity directories holding the individual sitemap files
const TRACKS_SITE_MAP_ROOT = `${ROOT}/tracks`
const COLLECTIONS_SITE_MAP_ROOT = `${ROOT}/collections`
const USERS_SITE_MAP_ROOT = `${ROOT}/users`

// Root index, defaults sitemap and the per-entity sub-indexes
const INDEX = `${ROOT}/index.xml`
const DEFAULTS = `${ROOT}/defaults.xml`
const TRACKS_INDEX = `${TRACKS_SITE_MAP_ROOT}/index.xml`
const COLLECTIONS_INDEX = `${COLLECTIONS_SITE_MAP_ROOT}/index.xml`
const USERS_INDEX = `${USERS_SITE_MAP_ROOT}/index.xml`

// Stateful config recording what has been indexed so far
const LATEST = `${ROOT}/latest.yml`
// Input config passed to createSiteDefaults (lives outside the output folder)
const DEFAULTS_CONFIG = './defaults.yml'
/**
 * Ensures every output directory the script writes into exists.
 *
 * `recursive: true` makes `mkdirSync` succeed when the directory is already
 * present (and create missing parents), so no separate existence check is
 * needed and there is no window between checking and creating.
 *
 * Throws whatever `fs.mkdirSync` throws, e.g. when a path exists but is not
 * a directory or the process lacks permission.
 */
const makeDirs = () => {
  const dirs = [
    ROOT,
    TRACKS_SITE_MAP_ROOT,
    COLLECTIONS_SITE_MAP_ROOT,
    USERS_SITE_MAP_ROOT
  ]
  for (const dir of dirs) {
    fs.mkdirSync(dir, { recursive: true })
  }
}
/**
 * Reads the stateful config (LATEST) to learn where indexing should resume.
 *
 * @returns {{latestTrack: *, latestCollection: *, latestUser: *}} the values
 *   stored under `latest.track`, `latest.collection` and `latest.user`.
 *   Any value that is missing falls back to 0, and all three are 0 when the
 *   file cannot be read or parsed.
 *
 * A missing file is the expected first-run case and is handled silently.
 * Any other failure (unparseable YAML, permission error, unexpected shape)
 * still falls back to zeros, but is reported, because restarting from 0 over
 * already-populated output is worth noticing.
 */
const readLatest = () => {
  // Missing entries default to 0 so later arithmetic never sees undefined
  const orZero = value => (value == null ? 0 : value)

  let ret = {
    latestTrack: 0,
    latestCollection: 0,
    latestUser: 0
  }
  try {
    const { latest } = yaml.safeLoad(
      fs.readFileSync(LATEST, 'utf8')
    )
    ret = {
      latestTrack: orZero(latest.track),
      latestCollection: orZero(latest.collection),
      latestUser: orZero(latest.user)
    }
  } catch (e) {
    if (!e || e.code !== 'ENOENT') {
      console.warn(`Could not read ${LATEST}, starting from 0:`, e)
    }
  }

  console.log('\nLatest: ', ret, '\n')
  return ret
}
/**
 * Persists the most recently indexed values to the stateful config (LATEST).
 *
 * The three arguments are stored under `latest.track`, `latest.collection`
 * and `latest.user` respectively, serialized with `yaml.safeDump`.
 */
const updateLatest = (latestTrack, latestCollection, latestUser) => {
  const latest = {
    track: latestTrack,
    collection: latestCollection,
    user: latestUser
  }
  console.log('\nUpdated Latest: ', latest, '\n')
  fs.writeFileSync(LATEST, yaml.safeDump({ latest }))
}
/**
 * Indexes audius and creates/appends to sitemaps in the ./sitemaps folder.
 * This script is intended to be run as a cron-job and keep state of what it
 * has already indexed.
 *
 * Order of operations:
 *   1. ensure output directories, the defaults sitemap and the root index exist
 *   2. read the stateful config to learn where to resume
 *   3. fetch new tracks, collections and users concurrently
 *   4. create/append the per-entity sitemaps, then the per-entity indexes
 *   5. write the new resume points back to the stateful config
 *
 * The process is terminated explicitly in both outcomes: with the default
 * exit code after the state is saved, and with exit code 1 when any step
 * throws. The stateful config is only written as the very last step, so a
 * failed run leaves the previous resume points in place (sitemap files may
 * already have been partially written by then).
 *
 * @returns {Promise<void>}
 */
const run = async () => {
  try {
    makeDirs()

    // Create defaults sitemap
    createSiteDefaults(DEFAULTS, DEFAULTS_CONFIG)

    // Create a master site index if needed
    createRootSiteIndex(INDEX, TRACKS_INDEX, COLLECTIONS_INDEX, USERS_INDEX, DEFAULTS)

    // Read from the latest.yml config to determine where to start indexing
    const { latestTrack, latestCollection, latestUser } = readLatest()

    // Fetch tracks, collections, and users for processing
    const [fetchedTracks, fetchedCollections, fetchedUsers] = await Promise.all([
      fetchTracks(latestTrack, argv.count),
      fetchCollections(latestCollection, argv.count),
      fetchUsers(latestUser, argv.count)
    ])
    const { items: tracks, latest: newLatestTrack } = fetchedTracks
    const { items: collections, latest: newLatestCollection } = fetchedCollections
    const { items: users, latest: newLatestUser } = fetchedUsers

    console.log('\n')
    console.log(`Tracks: Fetched ${newLatestTrack - latestTrack} new`)
    console.log(`Collections: Fetched ${newLatestCollection - latestCollection} new`)
    console.log(`Users: Fetched ${newLatestUser - latestUser} new`)
    console.log('\n')

    // Create or append to sitemaps (potentially generating new ones because
    // the maximum size of a sitemap is 50K entries)
    const latestTrackNumber = await createOrAppendSiteMap(
      tracks,
      getLatestSiteMapNumber(latestTrack),
      TRACKS_SITE_MAP_ROOT
    )
    const latestCollectionNumber = await createOrAppendSiteMap(
      collections,
      getLatestSiteMapNumber(latestCollection),
      COLLECTIONS_SITE_MAP_ROOT
    )
    const latestUserNumber = await createOrAppendSiteMap(
      users,
      getLatestSiteMapNumber(latestUser),
      USERS_SITE_MAP_ROOT
    )

    // Create or append to sub-indexes
    // Note: These may need to be root indexes for most crawlers as
    // nesting isn't always supported
    await createOrAppendSiteIndex(
      TRACKS_INDEX,
      TRACKS_SITE_MAP_ROOT,
      latestTrackNumber
    )
    await createOrAppendSiteIndex(
      COLLECTIONS_INDEX,
      COLLECTIONS_SITE_MAP_ROOT,
      latestCollectionNumber
    )
    await createOrAppendSiteIndex(
      USERS_INDEX,
      USERS_SITE_MAP_ROOT,
      latestUserNumber
    )

    // Update the latest.yml file that keeps track of what we've indexed
    updateLatest(newLatestTrack, newLatestCollection, newLatestUser)
    process.exit()
  } catch (e) {
    console.error('Some error occured, output may be corrupted')
    console.error(e)
    // Mirror the explicit exit of the success path, but signal failure so the
    // scheduler running this job does not record a broken run as successful
    process.exit(1)
  }
}
run()