Skip to content

Commit

Permalink
add words
Browse files Browse the repository at this point in the history
  • Loading branch information
samhess committed Nov 5, 2024
1 parent 5dfeb53 commit 22ae38d
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 39 deletions.
43 changes: 43 additions & 0 deletions scripts/checkVocabulary.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import db from '../src/lib/server/database.js'
import {readFile, writeFile} from 'fs/promises'

/**
 * Reduces a vocabulary `type` string (e.g. "n, v") to a single
 * part-of-speech code. Only the first comma-separated entry is used.
 *
 * @param {string} type - raw `type` value of a vocabulary entry
 * @returns {string} the rewritten part-of-speech code
 */
function normalizePartOfSpeech(type) {
  // The replacements run in order: a bare "phr" first becomes "prep phr",
  // which the final rule then hyphenates to "prep-phr".
  return type.split(', ')[0]
    .replace(/([am])v/, 'v')
    .replace(/n pl/, 'n')
    .replace(/exclam/, 'interj')
    .replace(/^phr$/, 'prep phr')
    .replace(/phr v/, 'phr-v')
    .replace(/prep phr/, 'prep-phr')
}

// Load the parsed vocabulary and make sure every usable entry has a row in
// the `word` table, keyed by (term, partOfSpeech, language).
const contents = await readFile('./data/vocabulary.json.json', 'utf8')
const words = JSON.parse(contents)

for (const word of words) {
  // Terms that still contain "(" or "/" are skipped.
  if (word.term.includes('(') || word.term.includes('/')) {
    continue
  }

  const {term, type, language = 'eng'} = word

  // Entries without a type cannot be mapped to a part of speech; report them
  // and carry on instead of crashing on `type.split`.
  if (typeof type !== 'string') {
    console.warn(`Skipping "${term}": missing type`)
    continue
  }

  const partOfSpeech = normalizePartOfSpeech(type)
  const where = {term_partOfSpeech_language: {term, partOfSpeech, language}}
  const data = {term, language, partOfSpeech}

  const existing = await db.word.findUnique({where})
  if (existing) {
    // The update writes back the same three key fields it was looked up by.
    await db.word.update({where, data})
    if (language !== 'eng') {
      console.log(`${partOfSpeech} ${word.term} ${language}`)
    }
  } else {
    await db.word.create({data})
  }
}
9 changes: 6 additions & 3 deletions scripts/data/vocabulary.json
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,8 @@
"forever (adv)",
"forget (v)",
"forgive (v)",
"fork (n) form (n)",
"fork (n)",
"form (n)",
"former (adj)",
"fortnight (n)",
"fortunately (adv)",
Expand Down Expand Up @@ -1146,7 +1147,8 @@
"giant (adj)",
"gift (n)",
"giraffe (n)",
"girl (n) girlfriend (n)",
"girl (n)",
"girlfriend (n)",
"give (v)",
"give back (phr v) • Give me back this book on Tuesday, please.",
"give in (phr v) • Have you given in your homework yet? • Do you give in? Shall I tell you the answer now?",
Expand Down Expand Up @@ -2254,7 +2256,8 @@
"river (n)",
"road (n)",
"roast (adj & v)",
"rob (v) robot (n)",
"rob (v)",
"robot (n)",
"rock (n) • rock (music) (n) • a hard piece of rock (n)",
"rocket (n) • They sent a rocket into space.",
"role (n)",
Expand Down
56 changes: 33 additions & 23 deletions scripts/data/vocabulary.json.json
Original file line number Diff line number Diff line change
Expand Up @@ -4205,15 +4205,13 @@
"type": "adv"
},
{
"term": "fall (n & v) • in the fall",
"term": "fall",
"examples": [
"in the fall (n) (Am Eng) (Br Eng: autumn )",
"a fall in the price (n)",
"He fell and hurt his leg. (v)"
],
"language": "ena",
"dialect": " (Am Eng) (Br Eng: autumn ) • a fall in the price (n) • He fell and hurt his leg. (v)",
"type": "n"
"type": "n, v"
},
{
"term": "false",
Expand Down Expand Up @@ -4411,7 +4409,8 @@
"type": "n"
},
{
"term": "film star"
"term": "film star",
"type": "n"
},
{
"term": "final",
Expand Down Expand Up @@ -4654,7 +4653,11 @@
"type": "v"
},
{
"term": "fork (n) form",
"term": "fork",
"type": "n"
},
{
"term": "form",
"type": "n"
},
{
Expand Down Expand Up @@ -4996,7 +4999,11 @@
"type": "n"
},
{
"term": "girl (n) girlfriend",
"term": "girl",
"type": "n"
},
{
"term": "girlfriend",
"type": "n"
},
{
Expand Down Expand Up @@ -5912,10 +5919,6 @@
"term": "hut",
"type": "n"
},
{
"term": "",
"type": "pron"
},
{
"term": "ice",
"type": "n"
Expand Down Expand Up @@ -7014,7 +7017,11 @@
"type": "v"
},
{
"term": "look out (phr v) Look out!"
"term": "look out",
"type": "phr v",
"examples": [
"Look out!"
]
},
{
"term": "look up",
Expand Down Expand Up @@ -10091,7 +10098,11 @@
"type": "adj, v"
},
{
"term": "rob (v) robot",
"term": "rob",
"type": "v"
},
{
"term": "robot",
"type": "n"
},
{
Expand Down Expand Up @@ -11060,12 +11071,12 @@
"type": "adj"
},
{
"term": "smart (adj) • a smart idea",
"term": "smart",
"examples": [
"a smart idea (Am Eng)",
"smart clothes"
],
"language": "ena"
"type": "adj"
},
{
"term": "smartphone",
Expand Down Expand Up @@ -12331,11 +12342,8 @@
"type": "n"
},
{
"term": "through"
},
{
"term": "",
"type": "prep"
"term": "through",
"type": "adj, adv, prep"
},
{
"term": "throw",
Expand Down Expand Up @@ -12722,15 +12730,16 @@
{
"term": "truck",
"language": "ena",
"dialect": " (Am Eng) (Br Eng: lorry )"
"dialect": " (Am Eng) (Br Eng: lorry )",
"type": "n"
},
{
"term": "trunk",
"examples": [
"the trunk of a car"
],
"language": "ena",
"dialect": " (Am Eng) (Br Eng: boot ) • the trunk of a car",
"dialect": " (Am Eng) (Br Eng: boot )",
"type": "n"
},
{
Expand Down Expand Up @@ -13925,7 +13934,8 @@
"type": "n"
},
{
"term": "zebra"
"term": "zebra",
"type": "n"
},
{
"term": "zero",
Expand Down
13 changes: 13 additions & 0 deletions scripts/parseCambridge.js
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,20 @@ async function getVocabulary() {
terms[index-1] += ' ' + term
terms.splice(index,1)
}
if (term==='fork (n) form (n)') {
terms[index] = 'fork (n)'
terms.splice(index+1,0,'form (n)')
}
if (term==='girl (n) girlfriend (n)') {
terms[index] = 'girl (n)'
terms.splice(index+1,0,'girlfriend (n)')
}
if (term==='rob (v) robot (n)') {
terms[index] = 'rob (v)'
terms.splice(index+1,0,'robot (n)')
}
}

return terms
}

Expand Down
44 changes: 31 additions & 13 deletions scripts/parseVocabulary.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import {readFile, writeFile} from 'fs/promises'
const contents = await readFile('./data/vocabulary.json')
const terms = JSON.parse(contents)

const outFile = './data/vocabulary.json.json'

const endings = [
'(av & v)',
'(adj)',
Expand Down Expand Up @@ -75,29 +77,29 @@ const endings = [
]

const words = []
for (const term of terms) {
for (const [index,term] of terms.entries()) {
const word = {term}
const bulletPos = term.indexOf(' •')
if (bulletPos !== -1) {
word.examples = term.slice(bulletPos).replace(' • ','').split(' • ')
word.term = term.slice(0,bulletPos)
word.examples = term.slice(bulletPos).replace(' • ','').split(' • ')
}
const enbPos = term.indexOf(' (Br Eng) (Am Eng:')
const enbPos = word.term.indexOf(' (Br Eng) (Am Eng:')
if (enbPos !== -1) {
word.language = 'enb'
word.dialect = term.slice(enbPos)
word.term = term.slice(0,enbPos)
word.dialect = word.term.slice(enbPos)
word.term = word.term.slice(0,enbPos)
}
const enaPos = term.indexOf(' (Am Eng) (Br Eng:')
const enaPos = word.term.indexOf(' (Am Eng) (Br Eng:')
if (enaPos !== -1) {
word.language = 'ena'
word.dialect = term.slice(enaPos)
word.term = term.slice(0,enaPos)
word.dialect = word.term.slice(enaPos)
word.term = word.term.slice(0,enaPos)
}
const enaPos2 = term.indexOf(' (Am Eng)')
const enaPos2 = word.term.indexOf(' (Am Eng)')
if (enaPos2 !== -1) {
word.language = 'ena'
word.term = term.slice(0,enaPos2)
word.term = word.term.slice(0,enaPos2)
}
for (const ending of endings) {
const term = word.term
Expand All @@ -113,8 +115,24 @@ for (const term of terms) {
word.type = ending.slice(1,-1).replace(' &', ',')
}
}
if (word.term.endsWith(')')) console.log(word.term)
words.push(word)
if (['film star', 'truck', 'zebra'].includes(word.term)) {
word.type = 'n'
}
if (word.term === 'through') {
word.type = 'adj, adv, prep'
}
if (word.term.startsWith('look out')) {
word.term = 'look out'
word.type = 'phr v'
word.examples = ['Look out!']
}
if (word.term.endsWith(')')) {
//console.log(word.term)
}
if (word.term) {
words.push(word)
}
}

await writeFile('./data/vocabulary.json.json', JSON.stringify(words,null,2))
await writeFile(outFile, JSON.stringify(words,null,2))
console.log(`${words.length} words written to ${outFile}`)

0 comments on commit 22ae38d

Please sign in to comment.