Skip to content

Commit

Permalink
scripts: Translate via GPT-4
Browse files Browse the repository at this point in the history
  • Loading branch information
schneefux committed Mar 7, 2024
1 parent 158ce6a commit 542813f
Show file tree
Hide file tree
Showing 57 changed files with 32,405 additions and 2,393 deletions.
1 change: 0 additions & 1 deletion scripts/download_translations.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ if (net.setDefaultAutoSelectFamily) {

const { createWriteStream } = require('fs')
const fs = require('fs').promises
const { promisify } = require('util')
const { pipeline } = require('stream/promises')

// API base URL: taken from the BRAWLAPI_URL environment variable when it is
// set to a non-empty value, otherwise the public default below is used.
const starlistUrl = process.env.BRAWLAPI_URL || 'https://api.brawlapi.com/v1/';
Expand Down
704 changes: 704 additions & 0 deletions scripts/en.yaml

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions scripts/merge_translation.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
const { readFile, writeFile } = require('fs/promises')
const yaml = require('js-yaml')

/**
 * Builds the web app's locale files from human and machine translations.
 *
 * For every language code, the optional human translation
 * (`./translations/traduora/<lang>.json`) is merged with the auto-generated
 * translation (`./translations/auto/<lang>.yaml`) and the result is written
 * as pretty-printed JSON to `../web/locales/<lang>.json`.
 * When both sources define the same key, the auto-generated value wins.
 *
 * @returns {Promise<void>} resolves once every locale file has been written
 * @throws if a human translation file exists but cannot be read or parsed,
 *   if an auto-generated YAML file is missing or invalid, or if a write fails
 */
async function main() {
  const langs = ['bn', 'br', 'cn', 'cz', 'de', 'es', 'fi', 'fr', 'hi', 'it', 'jp', 'kr', 'lt', 'nl', 'pl', 'pt', 'ru', 'sk', 'sv', 'tr', 'vi', 'zh']

  for (const lang of langs) {
    let humanTranslation = {}
    try {
      humanTranslation = JSON.parse(await readFile(`./translations/traduora/${lang}.json`, 'utf8'))
    } catch (e) {
      // A language without a human translation is expected; anything else
      // (corrupt JSON, permission problems, ...) must not be hidden, because
      // the locale file would be overwritten without the human strings.
      if (e.code !== 'ENOENT') {
        throw e
      }
    }

    const yamlString = await readFile(`./translations/auto/${lang}.yaml`, 'utf8')
    const parsed = yaml.load(yamlString)
    // Later sources win: auto-generated keys override human ones.
    const merged = Object.assign({}, humanTranslation, parsed)
    await writeFile(`../web/locales/${lang}.json`, JSON.stringify(merged, null, 2))
  }
}

// Script entry point: log any failure and mark the process as failed so that
// shells and CI jobs see a non-zero exit status instead of a silent success.
main().catch((error) => {
  console.error(error)
  process.exitCode = 1
})
6 changes: 4 additions & 2 deletions scripts/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
"license": "UNLICENSED",
"dependencies": {
"axios": "^0.27.2",
"js-yaml": "^4.1.0",
"jsdom": "^20.0.3",
"openai": "^4.28.4",
"string-similarity": "^4.0.4",
"wtf_wikipedia": "^10.0.5",
"wtf-plugin-api": "^1.0.1"
"wtf-plugin-api": "^1.0.1",
"wtf_wikipedia": "^10.0.5"
}
}
68 changes: 68 additions & 0 deletions scripts/translate.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
const { readFile, writeFile, mkdir } = require('fs/promises')
const OpenAI = require('openai')
const yaml = require('js-yaml')

// Shared OpenAI client for this script; the API key is read from the
// OPENAI_API_KEY environment variable.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })

/**
 * Translates one YAML chunk from English into the target language using the
 * OpenAI chat completions API.
 *
 * The model frequently wraps its answer in a Markdown code fence. The fence
 * is removed regardless of its language tag (`yaml`, `yml` or none) and
 * regardless of trailing whitespace after the closing fence, so that the
 * returned text can be concatenated with other chunks into one YAML document.
 *
 * @param {string} chunk - YAML text with English values
 * @param {string} targetLanguage - language code inserted into the prompt
 * @returns {Promise<string>} translated YAML, terminated by exactly one newline
 * @throws {Error} if the API call fails or the response carries no text
 */
async function translateChunk(chunk, targetLanguage) {
  const completion = await openai.chat.completions.create({
    messages: [
      {
        role: 'system',
        content: `
You are a translator for the Brawl Stars app "Brawl Time Ninja".
Translate the following YAML values from 'en' to '${targetLanguage}'.
Output the YAML values in the same format as the input.
Use child-friendly, informal language.`,
      },
      {
        role: 'user',
        content: chunk,
      },
    ],
    model: 'gpt-4-turbo-preview',
    temperature: 0.2,
  })

  console.log(`consumed ${completion.usage.prompt_tokens} input tokens, ${completion.usage.completion_tokens} completion tokens`)

  const translation = completion.choices[0].message.content
  if (typeof translation !== 'string') {
    // e.g. a refusal or filtered response: fail with a clear message instead
    // of a TypeError from calling .replace on null
    throw new Error(`No translation text received for '${targetLanguage}'`)
  }

  const withoutFences = translation
    // opening fence with optional language tag
    .replace(/^\s*```(?:ya?ml)?[ \t]*\r?\n/, '')
    // closing fence, tolerating trailing whitespace/newlines after it
    .replace(/\r?\n?```\s*$/, '')

  return withoutFences.trimEnd() + '\n'
}

/**
 * Machine-translates the English locale into one language and stores the
 * result as `./translations/auto/<lang>.yaml`.
 *
 * The English source (`../web/locales/en.json`) is split into chunks that are
 * translated one after another. The translated chunks are buffered and the
 * target file is written only after every chunk succeeded: a failure half-way
 * through therefore never leaves a partial file behind that a later run could
 * mistake for a finished translation.
 *
 * @param {string} lang - target language code
 * @returns {Promise<void>} resolves once the YAML file has been written
 * @throws if the English locale cannot be read, a chunk fails to translate,
 *   or the target file cannot be written
 */
async function translate(lang) {
  const enJson = JSON.parse(await readFile('../web/locales/en.json', 'utf8'))
  const targetYamlPath = `./translations/auto/${lang}.yaml`

  // Make sure the output directory exists before any (paid) API call is made.
  await mkdir('./translations/auto', { recursive: true })

  // gpt-4 output is limited to 4096 tokens which is about 200 lines
  const chunkSize = 50 // chunk of 50: ~750 prompt tokens, ~2k completion tokens
  const allEntries = Object.entries(enJson)
  const totalChunks = Math.ceil(allEntries.length / chunkSize)
  const translatedChunks = []

  for (let i = 0; i < allEntries.length; i += chunkSize) {
    const enChunk = yaml.dump(Object.fromEntries(allEntries.slice(i, i + chunkSize)), { forceQuotes: true })

    console.log(`Translating chunk ${i / chunkSize + 1} of ${totalChunks} to ${lang}`)
    translatedChunks.push(await translateChunk(enChunk, lang))
  }

  // Single write at the very end: the file exists only if it is complete.
  await writeFile(targetYamlPath, translatedChunks.join(''), 'utf-8')
}

/**
 * Translates the English locale into every supported language, skipping
 * languages whose `./translations/auto/<lang>.yaml` already exists.
 *
 * @returns {Promise<void>} resolves once all missing languages are translated
 * @throws if the output directory cannot be created, an existing translation
 *   cannot be checked for a reason other than "file not found", or a
 *   translation run fails
 */
async function main() {
  // full run is ~ $10

  const langs = ['bn', 'br', 'cn', 'cz', 'de', 'es', 'fi', 'fr', 'hi', 'it', 'jp', 'kr', 'lt', 'nl', 'pl', 'pt', 'ru', 'sk', 'sv', 'tr', 'vi', 'zh']

  // Create the directory the translations are actually written to, and wait
  // for it: an un-awaited mkdir can race with the first write and its
  // rejection would go unhandled.
  await mkdir('./translations/auto', { recursive: true })

  for (const lang of langs) {
    try {
      await readFile(`./translations/auto/${lang}.yaml`, 'utf8')
      console.log(`Skipping ${lang}.yaml because it already exists`)
      continue
    } catch (e) {
      // Only "file does not exist" means the language still needs translating.
      if (e.code !== 'ENOENT') {
        throw e
      }
    }

    await translate(lang)
  }
}

// Script entry point: log any failure and mark the process as failed so that
// shells and CI jobs see a non-zero exit status instead of a silent success.
main().catch((error) => {
  console.error(error)
  process.exitCode = 1
})
647 changes: 647 additions & 0 deletions scripts/translations/auto/bn.yaml

Large diffs are not rendered by default.

Loading

0 comments on commit 542813f

Please sign in to comment.