Skip to content

Commit

Permalink
Fix the OCR command when it is used on images containing Chinese text
Browse files Browse the repository at this point in the history
  • Loading branch information
MrPowerGamerBR committed Mar 30, 2024
1 parent 494e36c commit 4bb5f78
Show file tree
Hide file tree
Showing 7 changed files with 203 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import net.perfectdreams.loritta.cinnamon.discord.interactions.commands.Cinnamon
import net.perfectdreams.loritta.cinnamon.discord.interactions.commands.options.LocalizedApplicationCommandOptions
import net.perfectdreams.loritta.cinnamon.discord.interactions.commands.styled
import net.perfectdreams.loritta.cinnamon.discord.interactions.vanilla.utils.declarations.TranslateCommand
import net.perfectdreams.loritta.cinnamon.discord.utils.google.Language
import net.perfectdreams.loritta.cinnamon.discord.utils.google.GoogleTranslateLanguage
import net.perfectdreams.loritta.cinnamon.discord.utils.toKordColor
import net.perfectdreams.loritta.cinnamon.emotes.Emotes
import net.perfectdreams.loritta.common.utils.LorittaColors
Expand All @@ -20,11 +20,10 @@ class TranslateExecutor(loritta: LorittaBot) : CinnamonSlashCommandExecutor(lori
val cinnamonAutocomplete: (AutocompleteContext, FocusedCommandOption, Boolean) -> (Map<String, String>) = { autocompleteContext, focusedCommandOption, includeAuto ->
val value = focusedCommandOption.value

Language.values()
GoogleTranslateLanguage.values()
.asSequence()
.filter { it != Language.UNDETERMINED }
.filter {
if (!includeAuto) it != Language.AUTO_DETECT else true
if (!includeAuto) it != GoogleTranslateLanguage.AUTO_DETECT else true
}
.filter {
autocompleteContext.i18nContext.get(it.languageNameI18nKey).startsWith(value, true)
Expand Down Expand Up @@ -53,7 +52,7 @@ class TranslateExecutor(loritta: LorittaBot) : CinnamonSlashCommandExecutor(lori

override suspend fun execute(context: ApplicationCommandContext, args: SlashCommandArguments) {
val from = try {
Language.fromLanguageCode(args[options.from])
GoogleTranslateLanguage.fromLanguageCode(args[options.from])
} catch (e: NoSuchElementException) {
context.failEphemerally {
styled(
Expand All @@ -64,7 +63,7 @@ class TranslateExecutor(loritta: LorittaBot) : CinnamonSlashCommandExecutor(lori
}

val to = try {
Language.fromLanguageCode(args[options.to])
GoogleTranslateLanguage.fromLanguageCode(args[options.to])
} catch (e: NoSuchElementException) {
context.failEphemerally {
styled(
Expand All @@ -74,7 +73,7 @@ class TranslateExecutor(loritta: LorittaBot) : CinnamonSlashCommandExecutor(lori
}
}

if (to == Language.AUTO_DETECT) {
if (to == GoogleTranslateLanguage.AUTO_DETECT) {
context.failEphemerally {
styled(
context.i18nContext.get(TranslateCommand.I18N_PREFIX.InvalidLanguage),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package net.perfectdreams.loritta.cinnamon.discord.utils.google

import net.dv8tion.jda.api.interactions.DiscordLocale

/**
 * Helpers for converting between Discord locales and the Google Vision / Google Translate language enums.
 */
object GoogleAPIUtils {
    /**
     * Associates every [DiscordLocale] with a [GoogleVisionLanguage].
     *
     * Locales without an explicit branch below fall back to [GoogleVisionLanguage.ENGLISH].
     */
    val DISCORD_LOCALE_TO_LANGUAGE_MAP = DiscordLocale.values().associateWith { locale ->
        when (locale) {
            DiscordLocale.PORTUGUESE_BRAZILIAN -> GoogleVisionLanguage.PORTUGUESE
            DiscordLocale.BULGARIAN -> GoogleVisionLanguage.BULGARIAN
            DiscordLocale.CHINESE_CHINA -> GoogleVisionLanguage.SIMPLIFIED_CHINESE
            DiscordLocale.CHINESE_TAIWAN -> GoogleVisionLanguage.TRADITIONAL_CHINESE
            DiscordLocale.CROATIAN -> GoogleVisionLanguage.CROATIAN
            DiscordLocale.CZECH -> GoogleVisionLanguage.CZECH
            DiscordLocale.DANISH -> GoogleVisionLanguage.DANISH
            DiscordLocale.DUTCH -> GoogleVisionLanguage.DUTCH
            DiscordLocale.FINNISH -> GoogleVisionLanguage.FINNISH
            DiscordLocale.FRENCH -> GoogleVisionLanguage.FRENCH
            DiscordLocale.GERMAN -> GoogleVisionLanguage.GERMAN
            DiscordLocale.GREEK -> GoogleVisionLanguage.GREEK
            DiscordLocale.HINDI -> GoogleVisionLanguage.HINDI
            DiscordLocale.HUNGARIAN -> GoogleVisionLanguage.HUNGARIAN
            DiscordLocale.ITALIAN -> GoogleVisionLanguage.ITALIAN
            DiscordLocale.JAPANESE -> GoogleVisionLanguage.JAPANESE
            DiscordLocale.KOREAN -> GoogleVisionLanguage.KOREAN
            DiscordLocale.LITHUANIAN -> GoogleVisionLanguage.LITHUANIAN
            DiscordLocale.NORWEGIAN -> GoogleVisionLanguage.NORWEGIAN
            DiscordLocale.POLISH -> GoogleVisionLanguage.POLISH
            DiscordLocale.ROMANIAN_ROMANIA -> GoogleVisionLanguage.ROMANIAN
            DiscordLocale.RUSSIAN -> GoogleVisionLanguage.RUSSIAN
            DiscordLocale.SPANISH -> GoogleVisionLanguage.SPANISH
            DiscordLocale.SWEDISH -> GoogleVisionLanguage.SWEDISH
            DiscordLocale.THAI -> GoogleVisionLanguage.THAI
            DiscordLocale.TURKISH -> GoogleVisionLanguage.TURKISH
            DiscordLocale.UKRAINIAN -> GoogleVisionLanguage.UKRAINIAN
            DiscordLocale.VIETNAMESE -> GoogleVisionLanguage.VIETNAMESE
            DiscordLocale.ENGLISH_UK -> GoogleVisionLanguage.ENGLISH
            // Anything not listed above is treated as English
            else -> GoogleVisionLanguage.ENGLISH
        }
    }

    /**
     * Converts a [GoogleVisionLanguage] into the [GoogleTranslateLanguage] constant that has the same name.
     *
     * This is a bit of a hack: the lookup is done purely by enum constant name, so `valueOf` throws an
     * [IllegalArgumentException] if [GoogleTranslateLanguage] has no constant named like [visionLanguage].
     *
     * @param visionLanguage the Google Vision language to convert
     * @return the identically named Google Translate language
     */
    fun fromVisionLanguageToTranslateLanguage(visionLanguage: GoogleVisionLanguage): GoogleTranslateLanguage {
        return GoogleTranslateLanguage.valueOf(visionLanguage.name)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
package net.perfectdreams.loritta.cinnamon.discord.utils.google

import net.perfectdreams.i18nhelper.core.keydata.StringI18nData
import net.perfectdreams.i18nhelper.core.keys.StringI18nKey
import net.perfectdreams.loritta.common.utils.text.TextUtils

/**
 * Languages known to the Google Translate integration, each paired with its language [code].
 *
 * @property code the language code string associated with this entry
 */
enum class GoogleTranslateLanguage(val code: String) {
    AUTO_DETECT("auto"), // Not a "real language"
    UNDETERMINED("und"),
    AFRIKAANS("af"),
    ALBANIAN("sq"),
    AMHARIC("am"),
    ARABIC("ar"),
    ARMENIAN("hy"),
    AZERBAIJANI("az"),
    BASQUE("eu"),
    BELARUSIAN("be"),
    BENGALI("bn"),
    BOSNIAN("bs"),
    BULGARIAN("bg"),
    CATALAN("ca"),
    CEBUANO("ceb"),
    SIMPLIFIED_CHINESE("zh-CN"), // Google Translate uses "zh-CN" for Simplified Chinese, whereas Google Vision uses only "zh"
    TRADITIONAL_CHINESE("zh-TW"),
    CORSICAN("co"),
    CROATIAN("hr"),
    CZECH("cs"),
    DANISH("da"),
    DUTCH("nl"),
    ENGLISH("en"),
    ESPERANTO("eo"),
    ESTONIAN("et"),
    FINNISH("fi"),
    FRENCH("fr"),
    FRISIAN("fy"),
    GALICIAN("gl"),
    GEORGIAN("ka"),
    GERMAN("de"),
    GREEK("el"),
    GUJARATI("gu"),
    HAITIAN_CREOLE("ht"),
    HAUSA("ha"),
    HAWAIIAN("haw"),
    HEBREW("he"),
    HINDI("hi"),
    HMONG("hmn"),
    HUNGARIAN("hu"),
    ICELANDIC("is"),
    IGBO("ig"),
    INDONESIAN("id"),
    IRISH("ga"),
    ITALIAN("it"),
    JAPANESE("ja"),
    JAVANESE("jv"),
    KANNADA("kn"),
    KAZAKH("kk"),
    KHMER("km"),
    KINYARWANDA("rw"),
    KOREAN("ko"),
    KURDISH("ku"),
    KYRGYZ("ky"),
    LAO("lo"),
    LATIN("la"),
    LATVIAN("lv"),
    LITHUANIAN("lt"),
    LUXEMBOURGISH("lb"),
    MACEDONIAN("mk"),
    MALAGASY("mg"),
    MALAY("ms"),
    MALAYALAM("ml"),
    MALTESE("mt"),
    MAORI("mi"),
    MARATHI("mr"),
    MONGOLIAN("mn"),
    MYANMAR("my"),
    NEPALI("ne"),
    NORWEGIAN("no"),
    NYANJA("ny"),
    ODIA("or"),
    PASHTO("ps"),
    PERSIAN("fa"),
    POLISH("pl"),
    PORTUGUESE("pt"),
    PUNJABI("pa"),
    ROMANIAN("ro"),
    RUSSIAN("ru"),
    SAMOAN("sm"),
    SCOTS_GAELIC("gd"),
    SERBIAN("sr"),
    SESOTHO("st"),
    SHONA("sn"),
    SINDHI("sd"),
    SINHALA("si"),
    SLOVAK("sk"),
    SLOVENIAN("sl"),
    SOMALI("so"),
    SPANISH("es"),
    SUNDANESE("su"),
    SWAHILI("sw"),
    SWEDISH("sv"),
    TAGALOG("tl"),
    TAJIK("tg"),
    TAMIL("ta"),
    TATAR("tt"),
    TELUGU("te"),
    THAI("th"),
    TURKISH("tr"),
    TURKMEN("tk"),
    UKRAINIAN("uk"),
    URDU("ur"),
    UYGHUR("ug"),
    UZBEK("uz"),
    VIETNAMESE("vi"),
    WELSH("cy"),
    XHOSA("xh"),
    YIDDISH("yi"),
    YORUBA("yo"),
    ZULU("zu");

    /**
     * I18n data for this language's display name.
     *
     * The key is derived from the enum constant name (lowercased, then converted from snake case to
     * lower camel case) instead of an exhaustive `when`. A `when` would guarantee that no language is
     * missing its translation, but it would be too big and too boring to fill in.
     */
    val languageNameI18nKey: StringI18nData
        get() {
            val key = StringI18nKey("commands.command.translate.languages.${TextUtils.snakeToLowerCamelCase(this.name.lowercase())}")
            return StringI18nData(key, emptyMap())
        }

    companion object {
        /**
         * Finds the first language whose [GoogleTranslateLanguage.code] is equal to [code].
         *
         * @param code the language code to look up (compared with `==`, so the match is case-sensitive)
         * @return the first matching language, in declaration order
         * @throws NoSuchElementException if no language has the given code
         */
        fun fromLanguageCode(code: String): GoogleTranslateLanguage =
            GoogleTranslateLanguage.values().first { candidate -> candidate.code == code }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ import net.perfectdreams.i18nhelper.core.keydata.StringI18nData
import net.perfectdreams.i18nhelper.core.keys.StringI18nKey
import net.perfectdreams.loritta.common.utils.text.TextUtils

enum class Language(val code: String) {
AUTO_DETECT("auto"), // Not a "real language"
enum class GoogleVisionLanguage(val code: String) {
UNDETERMINED("und"),
AFRIKAANS("af"),
ALBANIAN("sq"),
Expand All @@ -20,7 +19,7 @@ enum class Language(val code: String) {
BULGARIAN("bg"),
CATALAN("ca"),
CEBUANO("ceb"),
SIMPLIFIED_CHINESE("zh-CN"),
SIMPLIFIED_CHINESE("zh"), // Google Translate uses "zh-CN" for simplified chinese, however Google Vision uses only "zh"
TRADITIONAL_CHINESE("zh-TW"),
CORSICAN("co"),
CROATIAN("hr"),
Expand Down Expand Up @@ -123,6 +122,6 @@ enum class Language(val code: String) {
get() = StringI18nData(StringI18nKey("commands.command.translate.languages.${TextUtils.snakeToLowerCamelCase(this.name.lowercase())}"), emptyMap())

companion object {
fun fromLanguageCode(code: String) = Language.values().first { it.code == code }
fun fromLanguageCode(code: String) = GoogleVisionLanguage.values().first { it.code == code }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import kotlinx.serialization.json.jsonPrimitive
class HackyGoogleTranslateClient {
val http = HttpClient(CIO)

suspend fun translate(from: Language, to: Language, input: String) = translate(
suspend fun translate(from: GoogleTranslateLanguage, to: GoogleTranslateLanguage, input: String) = translate(
from.code,
to.code,
input
Expand All @@ -45,7 +45,7 @@ class HackyGoogleTranslateClient {
if (firstElementOnTheArray is JsonNull)
return null

val detectedLanguage = Language.fromLanguageCode(response[2].jsonPrimitive.content)
val detectedLanguage = GoogleTranslateLanguage.fromLanguageCode(response[2].jsonPrimitive.content)

val output = StringBuilder()
firstElementOnTheArray.jsonArray.forEach {
Expand All @@ -65,6 +65,6 @@ class HackyGoogleTranslateClient {

data class GoogleTranslateResponse(
val output: String,
val sourceLanguage: Language
val sourceLanguage: GoogleTranslateLanguage
)
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
package net.perfectdreams.loritta.morenitta.interactions.vanilla.utils

import kotlinx.serialization.decodeFromString
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json
import net.dv8tion.jda.api.interactions.DiscordLocale
import net.dv8tion.jda.api.interactions.components.buttons.ButtonStyle
import net.perfectdreams.loritta.cinnamon.discord.interactions.commands.styled
import net.perfectdreams.loritta.cinnamon.discord.interactions.vanilla.utils.declarations.TranslateCommand
import net.perfectdreams.loritta.cinnamon.discord.utils.google.GoogleAPIUtils
import net.perfectdreams.loritta.cinnamon.discord.utils.google.GoogleVisionOCRClient
import net.perfectdreams.loritta.cinnamon.discord.utils.google.Language
import net.perfectdreams.loritta.cinnamon.discord.utils.google.GoogleVisionLanguage
import net.perfectdreams.loritta.cinnamon.emotes.Emotes
import net.perfectdreams.loritta.cinnamon.pudding.tables.CachedGoogleVisionOCRResults
import net.perfectdreams.loritta.common.utils.LorittaColors
Expand All @@ -22,41 +22,6 @@ import java.time.Instant
object OCRExecutor {
val I18N_PREFIX = I18nKeysData.Commands.Command.Ocr

private val LOCALE_TO_LANGUAGE_MAP = DiscordLocale.values().map {
it to when (it) {
DiscordLocale.PORTUGUESE_BRAZILIAN -> Language.PORTUGUESE
DiscordLocale.BULGARIAN -> Language.BULGARIAN
DiscordLocale.CHINESE_CHINA -> Language.SIMPLIFIED_CHINESE
DiscordLocale.CHINESE_TAIWAN -> Language.SIMPLIFIED_CHINESE
DiscordLocale.CROATIAN -> Language.CROATIAN
DiscordLocale.CZECH -> Language.CZECH
DiscordLocale.DANISH -> Language.DANISH
DiscordLocale.DUTCH -> Language.DUTCH
DiscordLocale.FINNISH -> Language.FINNISH
DiscordLocale.FRENCH -> Language.FRENCH
DiscordLocale.GERMAN -> Language.GERMAN
DiscordLocale.GREEK -> Language.GREEK
DiscordLocale.HINDI -> Language.HINDI
DiscordLocale.HUNGARIAN -> Language.HUNGARIAN
DiscordLocale.ITALIAN -> Language.ITALIAN
DiscordLocale.JAPANESE -> Language.JAPANESE
DiscordLocale.KOREAN -> Language.KOREAN
DiscordLocale.LITHUANIAN -> Language.LITHUANIAN
DiscordLocale.NORWEGIAN -> Language.NORWEGIAN
DiscordLocale.POLISH -> Language.POLISH
DiscordLocale.ROMANIAN_ROMANIA -> Language.ROMANIAN
DiscordLocale.RUSSIAN -> Language.RUSSIAN
DiscordLocale.SPANISH -> Language.SPANISH
DiscordLocale.SWEDISH -> Language.SWEDISH
DiscordLocale.THAI -> Language.THAI
DiscordLocale.TURKISH -> Language.TURKISH
DiscordLocale.UKRAINIAN -> Language.UKRAINIAN
DiscordLocale.VIETNAMESE -> Language.VIETNAMESE
DiscordLocale.ENGLISH_UK -> Language.ENGLISH
else -> Language.ENGLISH
}
}.toMap()

suspend fun handleOCRCommand(
loritta: LorittaBot,
context: UnleashedContext,
Expand Down Expand Up @@ -103,8 +68,8 @@ object OCRExecutor {
}

val ocrText = textAnnotations.description
val detectedOcrLanguageGoogle = textAnnotations.locale?.let { Language.fromLanguageCode(it) }
val detectedOcrLanguageJDA = LOCALE_TO_LANGUAGE_MAP.entries.firstOrNull { it.value == detectedOcrLanguageGoogle }
val detectedOcrLanguageGoogle = textAnnotations.locale?.let { GoogleVisionLanguage.fromLanguageCode(it) }
val detectedOcrLanguageJDA = GoogleAPIUtils.DISCORD_LOCALE_TO_LANGUAGE_MAP.entries.firstOrNull { it.value == detectedOcrLanguageGoogle }
?.key
val userLocale = context.discordUserLocale

Expand All @@ -123,7 +88,7 @@ object OCRExecutor {
}

if (detectedOcrLanguageGoogle != null && detectedOcrLanguageJDA != null && detectedOcrLanguageJDA != userLocale) {
val userGoogleLocale = LOCALE_TO_LANGUAGE_MAP[userLocale]!!
val userGoogleLocale = GoogleAPIUtils.DISCORD_LOCALE_TO_LANGUAGE_MAP[userLocale]!!

actionRow(
loritta.interactivityManager.buttonForUser(
Expand All @@ -136,7 +101,7 @@ object OCRExecutor {
) {
val deferred = it.deferChannelMessage(isEphemeral)

val translated = loritta.googleTranslateClient.translate(detectedOcrLanguageGoogle, userGoogleLocale, ocrText)
val translated = loritta.googleTranslateClient.translate(GoogleAPIUtils.fromVisionLanguageToTranslateLanguage(detectedOcrLanguageGoogle), GoogleAPIUtils.fromVisionLanguageToTranslateLanguage(userGoogleLocale), ocrText)

if (translated == null) {
deferred.editOriginal {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package net.perfectdreams.loritta.utils

import net.perfectdreams.loritta.cinnamon.discord.utils.google.GoogleAPIUtils
import net.perfectdreams.loritta.cinnamon.discord.utils.google.GoogleVisionLanguage
import org.junit.jupiter.api.Test

/**
 * Tests for [GoogleAPIUtils].
 */
class GoogleAPIUtilsTest {
    /**
     * Runs the Vision -> Translate conversion for every [GoogleVisionLanguage] entry.
     *
     * There is no explicit assertion: the test passes as long as no conversion throws.
     */
    @Test
    fun `vision language to translate language conversion`() {
        GoogleVisionLanguage.values().forEach { visionLanguage ->
            GoogleAPIUtils.fromVisionLanguageToTranslateLanguage(visionLanguage)
        }
    }
}

0 comments on commit 4bb5f78

Please sign in to comment.