diff --git a/code/_globalvars/lists/flavor_misc.dm b/code/_globalvars/lists/flavor_misc.dm
index 38072aaec01e..cf803d1c4519 100644
--- a/code/_globalvars/lists/flavor_misc.dm
+++ b/code/_globalvars/lists/flavor_misc.dm
@@ -307,3 +307,12 @@ GLOBAL_LIST_INIT(status_display_state_pictures, list(
 	"blank",
 	"shuttle",
 ))
+
+GLOBAL_LIST_INIT(most_common_words, init_common_words())
+
+/proc/init_common_words()
+	. = list()
+	var/i = 1
+	for(var/word in world.file2list("strings/1000_most_common.txt"))
+		.[word] = i
+		i += 1
diff --git a/code/controllers/subsystem/discord.dm b/code/controllers/subsystem/discord.dm
index 7efdbfcda6a5..ccfa60e09c5f 100644
--- a/code/controllers/subsystem/discord.dm
+++ b/code/controllers/subsystem/discord.dm
@@ -43,9 +43,6 @@ SUBSYSTEM_DEF(discord)
 	/// People who have tried to verify this round already
 	var/list/reverify_cache
 
-	/// Common words list, used to generate one time tokens
-	var/list/common_words
-
 	/// The file where notification status is saved
 	var/notify_file = file("data/notify.json")
 
@@ -53,7 +50,6 @@ SUBSYSTEM_DEF(discord)
 	var/enabled = FALSE
 
 /datum/controller/subsystem/discord/Initialize()
-	common_words = world.file2list("strings/1000_most_common.txt")
 	reverify_cache = list()
 	// Check for if we are using TGS, otherwise return and disables firing
 	if(world.TgsAvailable())
@@ -156,7 +152,7 @@ SUBSYSTEM_DEF(discord)
 	// While there's a collision in the token, generate a new one (should rarely happen)
 	while(not_unique)
 		//Column is varchar 100, so we trim just in case someone does us the dirty later
-		one_time_token = trim("[pick(common_words)]-[pick(common_words)]-[pick(common_words)]-[pick(common_words)]-[pick(common_words)]-[pick(common_words)]", 100)
+		one_time_token = trim("[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]-[pick(GLOB.most_common_words)]", 100)
 		not_unique = find_discord_link_by_token(one_time_token, timebound = TRUE)
@@ -298,4 +294,3 @@ SUBSYSTEM_DEF(discord)
 	if (length(discord_mention_extraction_regex.group) == 1)
 		return discord_mention_extraction_regex.group[1]
 	return null
-
diff --git a/code/datums/brain_damage/mild.dm b/code/datums/brain_damage/mild.dm
index 513f56840b56..97001f177f19 100644
--- a/code/datums/brain_damage/mild.dm
+++ b/code/datums/brain_damage/mild.dm
@@ -191,8 +191,6 @@
 	gain_text = span_warning("You lose your grasp on complex words.")
 	lose_text = span_notice("You feel your vocabulary returning to normal again.")
 
-	var/static/list/common_words = world.file2list("strings/1000_most_common.txt")
-
 /datum/brain_trauma/mild/expressive_aphasia/handle_speech(datum/source, list/speech_args)
 	var/message = speech_args[SPEECH_MESSAGE]
 	if(message)
@@ -212,7 +210,7 @@
 				word = copytext(word, 1, suffix_foundon)
 			word = html_decode(word)
 
-			if(lowertext(word) in common_words)
+			if(GLOB.most_common_words[lowertext(word)])
 				new_message += word + suffix
 			else
 				if(prob(30) && message_split.len > 2)
diff --git a/code/game/atoms_movable.dm b/code/game/atoms_movable.dm
index 71bfdfcab4df..87c5394d09a1 100644
--- a/code/game/atoms_movable.dm
+++ b/code/game/atoms_movable.dm
@@ -1521,6 +1521,10 @@
 /atom/movable/proc/get_random_understood_language()
 	return get_language_holder().get_random_understood_language()
 
+/// Gets a list of all mutually understood languages.
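+/// Returns an assoc list of language typepath to the percent chance of understanding each spoken word.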
+/atom/movable/proc/get_mutually_understood_languages()
+	return get_language_holder().get_mutually_understood_languages()
+
 /// Gets a random spoken language, useful for forced speech and such.
 /atom/movable/proc/get_random_spoken_language()
 	return get_language_holder().get_random_spoken_language()
diff --git a/code/game/machinery/telecomms/computers/logbrowser.dm b/code/game/machinery/telecomms/computers/logbrowser.dm
index e202a508ecf0..546262b044a7 100644
--- a/code/game/machinery/telecomms/computers/logbrowser.dm
+++ b/code/game/machinery/telecomms/computers/logbrowser.dm
@@ -59,7 +59,7 @@
 			message_out = "\"[message_in]\""
 		else if(!user.has_language(language))
 			// Language unknown: scramble
-			message_out = "\"[language_instance.scramble(message_in)]\""
+			message_out = "\"[language_instance.scramble_sentence(message_in, user.get_mutually_understood_languages())]\""
 		else
 			message_out = "(Unintelligible)"
 		packet_out["message"] = message_out
diff --git a/code/game/say.dm b/code/game/say.dm
index 3a8eb748b6b1..0075e0d2a801 100644
--- a/code/game/say.dm
+++ b/code/game/say.dm
@@ -213,7 +213,7 @@ GLOBAL_LIST_INIT(freqtospan, list(
 
 	if(!has_language(language))
 		var/datum/language/dialect = GLOB.language_datum_instances[language]
-		raw_message = dialect.scramble(raw_message)
+		raw_message = dialect.scramble_sentence(raw_message, get_mutually_understood_languages())
 
 	return raw_message
 
diff --git a/code/modules/language/_language.dm b/code/modules/language/_language.dm
index 3876720cbd44..f68405920c24 100644
--- a/code/modules/language/_language.dm
+++ b/code/modules/language/_language.dm
@@ -1,5 +1,7 @@
-/// maximum of 50 specific scrambled lines per language
+/// Last 50 spoken (uncommon) words will be cached before we start cycling them out (re-randomizing them)
 #define SCRAMBLE_CACHE_LEN 50
+/// Last 20 spoken sentences will be cached before we start cycling them out (re-randomizing them)
+#define SENTENCE_CACHE_LEN 20
 
 /// Datum based languages. Easily editable and modular.
 /datum/language
@@ -18,13 +20,23 @@
 	var/list/special_characters
 	/// Likelihood of making a new sentence after each syllable.
 	var/sentence_chance = 5
+	/// Likelihood of making a new sentence after each word.
+	var/between_word_sentence_chance = 0
 	/// Likelihood of getting a space in the random scramble string
 	var/space_chance = 55
+	/// Likelihood of getting a space between words
+	var/between_word_space_chance = 100
 	/// Spans to apply from this language
 	var/list/spans
 	/// Cache of recently scrambled text
 	/// This allows commonly reused words to not require a full re-scramble every time.
 	var/list/scramble_cache = list()
+	/// Cache of recently spoken sentences
+	/// So if one person speaks over the radio, everyone hears the same thing.
+	var/list/last_sentence_cache = list()
+	/// The 1000 most common words get permanently cached
+	var/list/most_common_cache = list()
+
 	/// The language that an atom knows with the highest "default_priority" is selected by default.
 	var/default_priority = 0
 	/// If TRUE, when generating names, we will always use the default human namelist, even if we have syllables set.
@@ -45,6 +57,11 @@
 	/// What char to place in between randomly generated names
 	var/random_name_spacer = " "
 
+	/// Assoc Lazylist of other language types that would have a degree of mutual understanding with this language.
+	/// For example, `list(/datum/language/common = 50)` means anyone who knows this language has a 50% base chance to make out each word of Common.
+	/// At 100%, the other language is effectively understood outright.
+	var/list/mutual_understanding
+
 /// Checks whether we should display the language icon to the passed hearer.
 /datum/language/proc/display_icon(atom/movable/hearer)
 	var/understands = hearer.has_language(src.type)
@@ -109,56 +126,144 @@
 	return result
 
-/datum/language/proc/check_cache(input)
-	var/lookup = scramble_cache[input]
-	if(lookup)
+/// Checks the word cache for a word
+/datum/language/proc/read_word_cache(input)
+	SHOULD_NOT_OVERRIDE(TRUE)
+	if(most_common_cache[input])
+		return most_common_cache[input]
+
+	. = scramble_cache[input]
+	if(. && scramble_cache[1] != input)
+		// bumps it to the top of the cache
 		scramble_cache -= input
-		scramble_cache[input] = lookup
-	. = lookup
+		scramble_cache[input] = .
+	return .
 
-/datum/language/proc/add_to_cache(input, scrambled_text)
+/// Adds a word to the cache
+/datum/language/proc/write_word_cache(input, scrambled_text)
+	SHOULD_NOT_OVERRIDE(TRUE)
+	if(GLOB.most_common_words[lowertext(input)])
+		most_common_cache[input] = scrambled_text
+		return
 	// Add it to cache, cutting old entries if the list is too long
 	scramble_cache[input] = scrambled_text
 	if(scramble_cache.len > SCRAMBLE_CACHE_LEN)
-		scramble_cache.Cut(1, scramble_cache.len-SCRAMBLE_CACHE_LEN-1)
+		scramble_cache.Cut(1, scramble_cache.len - SCRAMBLE_CACHE_LEN + 1)
 
-/datum/language/proc/scramble(input)
+/// Checks the sentence cache for a sentence
+/datum/language/proc/read_sentence_cache(input)
+	SHOULD_NOT_OVERRIDE(TRUE)
+	. = last_sentence_cache[input]
+	if(. && last_sentence_cache[1] != input)
+		// bumps it to the top of the cache (don't anticipate this happening often)
+		last_sentence_cache -= input
+		last_sentence_cache[input] = .
+	return .
 
-	if(!length(syllables))
-		return stars(input)
+/// Adds a sentence to the cache, though the sentence should be modified with a key
+/datum/language/proc/write_sentence_cache(input, key, result_scramble)
+	SHOULD_NOT_OVERRIDE(TRUE)
+	// Add to the cache (the cache being an assoc list of assoc lists), cutting old entries if the list is too long
+	LAZYSET(last_sentence_cache[input], key, result_scramble)
+	if(last_sentence_cache.len > SENTENCE_CACHE_LEN)
+		last_sentence_cache.Cut(1, last_sentence_cache.len - SENTENCE_CACHE_LEN + 1)
+
+/// Goes through the input and removes any punctuation from the end of the string.
+/proc/strip_punctuation(input)
+	var/static/list/bad_punctuation = list("!", "?", ".", "~", ";", ":", "-")
+	var/last_char = copytext_char(input, -1)
+	while(last_char in bad_punctuation)
+		input = copytext(input, 1, -1)
+		last_char = copytext_char(input, -1)
+
+	return trim_right(input)
+
+/// Find what punctuation is at the end of the input, returns it.
+/proc/find_last_punctuation(input)
+	. = copytext_char(input, -3)
+	if(. == "...")
+		return .
+	. = copytext_char(input, -2)
+	if(. in list("!!", "??", "..", "?!", "!?"))
+		return .
+	. = copytext_char(input, -1)
+	if(. in list("!", "?", ".", "~", ";", ":", "-"))
+		return .
+	return ""
+
+/// Scrambles a sentence in this language.
+/// Takes into account any languages the hearer knows that have mutual understanding with this language.
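+/// Results are cached per level of understanding, so every hearer with the same level sees the same scrambled text.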
+/datum/language/proc/scramble_sentence(input, list/mutual_languages)
+	var/cache_key = "[mutual_languages?[type] || 0]-understanding"
+	var/list/cache = read_sentence_cache(input)
+	if(cache?[cache_key])
+		return cache[cache_key]
+
+	var/list/real_words = splittext(input, " ")
+	var/list/scrambled_words = list()
+	for(var/word in real_words)
+		var/translate_prob = mutual_languages?[type] || 0
+		if(translate_prob > 0)
+			var/base_word = lowertext(strip_punctuation(word))
+			// the probability of managing to understand a word is based on how common it is
+			// 1000 words in the list, so words outside the list are just treated as "the 1500th most common word"
+			var/commonness = GLOB.most_common_words[base_word] || 1500
+			translate_prob += (translate_prob * 0.2 * (1 - (min(commonness, 1500) / 500)))
+			if(prob(translate_prob))
+				scrambled_words += base_word
+				continue
+
+		scrambled_words += scramble_word(word)
+	// start building the sentence. the first word is capitalized and otherwise untouched
+	. = capitalize(popleft(scrambled_words))
+	for(var/word in scrambled_words)
+		if(prob(between_word_sentence_chance))
+			. += ". "
+		else if(prob(between_word_space_chance))
+			. += " "
+
+		. += word
+
+	// scrambling the words will drop punctuation, so re-add it at the end
+	. += find_last_punctuation(trim_right(input))
+
+	write_sentence_cache(input, cache_key, .)
+
+	return .
+
+/// Scrambles a single word in this language.
+/datum/language/proc/scramble_word(input)
 	// If the input is cached already, move it to the end of the cache and return it
-	var/lookup = check_cache(input)
-	if(lookup)
-		return lookup
-
-	var/input_size = length_char(input)
-	var/scrambled_text = ""
-	var/capitalize = TRUE
-
-	while(length_char(scrambled_text) < input_size)
-		var/next = (length(scrambled_text) && length(special_characters) && prob(1)) ? pick(special_characters) : pick_weight_recursive(syllables)
-		if(capitalize)
-			next = capitalize(next)
-			capitalize = FALSE
-		scrambled_text += next
-		var/chance = rand(100)
-		if(chance <= sentence_chance)
-			scrambled_text += ". "
-			capitalize = TRUE
-		else if(chance > sentence_chance && chance <= space_chance)
-			scrambled_text += " "
-
-	scrambled_text = trim(scrambled_text)
-	var/ending = copytext_char(scrambled_text, -1)
-	if(ending == ".")
-		scrambled_text = copytext_char(scrambled_text, 1, -2)
-	var/input_ending = copytext_char(input, -1)
-	if(input_ending in list("!","?","."))
-		scrambled_text += input_ending
-
-	add_to_cache(input, scrambled_text)
-
-	return scrambled_text
+	. = read_word_cache(input)
+	if(.)
+		return .
+
+	if(!length(syllables))
+		. = stars(input)
+
+	else
+		var/input_size = length_char(input)
+		var/add_space = FALSE
+		var/add_period = FALSE
+		. = ""
+		while(length_char(.) < input_size)
+			// add in the last syllable's period or space first
+			if(add_period)
+				. += ". "
+			else if(add_space)
+				. += " "
+			// generate the next syllable (capitalize if we just added a period)
+			var/next = (. && length(special_characters) && prob(1)) ? pick(special_characters) : pick_weight_recursive(syllables)
+			if(add_period)
+				next = capitalize(next)
+			. += next
+			// determine if the next syllable gets a period or space
+			add_period = prob(sentence_chance)
+			add_space = prob(space_chance)
+
+	write_word_cache(input, .)
+
+	return .
 
 #undef SCRAMBLE_CACHE_LEN
diff --git a/code/modules/language/_language_holder.dm b/code/modules/language/_language_holder.dm
index b48a1ab1530a..f061ed2bab35 100644
--- a/code/modules/language/_language_holder.dm
+++ b/code/modules/language/_language_holder.dm
@@ -176,6 +176,18 @@ Key procs
 /datum/language_holder/proc/get_random_understood_language()
 	return pick(understood_languages)
 
+/// Gets a list of all mutually understood languages.
+/datum/language_holder/proc/get_mutually_understood_languages()
+	var/list/mutual_languages = list()
+	for(var/language_type in understood_languages)
+		var/datum/language/language_instance = GLOB.language_datum_instances[language_type]
+		for(var/mutual_language_type in language_instance.mutual_understanding)
+			// add it to the list OR override it if it's a stronger mutual understanding
+			if(!mutual_languages[mutual_language_type] || mutual_languages[mutual_language_type] < language_instance.mutual_understanding[mutual_language_type])
+				mutual_languages[mutual_language_type] = language_instance.mutual_understanding[mutual_language_type]
+
+	return mutual_languages
+
 /// Gets a random spoken language, useful for forced speech and such.
 /datum/language_holder/proc/get_random_spoken_language()
 	return pick(spoken_languages)
diff --git a/code/modules/language/beachbum.dm b/code/modules/language/beachbum.dm
index bd319e717ffd..eb2447ded187 100644
--- a/code/modules/language/beachbum.dm
+++ b/code/modules/language/beachbum.dm
@@ -19,3 +19,8 @@
 	)
 	icon_state = "beach"
 	always_use_default_namelist = TRUE
+
+	mutual_understanding = list(
+		/datum/language/common = 50,
+		/datum/language/uncommon = 30,
+	)
diff --git a/code/modules/language/codespeak.dm b/code/modules/language/codespeak.dm
index 242095b3bb7f..7c2657c7b285 100644
--- a/code/modules/language/codespeak.dm
+++ b/code/modules/language/codespeak.dm
@@ -7,10 +7,10 @@
 	icon_state = "codespeak"
 	always_use_default_namelist = TRUE // No syllables anyways
 
-/datum/language/codespeak/scramble(input)
-	var/lookup = check_cache(input)
-	if(lookup)
-		return lookup
+/datum/language/codespeak/scramble_sentence(input, list/mutual_languages)
+	. = read_word_cache(input)
+	if(.)
+		return .
 
 	. = ""
 	var/list/words = list()
@@ -29,4 +29,4 @@
 	if(input_ending in endings)
 		. += input_ending
 
-	add_to_cache(input, .)
+	write_word_cache(input, .)
diff --git a/code/modules/language/common.dm b/code/modules/language/common.dm
index 6bad808fef26..764375c4a0d3 100644
--- a/code/modules/language/common.dm
+++ b/code/modules/language/common.dm
@@ -55,3 +55,8 @@
 		"his", "ing", "ion", "ith", "not", "ome", "oul", "our", "sho", "ted", "ter", "tha", "the", "thi",
 		),
 	)
+
+	mutual_understanding = list(
+		/datum/language/beachbum = 33,
+		/datum/language/uncommon = 20,
+	)
diff --git a/code/modules/language/uncommon.dm b/code/modules/language/uncommon.dm
index 117ed1c76fd1..58c1d5bba2cb 100644
--- a/code/modules/language/uncommon.dm
+++ b/code/modules/language/uncommon.dm
@@ -14,3 +14,8 @@
 	)
 	icon_state = "galuncom"
 	default_priority = 90
+
+	mutual_understanding = list(
+		/datum/language/common = 33,
+		/datum/language/beachbum = 20,
+	)
diff --git a/maplestation_modules/code/modules/antagonists/advanced_cult/clock_cult/clock_language.dm b/maplestation_modules/code/modules/antagonists/advanced_cult/clock_cult/clock_language.dm
index 1b14aed539cd..68a1c4bcf9da 100644
--- a/maplestation_modules/code/modules/antagonists/advanced_cult/clock_cult/clock_language.dm
+++ b/maplestation_modules/code/modules/antagonists/advanced_cult/clock_cult/clock_language.dm
@@ -11,7 +11,7 @@
 	spans = list(SPAN_ROBOT)
 	icon_state = "ratvar"
 
-/datum/language/ratvarian/scramble(input)
+/datum/language/ratvarian/scramble_sentence(input, list/mutual_languages)
 	return text2ratvar(input)
 
 /// Regexes used to add ratvarian styling to rot13 english
diff --git a/maplestation_modules/code/modules/language/highdraconic.dm b/maplestation_modules/code/modules/language/highdraconic.dm
index b671914981b3..6c1e2a6cb57a 100644
--- a/maplestation_modules/code/modules/language/highdraconic.dm
+++ b/maplestation_modules/code/modules/language/highdraconic.dm
@@ -17,18 +17,18 @@
 	icon_state = "lizardred"
 	default_priority = 85
 
-// So I wrote a few unit tests for /tg/ that rely on Lizards not knowing what high draconic is.
-// And since rewriting them is out of the questions, Lizards don't know high draconic in unit tests.
-#ifndef UNIT_TESTS
+	mutual_understanding = list(
+		/datum/language/draconic = 66,
+	)
 
-// Edit to the base lizard language holder - lizards can understand high draconic.
-/datum/language_holder/lizard
-	understood_languages = list(
-		/datum/language/common = list(LANGUAGE_ATOM),
-		/datum/language/draconic = list(LANGUAGE_ATOM),
-		/datum/language/impdraconic = list(LANGUAGE_ATOM),
+/datum/language/draconic
+	mutual_understanding = list(
+		/datum/language/impdraconic = 66,
 	)
 
+// TG unit test compliance (out of laziness)
+#ifndef UNIT_TESTS
+
 // Edit to the silverscale language holder - silverscales can speak high draconic.
 /datum/language_holder/lizard/silver
 	understood_languages = list(