Skip to content

Commit 82fa74c

Browse files
authored
Refactor PrivacyModule (#752)
* Refactor PrivacyModule; support sensitive text patterns in config Refactors the PrivacyModule to: - allow specifying sensitive text patterns in the config instead of hardcoding them - support regex patterns for sensitive file names Additionally the test class PrivacyModuleTest is refactored to cover more cases and to reuse test code for email address and sensitive text detection. * Fail on unknown config entries This helps detecting mistyped config entries.
1 parent 45a1b49 commit 82fa74c

File tree

8 files changed

+538
-412
lines changed

8 files changed

+538
-412
lines changed

config/config.yml

+17-8
Original file line numberDiff line numberDiff line change
@@ -176,14 +176,23 @@ arisa:
176176
177177
----
178178
Restricted by PrivacyModule ??[~arisabot]??
179-
allowedEmailRegex:
180-
- '^(mailer-daemon|postmaster|nobody|noreply|no-reply|hostmaster|usenet|news|webmaster|www|abuse|noc|security|info|support|uucp|ftp|news|admin|root)@.*'
181-
- '.*@(mojang.com|microsoft.com|minecraft.net|zendesk.com)$'
182-
sensitiveFileNames:
183-
- launcher_accounts.json
184-
- launcher_msa_credentials.json
185-
- launcher_profiles.json
186-
- .jfr
179+
# Important: These email regex patterns must match the email address completely
180+
allowedEmailRegexes:
181+
- '(mailer-daemon|postmaster|nobody|noreply|no-reply|hostmaster|usenet|news|webmaster|www|abuse|noc|security|info|support|uucp|ftp|news|admin|root)@.*'
182+
- '.*@(mojang\.com|microsoft\.com|minecraft\.net|zendesk\.com)'
183+
sensitiveTextRegexes:
184+
- '\(Session ID is token:'
185+
- '--accessToken ey'
186+
- '(?<![^\s])(?=[^\s]*[A-Z])(?=[^\s]*[0-9])[A-Z0-9]{17}(?![^\s])'
187+
# At the moment braintree transaction IDs seem to have 8 chars, but to be future-proof
188+
# match if there are more chars as well
189+
- '\bbraintree:[a-f0-9]{6,12}\b'
190+
sensitiveFileNameRegexes:
191+
- 'launcher_accounts\.json'
192+
- 'launcher_msa_credentials\.json'
193+
- 'launcher_profiles\.json'
194+
# Java Flight Recorder files contain session token in some Minecraft versions
195+
- '.*\.jfr'
187196

188197
removeTriagedMeqs:
189198
resolutions:

docs/Modules.md

+9-4
Original file line numberDiff line numberDiff line change
@@ -239,13 +239,18 @@ Hides privacy information like Email addresses in tickets or comments.
239239
### Checks
240240
- The ticket is not set to private.
241241
#### For Setting Tickets to Private
242-
- Any of the fields and/or text attachments added after last run contains session ID or Email.
243-
- Or any of the attachments has a name specified by `sensitiveFileNames` in the [config](../config/config.yml)
244-
(defaults to empty list)
242+
- Any of the following matches one of the regex patterns specified by `sensitiveTextRegexes` in the
243+
[config](../config/config.yml), or contains an email address which does not match one of the `allowedEmailRegexes`:
244+
- summary, environment, description (if ticket was created after last run)
245+
- text attachment (if it was created after last run)
246+
- changelog entry string value (if it was created after last run)
247+
- Or any of the attachments created after the last run has a name which matches one of `sensitiveFileNameRegexes` in
248+
the [config](../config/config.yml).
245249
#### For Restricting Comments to `staff`
246250
- The comment was added after last run.
247251
- The comment is not restricted.
248-
- The comment contains session ID or Email.
252+
- The comment matches one of the regex patterns specified by `sensitiveTextRegexes` in the
253+
[config](../config/config.yml), or contains an email address which does not match one of the `allowedEmailRegexes`.
249254

250255
## PrivateDuplicate
251256
| Entry | Value |
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
package io.github.mojira.arisa
22

33
import com.uchuhimo.konf.Config
4+
import com.uchuhimo.konf.Feature
45
import com.uchuhimo.konf.source.yaml
56
import io.github.mojira.arisa.infrastructure.config.Arisa
67

78
class ConfigService {
89
val config: Config = Config { addSpec(Arisa) }
9-
.from.yaml.watchFile("config/config.yml")
10-
.from.yaml.watchFile("config/local.yml")
10+
// Only enable strict config parsing for YAML files; environment variables and system properties
11+
// likely contain entries completely unrelated to Arisa
12+
.from.enabled(Feature.FAIL_ON_UNKNOWN_PATH).yaml.watchFile("config/config.yml")
13+
.from.enabled(Feature.FAIL_ON_UNKNOWN_PATH).yaml.watchFile("config/local.yml")
1114
.from.env()
1215
.from.systemProperties()
16+
.validateRequired()
1317
}

src/main/kotlin/io/github/mojira/arisa/infrastructure/config/ArisaConfig.kt

+12-4
Original file line numberDiff line numberDiff line change
@@ -160,13 +160,21 @@ object Arisa : ConfigSpec() {
160160
"",
161161
description = "The text which will be appended at the comments that are restricted by this module."
162162
)
163-
val allowedEmailRegex by optional<List<String>>(
163+
val allowedEmailRegexes by optional<List<String>>(
164164
default = emptyList(),
165-
description = "List of regex for allowed emails"
165+
description = "Regex patterns matching allowed email addresses, that is, email addresses which may" +
166+
" be public. The patterns must match the complete email address; partial matches won't have any" +
167+
" effect."
166168
)
167-
val sensitiveFileNames by optional<List<String>>(
169+
val sensitiveTextRegexes by optional<List<String>>(
168170
default = emptyList(),
169-
description = "Names of attachment files containing sensitive information"
171+
description = "Regex patterns matching sensitive text. The patterns do not have to match the" +
172+
" complete text; partial matches will be detected as well."
173+
)
174+
val sensitiveFileNameRegexes by optional<List<String>>(
175+
default = emptyList(),
176+
description = "Regex patterns matching names of attachment files containing sensitive information." +
177+
" The patterns must match the complete file name; partial matches won't have any effect."
170178
)
171179
}
172180

src/main/kotlin/io/github/mojira/arisa/modules/PrivacyModule.kt

+15-19
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,16 @@ import io.github.mojira.arisa.domain.CommentOptions
77
import io.github.mojira.arisa.domain.Issue
88
import java.time.Instant
99

10+
private fun Iterable<Regex>.anyMatches(string: String) = any { it.matches(string) }
11+
1012
class PrivacyModule(
1113
private val message: String,
1214
private val commentNote: String,
13-
private val allowedEmailsRegex: List<Regex>,
14-
private val sensitiveFileNames: List<String>
15+
private val allowedEmailRegexes: List<Regex>,
16+
private val sensitiveTextRegexes: List<Regex>,
17+
private val sensitiveFileNameRegexes: List<Regex>
1518
) : Module {
16-
private val patterns: List<Regex> = listOf(
17-
""".*\(Session ID is token:.*""".toRegex(),
18-
""".*--accessToken ey.*""".toRegex(),
19-
""".*(?<![^\s])(?=[^\s]*[A-Z])(?=[^\s]*[0-9])[A-Z0-9]{17}(?![^\s]).*""".toRegex(),
20-
// At the moment braintree transaction IDs seem to have 8 chars, but to be future-proof
21-
// match if there are more chars as well
22-
""".*\bbraintree:[a-f0-9]{6,12}\b.*""".toRegex()
23-
)
24-
19+
// Matches an email address, which is not part of a user mention ([~name])
2520
private val emailRegex = "(?<!\\[~)\\b[a-zA-Z0-9.\\-_]+@[a-zA-Z.\\-_]+\\.[a-zA-Z.\\-]{2,15}\\b".toRegex()
2621

2722
override fun invoke(issue: Issue, lastRun: Instant): Either<ModuleError, ModuleResponse> = with(issue) {
@@ -34,9 +29,9 @@ class PrivacyModule(
3429
string += "$summary $environment $description "
3530
}
3631

37-
attachments
32+
val newAttachments = attachments.filter { it.created.isAfter(lastRun) }
33+
newAttachments
3834
.asSequence()
39-
.filter { it.created.isAfter(lastRun) }
4035
.filter { it.mimeType.startsWith("text/") }
4136
.forEach { string += "${String(it.getContent())} " }
4237

@@ -46,19 +41,19 @@ class PrivacyModule(
4641
.filter { it.changedFromString == null }
4742
.forEach { string += "${it.changedToString} " }
4843

49-
val doesStringMatchPatterns = string.matches(patterns)
44+
val doesStringMatchPatterns = string.containsMatch(sensitiveTextRegexes)
5045
val doesEmailMatches = matchesEmail(string)
5146

52-
val doesAttachmentNameMatch = attachments
47+
val doesAttachmentNameMatch = newAttachments
5348
.asSequence()
5449
.map(Attachment::name)
55-
.any(sensitiveFileNames::contains)
50+
.any(sensitiveFileNameRegexes::anyMatches)
5651

5752
val restrictCommentFunctions = comments
5853
.asSequence()
5954
.filter { it.created.isAfter(lastRun) }
6055
.filter { it.visibilityType == null }
61-
.filter { it.body?.matches(patterns) ?: false || matchesEmail(it.body ?: "") }
56+
.filter { it.body?.containsMatch(sensitiveTextRegexes) ?: false || matchesEmail(it.body ?: "") }
6257
.filterNot {
6358
it.getAuthorGroups()?.any { group ->
6459
listOf("helper", "global-moderators", "staff").contains(group)
@@ -86,9 +81,10 @@ class PrivacyModule(
8681
private fun matchesEmail(string: String): Boolean {
8782
return emailRegex
8883
.findAll(string)
89-
.filterNot { email -> allowedEmailsRegex.any { regex -> regex.matches(email.value) } }
84+
.map(MatchResult::value)
85+
.filterNot(allowedEmailRegexes::anyMatches)
9086
.any()
9187
}
9288

93-
private fun String.matches(patterns: List<Regex>) = patterns.any { it.containsMatchIn(this) }
89+
private fun String.containsMatch(patterns: List<Regex>) = patterns.any { it.containsMatchIn(this) }
9490
}

src/main/kotlin/io/github/mojira/arisa/registry/InstantModuleRegistry.kt

+3-2
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,9 @@ class InstantModuleRegistry(config: Config) : ModuleRegistry(config) {
156156
PrivacyModule(
157157
config[Arisa.Modules.Privacy.message],
158158
config[Arisa.Modules.Privacy.commentNote],
159-
config[Arisa.Modules.Privacy.allowedEmailRegex].map(String::toRegex),
160-
config[Arisa.Modules.Privacy.sensitiveFileNames]
159+
config[Arisa.Modules.Privacy.allowedEmailRegexes].map(String::toRegex),
160+
config[Arisa.Modules.Privacy.sensitiveTextRegexes].map(String::toRegex),
161+
config[Arisa.Modules.Privacy.sensitiveFileNameRegexes].map(String::toRegex)
161162
)
162163
)
163164

src/test/kotlin/io/github/mojira/arisa/infrastructure/IntegrationTest.kt

+5-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package io.github.mojira.arisa.infrastructure
22

33
import com.uchuhimo.konf.Config
4+
import com.uchuhimo.konf.Feature
45
import com.uchuhimo.konf.source.yaml
56
import io.github.mojira.arisa.infrastructure.config.Arisa
67
import io.kotest.core.spec.style.StringSpec
@@ -10,14 +11,16 @@ import java.io.File
1011
class IntegrationTest : StringSpec({
1112
"should be able to read the main config file correctly" {
1213
val config = Config { addSpec(Arisa) }
13-
.from.map.flat(
14+
// Only enable strict config parsing for map and YAML files; environment variables and system properties
15+
// likely contain entries completely unrelated to Arisa
16+
.from.enabled(Feature.FAIL_ON_UNKNOWN_PATH).map.flat(
1417
mapOf(
1518
"arisa.credentials.username" to "test",
1619
"arisa.credentials.password" to "test",
1720
"arisa.credentials.dandelionToken" to "test"
1821
)
1922
)
20-
.from.yaml.watchFile("config/config.yml")
23+
.from.enabled(Feature.FAIL_ON_UNKNOWN_PATH).yaml.watchFile("config/config.yml")
2124
.from.env()
2225
.from.systemProperties()
2326

0 commit comments

Comments
 (0)