Skip to content

Commit

Permalink
feat: error classification
Browse files Browse the repository at this point in the history
Resolves #21 by implementing server-side error classification based on content restrictions and
refactoring the deserialization logic into a converter-based strategy:

- Errors are now categorized using the new `error_type` field, and a `business_error` flag
  distinguishes between expected (business) errors and player errors.
- Moved from a deserializer-based approach to a converter, enabling a more extensible processing
  pipeline.
- Introduced a processing pipeline to standardize how different processors apply transformations.

Co-authored-by: Gaëtan Muller <[email protected]>
  • Loading branch information
jboix and MGaetan89 committed Jan 31, 2025
1 parent b00e85b commit 703a44c
Show file tree
Hide file tree
Showing 8 changed files with 351 additions and 126 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package ch.srgssr.pillarbox.monitoring.event.model

/**
 * Contract for a step that post-processes an event's data node once it has been deserialized.
 *
 * A step declares which events it applies to through [shouldProcess] and performs its work in
 * [process].
 */
internal interface DataProcessor {
    /**
     * Tells whether this processor applies to the event named [eventName].
     *
     * The default implementation accepts every event; implementations override it to restrict
     * themselves to specific event names.
     *
     * @param eventName The name of the event being processed.
     *
     * @return `true` if the processor should handle this event, `false` otherwise.
     */
    fun shouldProcess(eventName: String): Boolean = true

    /**
     * Applies this processor to [data].
     *
     * Implementations may add metadata, validate values or transform fields. They may mutate the
     * received map in place or hand back a different one, so callers must use the returned map.
     *
     * @param data The data node to process.
     *
     * @return The processed data node, which may be modified or left unchanged.
     */
    fun process(data: MutableMap<String, Any?>): MutableMap<String, Any?>
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
package ch.srgssr.pillarbox.monitoring.event.model

/**
 * [DataProcessor] that classifies the error message carried by an error data node.
 *
 * The message is looked up among the known [ContentRestriction] messages and the outcome is
 * written back into the data node.
 */
internal class ErrorProcessor : DataProcessor {
    /**
     * Accepts only events whose name is exactly `ERROR`.
     */
    override fun shouldProcess(eventName: String): Boolean = eventName.equals("ERROR")

    /**
     * Annotates [data] with the classification of its `message` entry.
     *
     * - `error_type` receives the name of the matching [ContentRestriction], or `null` when the
     *   message is missing, is not a string, or matches no known restriction.
     * - `business_error` is `true` exactly when a restriction matched.
     *
     * Both keys are always written, and the map is modified in place.
     *
     * @param data The data node to process.
     *
     * @return The same [data] instance, enriched with the error classification.
     */
    override fun process(data: MutableMap<String, Any?>): MutableMap<String, Any?> {
        val message = data["message"] as? String
        val restriction = if (message == null) null else ContentRestriction.findByMessage(message)

        return data.apply {
            // Written in this order so the key insertion order of the map stays unchanged.
            put("error_type", restriction?.name)
            put("business_error", restriction != null)
        }
    }
}

/**
 * Content restriction types, each identified by the exact error messages that announce it.
 *
 * Matching is done on the complete message text, so it is case- and punctuation-sensitive.
 *
 * NOTE(review): "Ce contenu n'est actuellement pas disponible." is listed under both
 * [COMMERCIAL] and [UNKNOWN]. The lookup table keeps the constant declared last, so that message
 * currently resolves to [UNKNOWN] — confirm this is the intended classification.
 *
 * @property messages The exact error messages that identify this restriction.
 */
internal enum class ContentRestriction(
    val messages: List<String>,
) {
    AGERATING12(
        listOf(
            "To protect children this content is only available between 8PM and 6AM.",
            "Pour protéger les enfants, ce contenu est accessible entre 20h et 6h.",
            "Per proteggere i bambini, questo media è disponibile solo fra le 20 e le 6.",
            "Per proteger uffants, è quest cuntegn disponibel mo tranter las 20.00 e las 06.00.",
        ),
    ),
    AGERATING18(
        listOf(
            "To protect children this content is only available between 10PM and 5AM.",
            "Pour protéger les enfants, ce contenu est accessible entre 23h et 5h.",
            "Per proteggere i bambini, questo media è disponibile solo fra le 23 le 5.",
            "Per proteger uffants, è quest cuntegn disponibel mo tranter las 23.00 e las 05.00.",
        ),
    ),
    COMMERCIAL(
        listOf(
            "This commercial content is not available.",
            "Ce contenu n'est actuellement pas disponible.",
            "Questo contenuto commerciale non è disponibile.",
            "Quest medium commerzial n'è betg disponibel.",
        ),
    ),
    ENDDATE(
        listOf(
            "This content is not available anymore.",
            "Ce contenu n'est plus disponible.",
            "Questo media non è più disponibile.",
            "Quest cuntegn n'è betg pli disponibel.",
        ),
    ),
    GEOBLOCK(
        listOf(
            "This content is not available outside Switzerland.",
            "La RTS ne dispose pas des droits de diffusion en dehors de la Suisse.",
            "Questo media non è disponibile fuori dalla Svizzera.",
            "Quest cuntegn n'è betg disponibel ordaifer la Svizra.",
        ),
    ),
    JOURNALISTIC(
        listOf(
            "This content is temporarily unavailable for journalistic reasons.",
            "Ce contenu est temporairement indisponible pour des raisons éditoriales.",
            "Questo contenuto è temporaneamente non disponibile per motivi editoriali.",
            "Quest cuntegn na stat ad interim betg a disposiziun per motivs publicistics.",
        ),
    ),
    LEGAL(
        listOf(
            "This content is not available due to legal restrictions.",
            "Pour des raisons juridiques, ce contenu n'est pas disponible.",
            "Il contenuto non è fruibile a causa di restrizioni legali.",
            "Quest cuntegn n'è betg disponibel perquai ch'el è scadì.",
        ),
    ),
    STARTDATE(
        listOf(
            "This content is not available yet.",
            "Ce contenu n'est pas encore disponible. Veuillez réessayer plus tard.",
            "Il contenuto non è ancora disponibile. Per cortesia prova più tardi.",
            "Quest cuntegn n'è betg anc disponibel. Empruvai pli tard.",
        ),
    ),
    UNKNOWN(
        listOf(
            "This content is not available.",
            "Ce contenu n'est actuellement pas disponible.",
            "Questo media non è disponibile.",
            "Quest cuntegn n'è betg disponibel.",
        ),
    ),
    ;

    companion object {
        /**
         * Message-to-restriction lookup table, built on first use.
         *
         * Constants are visited in declaration order and later registrations replace earlier ones
         * for the same message.
         */
        private val messageToTypeMap: Map<String, ContentRestriction> by lazy {
            buildMap<String, ContentRestriction> {
                // Qualified on purpose: a bare `entries` here would resolve to the map's own entries.
                for (restriction in ContentRestriction.entries) {
                    for (text in restriction.messages) {
                        put(text, restriction)
                    }
                }
            }
        }

        /**
         * Finds the restriction announced by [message].
         *
         * @param message The complete error message to look up.
         *
         * @return The matching restriction, or `null` when the message is not a known one.
         */
        fun findByMessage(message: String): ContentRestriction? = messageToTypeMap[message]
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,7 @@ package ch.srgssr.pillarbox.monitoring.event.model

import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.core.JsonParser
import com.fasterxml.jackson.databind.DeserializationContext
import com.fasterxml.jackson.databind.JsonDeserializer
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.annotation.JsonDeserialize
import com.fasterxml.jackson.databind.node.ObjectNode
import nl.basjes.parse.useragent.UserAgent
import nl.basjes.parse.useragent.UserAgentAnalyzer
import org.springframework.data.annotation.Id
import org.springframework.data.elasticsearch.annotations.DateFormat
import org.springframework.data.elasticsearch.annotations.Document
Expand All @@ -34,6 +27,7 @@ import org.springframework.data.elasticsearch.annotations.WriteTypeHint
writeTypeHint = WriteTypeHint.FALSE,
storeIdInSource = false,
)
@JsonDeserialize(converter = EventRequestDataConverter::class)
data class EventRequest(
@Id
@JsonIgnore
Expand All @@ -52,89 +46,7 @@ data class EventRequest(
var ip: String?,
@JsonProperty(required = true)
var version: Long,
@JsonDeserialize(using = DataDeserializer::class)
@JsonProperty(required = true)
var data: Any,
var session: Any?,
)

/**
* Custom deserializer for the 'data' field in [EventRequest].
*
* This deserializer processes the incoming JSON data to extract the user agent string from the
* `browser.user_agent` field and enriches the JSON node with detailed information about the
* browser, device, and operating system, plus a `robot` flag.
*
* If `browser` is not a JSON object or has no `user_agent` field, the deserializer returns the
* node unmodified.
*/
private class DataDeserializer : JsonDeserializer<Any?>() {
companion object {
// Held in the companion object so one analyzer is shared by every deserializer instance.
private val userAgentAnalyzer =
UserAgentAnalyzer
.newBuilder()
.hideMatcherLoadStats()
.withCache(10000)
.build()
}

/**
* Tells whether the parsed user agent looks like a hacker or a robot.
*
* @param userAgent The parsed user agent to inspect.
* @return `true` when the device class, layout engine class, agent class or agent security value
* starts with "Hacker" or "Robot" (case-insensitive); fields without a value never match.
*/
fun isHackerOrRobot(userAgent: UserAgent): Boolean =
listOf(UserAgent.DEVICE_CLASS, UserAgent.LAYOUT_ENGINE_CLASS, UserAgent.AGENT_CLASS, UserAgent.AGENT_SECURITY)
.any { field ->
userAgent.getValue(field)?.let { value ->
value.startsWith("Hacker", ignoreCase = true) ||
value.startsWith("Robot", ignoreCase = true)
} ?: false
}

/**
* Reads the value as a JSON tree, enriches it with user agent details when possible, and converts
* the tree to a plain value.
*
* @param parser The parser positioned on the value to read.
* @param ctxt The deserialization context, used here for its node factory.
* @return The value converted from the (possibly enriched) tree.
*/
override fun deserialize(
parser: JsonParser,
ctxt: DeserializationContext,
): Any? {
// Read the whole value as a tree so it can be inspected and modified before conversion.
val node: JsonNode = parser.codec.readTree(parser)
val browserNode = (node as? ObjectNode)?.get("browser")
// Without an object-shaped `browser` node carrying `user_agent`, convert the tree as it is.
val userAgent =
(browserNode as? ObjectNode)
?.get("user_agent")
?.asText()
?.let(userAgentAnalyzer::parse) ?: return parser.codec.treeToValue(node, Any::class.java)

// The existing `browser` node is kept and gains `name` and `version`.
node.set<ObjectNode>(
"browser",
browserNode.apply {
put("name", userAgent.getValueOrNull(UserAgent.AGENT_NAME))
put("version", userAgent.getValueOrNull(UserAgent.AGENT_VERSION))
},
)

// `device` is set to a newly created object node holding only `model` and `type`.
node.set<ObjectNode>(
"device",
ObjectNode(ctxt.nodeFactory).apply {
put("model", userAgent.getValueOrNull(UserAgent.DEVICE_NAME))
put("type", userAgent.getValueOrNull(UserAgent.DEVICE_CLASS))
},
)

// `os` is set to a newly created object node holding only `name` and `version`.
node.set<ObjectNode>(
"os",
ObjectNode(ctxt.nodeFactory).apply {
put("name", userAgent.getValueOrNull(UserAgent.OPERATING_SYSTEM_NAME))
put("version", userAgent.getValueOrNull(UserAgent.OPERATING_SYSTEM_VERSION))
},
)

node.put("robot", isHackerOrRobot(userAgent))

return parser.codec.treeToValue(node, Any::class.java)
}
}

/**
 * Private extension function for [UserAgent] that reads a field and maps the "??" value to `null`.
 *
 * @param fieldName The name of the field to retrieve.
 * @return The value of the field, or `null` if the value is "??".
 */
private fun UserAgent.getValueOrNull(fieldName: String): String? =
    when (val raw = getValue(fieldName)) {
        "??" -> null
        else -> raw
    }
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package ch.srgssr.pillarbox.monitoring.event.model

import com.fasterxml.jackson.databind.util.StdConverter

/**
 * Custom converter for [EventRequest].
 *
 * The converter receives an [EventRequest] and runs its `data` node through the registered
 * processors that accept the request's event name.
 *
 * If the `data` node is not a map, or no processor applies, the request is returned with its data
 * unchanged.
 *
 * @see [DataProcessor]
 */
internal class EventRequestDataConverter : StdConverter<EventRequest, EventRequest>() {
    /** Processors in the order they are applied. */
    private val processors = listOf(UserAgentProcessor(), ErrorProcessor())

    /**
     * Applies every matching processor to the request's data node.
     *
     * Each processor receives the map returned by the previous one, so a processor that returns a
     * new map instead of mutating its input is still honoured by the following steps and by the
     * final result.
     *
     * @param value The request whose data node is processed.
     *
     * @return The same [value] instance, with `data` set to the output of the last processor.
     */
    @Suppress("UNCHECKED_CAST")
    override fun convert(value: EventRequest): EventRequest {
        // Unchecked cast: only the "is a MutableMap" part is verified at runtime.
        val initial = value.data as? MutableMap<String, Any?> ?: return value

        value.data =
            processors
                .filter { it.shouldProcess(value.eventName) }
                .fold(initial) { current, processor -> processor.process(current) }

        return value
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package ch.srgssr.pillarbox.monitoring.event.model

import nl.basjes.parse.useragent.UserAgent
import nl.basjes.parse.useragent.UserAgentAnalyzer

/**
 * [DataProcessor] that enriches a data node with details derived from its user agent string.
 *
 * The `user_agent` string found under the `browser` node is parsed with [UserAgentAnalyzer]; the
 * result is used to fill in browser, device and operating system information and to flag user
 * agents classified as robots or hackers.
 */
internal class UserAgentProcessor : DataProcessor {
    /**
     * Accepts only events whose name is exactly `START`.
     */
    override fun shouldProcess(eventName: String): Boolean = eventName.equals("START")

    /**
     * Enriches [data] with user agent details, modifying it in place.
     *
     * - When `browser` is not a map, or its `user_agent` entry is not a string, [data] is returned
     *   untouched.
     * - Otherwise `browser.name` and `browser.version` are set, `device.model` / `device.type` and
     *   `os.name` / `os.version` are set (reusing an existing `device` / `os` map when there is
     *   one, creating it otherwise), and `robot` receives the result of [isHackerOrRobot].
     *
     * @param data The data node to process.
     *
     * @return The same [data] instance, enriched when a user agent was found.
     */
    @Suppress("UNCHECKED_CAST")
    override fun process(data: MutableMap<String, Any?>): MutableMap<String, Any?> {
        val browser = data["browser"] as? MutableMap<String, Any?> ?: return data
        val rawUserAgent = browser["user_agent"] as? String ?: return data
        val parsed = userAgentAnalyzer.parse(rawUserAgent) ?: return data

        browser["name"] = parsed.getValueOrNull(UserAgent.AGENT_NAME)
        browser["version"] = parsed.getValueOrNull(UserAgent.AGENT_VERSION)

        val device = data["device"] as? MutableMap<String, Any?> ?: mutableMapOf()
        device["model"] = parsed.getValueOrNull(UserAgent.DEVICE_NAME)
        device["type"] = parsed.getValueOrNull(UserAgent.DEVICE_CLASS)
        data["device"] = device

        val os = data["os"] as? MutableMap<String, Any?> ?: mutableMapOf()
        os["name"] = parsed.getValueOrNull(UserAgent.OPERATING_SYSTEM_NAME)
        os["version"] = parsed.getValueOrNull(UserAgent.OPERATING_SYSTEM_VERSION)
        data["os"] = os

        data["robot"] = isHackerOrRobot(parsed)

        return data
    }

    /**
     * Tells whether any of the classification fields of [userAgent] starts with "Hacker" or
     * "Robot", ignoring case. Fields without a value never match.
     */
    private fun isHackerOrRobot(userAgent: UserAgent): Boolean =
        classificationFields
            .asSequence()
            .mapNotNull { field -> userAgent.getValue(field) }
            .any { value -> flaggedPrefixes.any { prefix -> value.startsWith(prefix, ignoreCase = true) } }

    companion object {
        // Held in the companion object so one analyzer is shared by every processor instance.
        private val userAgentAnalyzer =
            UserAgentAnalyzer
                .newBuilder()
                .hideMatcherLoadStats()
                .withCache(10000)
                .build()

        /** User agent fields inspected by [isHackerOrRobot], in inspection order. */
        private val classificationFields =
            listOf(
                UserAgent.DEVICE_CLASS,
                UserAgent.LAYOUT_ENGINE_CLASS,
                UserAgent.AGENT_CLASS,
                UserAgent.AGENT_SECURITY,
            )

        /** Value prefixes that flag a user agent as a robot. */
        private val flaggedPrefixes = listOf("Hacker", "Robot")
    }
}

/**
 * Private extension function for [UserAgent] that reads a field, treating "??" as absent.
 *
 * @param fieldName The name of the field to retrieve.
 * @return The value of the field, or `null` if the value is "??".
 */
private fun UserAgent.getValueOrNull(fieldName: String): String? =
    getValue(fieldName)?.takeUnless { it == "??" }
Loading

0 comments on commit 703a44c

Please sign in to comment.