diff --git a/README.md b/README.md index 51ddffe..3bb6837 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ For the exact syntax to use these commands, see [the relevant section](#commands Robot stores three types of information: - Configuration details. This includes things like channels to connect to, how frequently to send messages, and who has certain [privileges](#privileges) (including "privacy" privileges). For the most part, this information is relevant only to bot owners, broadcasters, and mods. -- Fifteen-minute history. Robot records all chat messages received in the last fifteen minutes, storing the username of the sender, the channel it was sent to, the time it was received, and the full message text. Robot uses this information to delete messages it's learned under [certain circumstances](#tools-for-broadcasters-and-mods). Whenever Robot receives a new message, all records older than fifteen minutes are removed. Robot also records the messages it's generated in the last fifteen minutes. +- Fifteen-minute history. Robot records all chat messages received in the last fifteen minutes, storing a hash identifying the sender, the channel it was sent to, the time it was received, and the full message text. Robot uses this information to delete messages it's learned under [certain circumstances](#tools-for-broadcasters-and-mods). Whenever Robot receives a new message, all records older than fifteen minutes are removed. Robot also records the messages it's generated in the last fifteen minutes. - Markov chain tuples. This is the majority of Robot's data, a simple list of prefix and suffix words tagged with the location that prefix and suffix may be used. This data is anonymous; Robot does not know who sent the messages that were used to obtain this information. If you want Robot not to record information from you for any reason, contact the bot owner asking to be given privacy privileges. 
Ask the broadcaster how to reach the bot owner if you aren't sure. Once you're set up to be private, none of your messages will enter her history or Markov chain data. @@ -168,7 +168,7 @@ Robot's database tables are: - `history` - messages learned from in the last fifteen minutes + `tid` - Twitch IRC message ID + `time` - timestamp of message receipt - + `sender` - username of the message sender + + `senderh` - hash corresponding to the message sender + `chan` - channel received in + `tag` - tag used to learn the message + `msg` - message text diff --git a/brain/brain.go b/brain/brain.go index bf12978..4921c9f 100644 --- a/brain/brain.go +++ b/brain/brain.go @@ -25,6 +25,7 @@ package brain import ( "context" + "crypto/sha256" "database/sql" "fmt" "strings" @@ -69,8 +70,8 @@ type brainStmts struct { // should be used with Exec in a Tx with record. learn *sql.Stmt // record is the statement to add a message to the history. Parameters are, - // in order, id, time, sender, channel, tag, message. This statement should - // be used with Exec in a Tx with learn. + // in order, id, time, sender's user hash, channel, tag, message. This + // statement should be used with Exec in a Tx with learn. record *sql.Stmt // think is the statements to match a tuple and retrieve suffixes. First // parameter is the tag, then up to (order) more for the tuple. This @@ -91,11 +92,12 @@ type brainStmts struct { // with QueryRow. The result is the rowid, tag, and message. Generally this // statement would be paired with forgets and an expunge in a Tx. historyID *sql.Stmt - // historyName is the statement to select all messages from history by - // sender name. The parameters are channel and name. This statement should - // be used with Query. The results are rowid, tag, and message. Generally - // this statement would be paired with forgets and expunges in a Tx. - historyName *sql.Stmt + // historyHash is the statement to select all messages from history by + // sender user hash. 
The parameters are channel and user hash. This statement + // should be used with Query. The results are rowid, tag, and message. + // Generally this statement would be paired with forgets and expunges in a + // Tx. + historyHash *sql.Stmt // historyPattern is the statement to select all messages from history by // partial message text. The parameters are the channel and message // pattern. This statement should be used with Query. The results are @@ -251,7 +253,7 @@ CREATE TABLE IF NOT EXISTS history ( id INTEGER PRIMARY KEY ASC, tid TEXT, -- message id from Twitch tags time DATETIME NOT NULL, -- message timestamp - sender TEXT NOT NULL, -- name of sender converted to lowercase + senderh BLOB(32) NOT NULL, -- hashed name of sender chan TEXT NOT NULL, tag TEXT NOT NULL, -- tag used to learn this message msg TEXT NOT NULL @@ -274,7 +276,7 @@ CREATE TABLE IF NOT EXISTS emotes ( weight INTEGER NOT NULL DEFAULT 1 ); CREATE INDEX IF NOT EXISTS history_id_index ON history(tid); -CREATE INDEX IF NOT EXISTS history_sender_index ON history(chan, sender); +CREATE INDEX IF NOT EXISTS history_senderh_index ON history(chan, senderh); CREATE TRIGGER IF NOT EXISTS history_limit AFTER INSERT ON history BEGIN DELETE FROM history WHERE strftime('%s', time) < strftime('%s', 'now', '-15 minutes'); END; @@ -334,7 +336,7 @@ func prepStmts(ctx context.Context, db *sql.DB, order int) brainStmts { if err != nil { panic(err) } - stmts.record, err = db.PrepareContext(ctx, `INSERT INTO history (tid, time, sender, chan, tag, msg) VALUES (?, ?, ?, ?, ?, ?);`) + stmts.record, err = db.PrepareContext(ctx, `INSERT INTO history (tid, time, senderh, chan, tag, msg) VALUES (?, ?, ?, ?, ?, ?);`) if err != nil { panic(err) } @@ -362,7 +364,7 @@ func prepStmts(ctx context.Context, db *sql.DB, order int) brainStmts { if err != nil { panic(err) } - stmts.historyName, err = db.PrepareContext(ctx, `SELECT id, tag, msg FROM history WHERE chan=?
AND sender=?`) + stmts.historyHash, err = db.PrepareContext(ctx, `SELECT id, tag, msg FROM history WHERE chan=? AND senderh=?`) if err != nil { panic(err) } @@ -401,3 +403,12 @@ func (b *Brain) Query(ctx context.Context, query string, args ...interface{}) (* func (b *Brain) QueryRow(ctx context.Context, query string, args ...interface{}) *sql.Row { return b.db.QueryRowContext(ctx, query, args...) } + +// UserHash obfuscates a username for inclusion in history. +func UserHash(channel, name string) [32]byte { + var b [64]byte + name = strings.ToLower(name) + copy(b[:32], name) + copy(b[32:], channel) + return sha256.Sum256(b[:]) +} diff --git a/brain/forget.go b/brain/forget.go index fd6ff37..7b2bd6b 100644 --- a/brain/forget.go +++ b/brain/forget.go @@ -61,7 +61,8 @@ func (b *Brain) ClearChat(ctx context.Context, channel, user string) error { } expunge := tx.StmtContext(ctx, b.stmts.expunge) forget := tx.StmtContext(ctx, b.stmts.forget) - rows, err := tx.StmtContext(ctx, b.stmts.historyName).QueryContext(ctx, channel, user) + h := UserHash(channel, user) + rows, err := tx.StmtContext(ctx, b.stmts.historyHash).QueryContext(ctx, channel, h[:]) if err != nil { tx.Rollback() return err diff --git a/brain/learn.go b/brain/learn.go index 09b01c4..94a44d9 100644 --- a/brain/learn.go +++ b/brain/learn.go @@ -74,8 +74,9 @@ func (b *Brain) Learn(ctx context.Context, msg irc.Message) error { return fmt.Errorf("Learn: error learning end-of-message %+v: %w", args, err) } // Add the message to history. + h := UserHash(channel, msg.Nick) id, _ := msg.Tag("id") - if _, err := tx.StmtContext(ctx, b.stmts.record).ExecContext(ctx, id, msg.Time, msg.Nick, channel, tag, msg.Trailing); err != nil { + if _, err := tx.StmtContext(ctx, b.stmts.record).ExecContext(ctx, id, msg.Time, h[:], channel, tag, msg.Trailing); err != nil { tx.Rollback() return fmt.Errorf("Learn: error recording message: %w", err) }