Skip to content

Commit

Permalink
brain: use a hash instead of sender username in history
Browse files Browse the repository at this point in the history
Updates #4.
  • Loading branch information
zephyrtronium committed Oct 11, 2020
1 parent 3c1b86c commit eff5311
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 15 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ For the exact syntax to use these commands, see [the relevant section](#commands
Robot stores three types of information:

- Configuration details. This includes things like channels to connect to, how frequently to send messages, and who has certain [privileges](#privileges) (including "privacy" privileges). For the most part, this information is relevant only to bot owners, broadcasters, and mods.
- Fifteen-minute history. Robot records all chat messages received in the last fifteen minutes, storing the username of the sender, the channel it was sent to, the time it was received, and the full message text. Robot uses this information to delete messages it's learned under [certain circumstances](#tools-for-broadcasters-and-mods). Whenever Robot receives a new message, all records older than fifteen minutes are removed. Robot also records the messages it's generated in the last fifteen minutes.
- Fifteen-minute history. Robot records all chat messages received in the last fifteen minutes, storing a hash identifying the sender, the channel it was sent to, the time it was received, and the full message text. Robot uses this information to delete messages it's learned under [certain circumstances](#tools-for-broadcasters-and-mods). Whenever Robot receives a new message, all records older than fifteen minutes are removed. Robot also records the messages it's generated in the last fifteen minutes.
- Markov chain tuples. This is the majority of Robot's data, a simple list of prefix and suffix words tagged with the location where that prefix and suffix may be used. This data is anonymous; Robot does not know who sent the messages that were used to obtain this information.

If you want Robot not to record information from you for any reason, contact the bot owner asking to be given privacy privileges. Ask the broadcaster how to reach the bot owner if you aren't sure. Once you're set up to be private, none of your messages will enter her history or Markov chain data.
Expand Down Expand Up @@ -168,7 +168,7 @@ Robot's database tables are:
- `history` - messages learned from in the last fifteen minutes
+ `tid` - Twitch IRC message ID
+ `time` - timestamp of message receipt
+ `sender` - username of the message sender
+ `senderh` - hash corresponding to the message sender
+ `chan` - channel received in
+ `tag` - tag used to learn the message
+ `msg` - message text
Expand Down
33 changes: 22 additions & 11 deletions brain/brain.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ package brain

import (
"context"
"crypto/sha256"
"database/sql"
"fmt"
"strings"
Expand Down Expand Up @@ -69,8 +70,8 @@ type brainStmts struct {
// should be used with Exec in a Tx with record.
learn *sql.Stmt
// record is the statement to add a message to the history. Parameters are,
// in order, id, time, sender, channel, tag, message. This statement should
// be used with Exec in a Tx with learn.
// in order, id, time, sender's user hash, channel, tag, message. This
// statement should be used with Exec in a Tx with learn.
record *sql.Stmt
// think is the statements to match a tuple and retrieve suffixes. First
// parameter is the tag, then up to (order) more for the tuple. This
Expand All @@ -91,11 +92,12 @@ type brainStmts struct {
// with QueryRow. The result is the rowid, tag, and message. Generally this
// statement would be paired with forgets and an expunge in a Tx.
historyID *sql.Stmt
// historyName is the statement to select all messages from history by
// sender name. The parameters are channel and name. This statement should
// be used with Query. The results are rowid, tag, and message. Generally
// this statement would be paired with forgets and expunges in a Tx.
historyName *sql.Stmt
// historyHash is the statement to select all messages from history by
// sender user hash. The parameters are channel and user hash. This
// statement should be used with Query. The results are rowid, tag, and
// message. Generally this statement would be paired with forgets and
// expunges in a Tx.
historyHash *sql.Stmt
// historyPattern is the statement to select all messages from history by
// partial message text. The parameters are the channel and message
// pattern. This statement should be used with Query. The results are
Expand Down Expand Up @@ -251,7 +253,7 @@ CREATE TABLE IF NOT EXISTS history (
id INTEGER PRIMARY KEY ASC,
tid TEXT, -- message id from Twitch tags
time DATETIME NOT NULL, -- message timestamp
sender TEXT NOT NULL, -- name of sender converted to lowercase
senderh BLOB(32) NOT NULL, -- hashed name of sender
chan TEXT NOT NULL,
tag TEXT NOT NULL, -- tag used to learn this message
msg TEXT NOT NULL
Expand All @@ -274,7 +276,7 @@ CREATE TABLE IF NOT EXISTS emotes (
weight INTEGER NOT NULL DEFAULT 1
);
CREATE INDEX IF NOT EXISTS history_id_index ON history(tid);
CREATE INDEX IF NOT EXISTS history_sender_index ON history(chan, sender);
CREATE INDEX IF NOT EXISTS history_senderh_index ON history(chan, senderh);
CREATE TRIGGER IF NOT EXISTS history_limit AFTER INSERT ON history BEGIN
DELETE FROM history WHERE strftime('%s', time) < strftime('%s', 'now', '-15 minutes');
END;
Expand Down Expand Up @@ -334,7 +336,7 @@ func prepStmts(ctx context.Context, db *sql.DB, order int) brainStmts {
if err != nil {
panic(err)
}
stmts.record, err = db.PrepareContext(ctx, `INSERT INTO history (tid, time, sender, chan, tag, msg) VALUES (?, ?, ?, ?, ?, ?);`)
stmts.record, err = db.PrepareContext(ctx, `INSERT INTO history (tid, time, senderh, chan, tag, msg) VALUES (?, ?, ?, ?, ?, ?);`)
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -362,7 +364,7 @@ func prepStmts(ctx context.Context, db *sql.DB, order int) brainStmts {
if err != nil {
panic(err)
}
stmts.historyName, err = db.PrepareContext(ctx, `SELECT id, tag, msg FROM history WHERE chan=? AND sender=?`)
stmts.historyHash, err = db.PrepareContext(ctx, `SELECT id, tag, msg FROM history WHERE chan=? AND senderh=?`)
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -401,3 +403,12 @@ func (b *Brain) Query(ctx context.Context, query string, args ...interface{}) (*
func (b *Brain) QueryRow(ctx context.Context, query string, args ...interface{}) *sql.Row {
return b.db.QueryRowContext(ctx, query, args...)
}

// UserHash returns the SHA-256 digest used to identify a message sender in
// history without storing the username itself.
//
// The digest is taken over a fixed 64-byte buffer: the first half holds the
// lowercased name and the second half holds the channel exactly as given.
// Each value is truncated to 32 bytes if longer and zero-padded if shorter.
func UserHash(channel, name string) [32]byte {
	const half = sha256.Size // 32 bytes reserved for each component

	var buf [2 * half]byte
	copy(buf[:half], strings.ToLower(name))
	copy(buf[half:], channel)
	return sha256.Sum256(buf[:])
}
3 changes: 2 additions & 1 deletion brain/forget.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ func (b *Brain) ClearChat(ctx context.Context, channel, user string) error {
}
expunge := tx.StmtContext(ctx, b.stmts.expunge)
forget := tx.StmtContext(ctx, b.stmts.forget)
rows, err := tx.StmtContext(ctx, b.stmts.historyName).QueryContext(ctx, channel, user)
h := UserHash(channel, user)
rows, err := tx.StmtContext(ctx, b.stmts.historyHash).QueryContext(ctx, channel, h[:])
if err != nil {
tx.Rollback()
return err
Expand Down
3 changes: 2 additions & 1 deletion brain/learn.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,9 @@ func (b *Brain) Learn(ctx context.Context, msg irc.Message) error {
return fmt.Errorf("Learn: error learning end-of-message %+v: %w", args, err)
}
// Add the message to history.
h := UserHash(channel, msg.Nick)
id, _ := msg.Tag("id")
if _, err := tx.StmtContext(ctx, b.stmts.record).ExecContext(ctx, id, msg.Time, msg.Nick, channel, tag, msg.Trailing); err != nil {
if _, err := tx.StmtContext(ctx, b.stmts.record).ExecContext(ctx, id, msg.Time, h[:], channel, tag, msg.Trailing); err != nil {
tx.Rollback()
return fmt.Errorf("Learn: error recording message: %w", err)
}
Expand Down

0 comments on commit eff5311

Please sign in to comment.