-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathencode.py
50 lines (41 loc) · 1.36 KB
/
encode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import argparse
import himitsu
def _bit_string(value: str) -> str:
    """Return *value* unchanged if it contains only the characters 0 and 1.

    Used as an argparse ``type=`` callable so that a malformed secret is
    rejected while parsing the command line, before any model is loaded.

    Raises:
        argparse.ArgumentTypeError: if *value* contains any other character.
    """
    if set(value) - {"0", "1"}:
        raise argparse.ArgumentTypeError(
            f"secret must be a string of 0s and 1s, got {value!r}"
        )
    return value


def get_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the encode script.

    Arguments defined:
        secret      positional; the message to encode, validated to consist
                    solely of the characters ``0`` and ``1``.
        --language  one of ``en``, ``ja``, ``ru`` (default ``en``).
        --prompt    context prompt for the cover text (default ``Hi Bob.``).
        --device    device string (default ``cpu``).
        --min-prob  float threshold (default ``0.01``).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description="Encode a secret message into cover text.")
    parser.add_argument(
        "secret",
        # Enforce the documented format up front instead of failing later.
        type=_bit_string,
        help="A secret message to encode. It should be a string of 0s and 1s.",
    )
    parser.add_argument(
        "--language",
        type=str,
        default="en",
        choices=["en", "ja", "ru"],
        help="Language of the GPT-2 model",
    )
    parser.add_argument(
        "--prompt", type=str, default="Hi Bob.", help="A context prompt used to generate cover text"
    )
    parser.add_argument("--device", type=str, default="cpu", help="Device to use")
    parser.add_argument(
        "--min-prob",
        type=float,
        default=0.01,
        help="a minimum probability for a token to be a generated candidate",
    )
    return parser
def main():
    """Parse the command line, encode the secret, and print the result.

    Loads the model for the requested language on the requested device,
    loads the matching tokenizer bundle, passes everything to
    ``himitsu.encode`` and writes the returned value to stdout.
    """
    cli = get_parser().parse_args()

    # Model first, then tokenizer: same order as the calls have always run.
    language_model = himitsu.load_model(cli.language, cli.device)
    tok, vocab, specials = himitsu.load_tokenizer(cli.language)

    cover_text = himitsu.encode(
        model=language_model,
        tokenizer=tok,
        secret=cli.secret,
        prompt=cli.prompt,
        min_prob=cli.min_prob,
        special_tokens=specials,
        byte_level_vocab=vocab,
    )
    print(cover_text)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()