Skip to content

Commit

Permalink
Debug prints for the demo
Browse files (browse the repository at this point in the history)
  • Loading branch information
Waino committed Mar 4, 2024
1 parent 0a09ff2 commit ddf3c26
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 0 deletions.
2 changes: 2 additions & 0 deletions mammoth/inputters/dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -115,6 +115,7 @@ def _tokenize(self, string, side='src'):

def _numericalize(self, tokens, side='src'):
"""Convert list of strings into list of indices"""
print(side, tokens)
vocab = self.vocabs[side]
bos = vocab[DefaultTokens.BOS]
eos = vocab[DefaultTokens.EOS]
Expand All @@ -124,6 +125,7 @@ def _numericalize(self, tokens, side='src'):
*(vocab.stoi.get(token, unk) for token in tokens),
eos,
], device='cpu')
print(indices)
return indices

def to(self, device):
Expand Down
1 change: 1 addition & 0 deletions mammoth/transforms/tokenize.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -217,6 +217,7 @@ def _tokenize(self, tokens, side='src', is_train=False):
segmented = sp_model.encode(
sentence, out_type=str, enable_sampling=True, alpha=alpha, nbest_size=nbest_size
)
print(f'segmented: {segmented}')
return segmented

def apply(self, example, is_train=False, stats=None, **kwargs):
Expand Down

0 comments on commit ddf3c26

Please sign in to comment.