-
Notifications
You must be signed in to change notification settings - Fork 46
/
klcpos3.pl
executable file
·43 lines (41 loc) · 1.06 KB
/
klcpos3.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env perl
# Computes KLcpos3 (target, source).
# Copyright © 2018 Dan Zeman <[email protected]>
# License: GNU GPL
use utf8;
use open ':utf8';
binmode(STDIN, ':utf8');
binmode(STDOUT, ':utf8');
binmode(STDERR, ':utf8');
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# Reads a tab-separated distribution file (trigram, count, relative count)
# and returns a list of records [trigram, count, relcount, line number].
# Blank lines are skipped. Fields missing from a line are returned as undef;
# the caller decides which fields it needs.
# Dies if the file cannot be opened.
sub read_distribution
{
    my ($path) = @_;
    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode or a pipe command.
    open(my $fh, '<:utf8', $path) or die("Cannot read $path: $!");
    my @records;
    while(my $line = <$fh>)
    {
        $line =~ s/\r?\n$//;
        next if($line =~ m/^\s*$/);
        my ($trigram, $count, $relcount) = split(/\t/, $line);
        push(@records, [$trigram, $count, $relcount, $.]);
    }
    close($fh);
    return @records;
}

# Computes KLcpos3(tgt, src) = sum over target trigrams of
#     p_tgt * log(p_tgt / p_src).
# Parameters:
#   $rtgt     ... hash reference: trigram => relative frequency in the target
#   $srccount ... hash reference: trigram => absolute count in the source
# Returns the divergence as a number (0 for an empty target distribution).
# Neither input hash is modified.
sub compute_klcpos3
{
    my ($rtgt, $srccount) = @_;
    # Add-1 smoothing: every target trigram gets one extra source count, so
    # that no target trigram has zero probability in the source distribution.
    my %smoothed = %{$srccount};
    foreach my $trigram (keys(%{$rtgt}))
    {
        $smoothed{$trigram}++;
    }
    # Normalize over ALL smoothed counts, including the trigrams that occur
    # only in the target. Otherwise the source probabilities would not sum
    # to one (and the total could even be zero for an empty source file).
    my $total = 0;
    foreach my $count (values(%smoothed))
    {
        $total += $count;
    }
    my $klcpos3 = 0;
    # Sorted keys make the floating-point summation order, and thus the
    # printed result, reproducible from run to run.
    foreach my $trigram (sort(keys(%{$rtgt})))
    {
        my $p = $rtgt->{$trigram};
        # A term with zero probability contributes nothing (0 * log 0 = 0);
        # evaluating log(0) would be a fatal error.
        next unless($p > 0);
        $klcpos3 += $p * log($p / ($smoothed{$trigram} / $total));
    }
    return $klcpos3;
}

# Reads the target and the source distribution files and prints the value of
# KLcpos3(target, source) to STDOUT. Lines whose required field is not a
# number are reported on STDERR and skipped.
sub main
{
    my ($tgtdistfile, $srcdistfile) = @_;
    if(!defined($tgtdistfile) || !defined($srcdistfile))
    {
        die("Usage: klcpos3.pl target-distribution-file source-distribution-file\n");
    }
    # Only the relative counts are needed from the target file.
    # If a trigram is listed repeatedly, the last line wins.
    my %rtgt;
    foreach my $record (read_distribution($tgtdistfile))
    {
        my ($trigram, $count, $relcount, $lineno) = @{$record};
        unless(looks_like_number($relcount))
        {
            warn("Skipping line $lineno of $tgtdistfile: relative count is not a number\n");
            next;
        }
        $rtgt{$trigram} = $relcount;
    }
    # Only the absolute counts are needed from the source file.
    # Counts of a repeatedly listed trigram are summed.
    my %src;
    foreach my $record (read_distribution($srcdistfile))
    {
        my ($trigram, $count, $relcount, $lineno) = @{$record};
        unless(looks_like_number($count))
        {
            warn("Skipping line $lineno of $srcdistfile: count is not a number\n");
            next;
        }
        $src{$trigram} += $count;
    }
    my $klcpos3 = compute_klcpos3(\%rtgt, \%src);
    print("KLcpos3 = $klcpos3\n");
}

# Run only when executed as a script, not when loaded from another file.
main(@ARGV) unless(caller());