-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtextutil.js
181 lines (168 loc) · 5.14 KB
/
textutil.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
const Ksanapos=require("./ksanapos");
const tt=require("./tokentypes");
const TokenTypes=tt.TokenTypes;
/**
 * Tell whether `c` is classified as punctuation or as space.
 *
 * Must be called with `this` bound to an object exposing
 * `tokenizer.code2TokenType`, a lookup table indexed by `c`
 * (presumably a character code - confirm against the tokenizer).
 *
 * @param {*} c - key looked up in `this.tokenizer.code2TokenType`.
 * @returns {boolean} true when the looked-up type is `TokenTypes.PUNC`
 *   or `TokenTypes.SPACE`, false otherwise.
 */
const isPunc=function(c){
  const kind=this.tokenizer.code2TokenType[c];
  return kind===TokenTypes.PUNC || kind===TokenTypes.SPACE;
};
/**
 * Report whether `s` is one of the opening brackets this module recognises.
 *
 * @param {*} s - value to test, normally a one-character string.
 * @returns {boolean} true when `s` loosely equals one of the listed brackets.
 */
const openbracket=function(s){
  const openers=['「','《','『','(','〔','【','〈'];
  // Loose equality is kept deliberately so every input yields the same
  // answer as a plain chain of `==` comparisons would.
  return openers.some(function(bracket){
    return s==bracket;
  });
};
/**
 * Keep the leading part of `str`, cutting it at the offset reported by
 * `this.koffset(str, chcount, tailing)`.
 *
 * Must be called with `this` bound to an object providing `koffset`.
 *
 * @param {string} str - text to cut; any falsy value yields "".
 * @param {number} chcount - count forwarded unchanged to `koffset`.
 * @param {*} tailing - flag forwarded unchanged to `koffset`.
 * @returns {string} `str` truncated to the computed offset.
 */
const trimRight=function(str,chcount,tailing) {
  if (!str) return "";
  // `0+` keeps the numeric accumulation the result has always gone through.
  const end=0+this.koffset(str,chcount,tailing);
  return str.substr(0,end);
};
/**
 * Drop the leading part of `str`, starting the result at the offset
 * reported by `this.koffset(str, chcount, tailing)`.
 *
 * Must be called with `this` bound to an object providing `koffset`.
 *
 * @param {string} str - text to cut; any falsy value yields "".
 * @param {number} chcount - count forwarded unchanged to `koffset`.
 * @param {*} tailing - flag forwarded unchanged to `koffset`.
 * @returns {string} the remainder of `str` from the computed offset on.
 */
const trimLeft=function(str,chcount,tailing) {
  if (!str) return "";
  // `0+` keeps the numeric accumulation the result has always gone through.
  const begin=0+this.koffset(str,chcount,tailing);
  return str.substr(begin);
};
/**
 * Lay out raw text lines for display, tracking line and page breaks.
 *
 * Each element of `text` is treated as one source line: element `i` starts
 * `i` lines after `startkpos` (positions are advanced with advanceLineChar).
 * Must be called with `this` bound to an object providing `pageOf`,
 * `addressPattern` and, when `breaks` is used, `tokenizer.isConcatable`
 * and whatever `trimRight` needs.
 *
 * Two modes:
 *  - without `breaks`: every source line becomes one output line (embedded
 *    newlines are replaced by a space) and `linebreaks` holds each line's kpos;
 *  - with `breaks`: source lines are joined and re-split at the given kpos
 *    values, and `linebreaks` is `startkpos` followed by `breaks`.
 *
 * Whenever the page of the current line differs from the previous line's
 * page, trailing blank output lines are dropped and the kpos is recorded in
 * `pagebreaks`.
 *
 * @param {string[]} text - source lines.
 * @param {number} startkpos - kpos of the first source line.
 * @param {number[]} [breaks] - kpos values to break at; assumed ascending
 *   (TODO confirm with callers). The array is not modified.
 * @param {Array} [linetpos] - tpos of each input line; output `linetpos`
 *   is only populated per line when `breaks` is not given.
 * @returns {{linebreaks:number[],pagebreaks:number[],lines:string[],linetpos:Array}}
 */
const layoutText=function(text,startkpos,breaks,linetpos){
  let prevpage=0,linetext="",kpos=startkpos,nbreak=0;
  let linebreaks=[];
  const lines=[],ltpos=[],pagebreaks=[];
  for (let i=0;i<text.length;i++) {
    //kpos of next line start
    const nextkpos=advanceLineChar.call(this,startkpos,i+1);
    const page=this.pageOf(kpos)-1;
    if (prevpage!==page) {
      //remove extra trailing blank lines before the page break
      while (lines.length>0&&!lines[lines.length-1].trim()) {
        linebreaks.pop();
        lines.pop();
        if (ltpos.length) ltpos.pop();
      }
      pagebreaks.push(kpos); //show page break on screen
    }
    if (breaks) {
      let breakcount=0,consumed=0;
      //one text line might contain more than one paragraph break
      while (nbreak<breaks.length&&nextkpos>breaks[nbreak]) {
        breakcount++;
        const leftpart=trimRight.call(this,text[i],breaks[nbreak]-kpos,true);
        lines.push(linetext+leftpart.substr(consumed));
        consumed=leftpart.length;
        linetext="";
        nbreak++;
      }
      if (!breakcount) {
        const delimiter=this.tokenizer.isConcatable(text[i])?"":" ";
        linetext+=delimiter+text[i];
      } else {
        linetext=text[i].substr(consumed);//remaining
      }
    } else {
      lines.push(text[i].replace(/\r?\n/g," "));
      if (linetpos) ltpos.push(linetpos[i]);
      linebreaks.push(kpos);
    }
    prevpage=page;
    kpos=nextkpos;
  }
  if (breaks) {
    //build a new array so the caller's `breaks` is left untouched
    linebreaks=[startkpos].concat(breaks);
    lines.push(linetext);
  }
  if (linetpos) ltpos.push(linetpos[linetpos.length-1]);//terminator
  return {linebreaks:linebreaks,pagebreaks:pagebreaks,lines:lines,linetpos:ltpos};
};
/**
 * Collect every address found in `text`, grouped by article name.
 *
 * Must be called with `this` bound to an object providing `addressRegex`,
 * `addressPattern` and `articleOf`. The first capture group of each
 * `addressRegex` match is parsed with `Ksanapos.parse`; matches that do not
 * parse, or whose article has no `articlename`, are skipped.
 *
 * @param {string} text - text to scan.
 * @returns {Object<string,Array>} parsed ranges keyed by article name.
 */
const extractKPos=function(text){
  const grouped={};
  const pattern=this.addressPattern;
  const lookupArticle=this.articleOf.bind(this);
  // String#replace is used purely to visit each match; its result is ignored.
  text.replace(this.addressRegex,function(whole,captured){
    const range=Ksanapos.parse(captured,pattern);
    if (range===undefined) return;
    const article=lookupArticle(range);
    const name=article.articlename;
    if (!name) return;
    if (!grouped[name]) grouped[name]=[];
    grouped[name].push(range);
  });
  return grouped;
};
/**
 * Advance `kpos` by `advline` lines and return the new kpos.
 *
 * The step per line is `this.addressPattern.maxchar`. When `linetext` is
 * truthy, the advanced position is unpacked, its element 3 (the value
 * `charOf` reads) is replaced by `this.kcount(linetext)`, and it is packed
 * again.
 *
 * Notes carried over from the previous header comment: advline can be more
 * than maxChar; crossing a volume is not allowed.
 *
 * @param {number} kpos - starting position.
 * @param {number} advline - number of lines to advance.
 * @param {string} [linetext] - text whose `kcount` becomes element 3.
 * @returns {number} the advanced kpos.
 */
const advanceLineChar=function(kpos,advline,linetext){
  const pattern=this.addressPattern;
  const advanced=kpos+advline*pattern.maxchar;
  if (!linetext) return advanced;
  const parts=Ksanapos.unpack(advanced,pattern);
  parts[3]=this.kcount(linetext);
  return Ksanapos.makeKPos(parts,pattern);
};
/**
 * Break a range into its start and end positions, packed and unpacked.
 *
 * @param {string|*} range - a range; strings are first parsed with
 *   `Ksanapos.parse`, any other value is used as given.
 * @param {Object} [pat] - address pattern; when undefined,
 *   `this.addressPattern` is used (so `this` must be bound in that case).
 * @returns {{startarr:*,endarr:*,start:*,end:*,range:*}} `start`/`end` come
 *   from `Ksanapos.breakRange`, `startarr`/`endarr` are their unpacked
 *   forms, and `range` is the (possibly parsed) input range.
 */
const parseRange=function(range,pat){
  const pattern=(pat===undefined)?this.addressPattern:pat;
  const parsed=(typeof range==="string")?Ksanapos.parse(range,pattern):range;
  const bounds=Ksanapos.breakRange(parsed,pattern);
  const startParts=Ksanapos.unpack(bounds.start,pattern);
  const endParts=Ksanapos.unpack(bounds.end,pattern);
  return {
    startarr:startParts,
    endarr:endParts,
    start:bounds.start,
    end:bounds.end,
    range:parsed
  };
};
/**
 * Unpack a kpos into its component array via `Ksanapos.unpack`.
 *
 * @param {number} kpos - packed position.
 * @param {Object} [pat] - address pattern; any falsy value falls back to
 *   `this.addressPattern`.
 * @returns {*} whatever `Ksanapos.unpack` returns for the position.
 */
const kPosUnpack=function(kpos,pat){
  return Ksanapos.unpack(kpos,pat||this.addressPattern);
};
/**
 * Return the book component (element 0 of the unpacked start position)
 * of an address.
 *
 * Must be called with `this` bound to an object providing `addressPattern`.
 *
 * @param {*} address - address or range accepted by `parseRange`.
 * @returns {*} element 0 of the unpacked start kpos.
 */
const bookOf=function(address){
  const {start}=parseRange(address,this.addressPattern);
  return kPosUnpack.call(this,start)[0];
};
/**
 * Return the page component (element 1 of the unpacked start position)
 * of an address.
 *
 * Must be called with `this` bound to an object providing `addressPattern`.
 *
 * @param {*} address - address or range accepted by `parseRange`.
 * @returns {*} element 1 of the unpacked start kpos.
 */
const pageOf=function(address){
  const {start}=parseRange(address,this.addressPattern);
  return kPosUnpack.call(this,start)[1];
};
/**
 * Return the line number of an address counted from the start of its book:
 * page component times `addressPattern.maxline`, plus the line component.
 *
 * Must be called with `this` bound to an object providing `addressPattern`.
 *
 * @param {*} address - address or range accepted by `parseRange`.
 * @returns {number} element 1 * maxline + element 2 of the unpacked start kpos.
 */
const bookLineOf=function(address){
  const {start}=parseRange(address,this.addressPattern);
  const parts=kPosUnpack.call(this,start);
  const linesPerPage=this.addressPattern.maxline;
  return parts[1]*linesPerPage+parts[2];
};
/**
 * Return the line component (element 2 of the unpacked start position)
 * of an address.
 *
 * Must be called with `this` bound to an object providing `addressPattern`.
 *
 * @param {*} address - address or range accepted by `parseRange`.
 * @returns {*} element 2 of the unpacked start kpos.
 */
const lineOf=function(address){
  const {start}=parseRange(address,this.addressPattern);
  return kPosUnpack.call(this,start)[2];
};
/**
 * Return the character component (element 3 of the unpacked start position)
 * of an address.
 *
 * Must be called with `this` bound to an object providing `addressPattern`.
 *
 * @param {*} address - address or range accepted by `parseRange`.
 * @returns {*} element 3 of the unpacked start kpos.
 */
const charOf=function(address){
  const {start}=parseRange(address,this.addressPattern);
  return kPosUnpack.call(this,start)[3];
};
/**
 * Return the address of the beginning of the page containing `address`:
 * the start position with its line (element 2) and character (element 3)
 * components reset to zero, packed again with `Ksanapos.makeKPos`.
 *
 * Must be called with `this` bound to an object providing `addressPattern`.
 *
 * @param {*} address - address or range accepted by `parseRange`.
 * @returns {*} kpos of the first position on the same page.
 */
const pageStart=function(address){
  const pattern=this.addressPattern;
  const {start}=parseRange(address,pattern);
  const parts=kPosUnpack.call(this,start);
  parts[2]=0;
  parts[3]=0;
  return Ksanapos.makeKPos(parts,pattern);
};
/**
 * Collect the positions at which paragraphs break, in ascending order.
 *
 * Positions come from two optional fields: `fields.head.pos`, whose entries
 * are resolved to their start kpos through `this.parseRange`, and
 * `fields.p.pos`, whose entries are used as they are. Must be called with
 * `this` bound to an object providing `parseRange` when `fields.head` is set.
 *
 * Positions are assumed to be numeric kpos values (TODO confirm for
 * `fields.p.pos`); they are sorted numerically, because the default
 * `Array#sort` compares string forms and would place 100 before 20.
 *
 * @param {{head?:{pos:Array},p?:{pos:Array}}} fields - field table.
 * @returns {number[]} all break positions, ascending.
 */
const getParagraphBreaks=function(fields){
  let out=[];
  if (fields.head) {
    const headpos=fields.head.pos.map((p)=>this.parseRange(p).start);
    out=out.concat(headpos);
  }
  if (fields.p) out=out.concat(fields.p.pos);
  out.sort(function(a,b){
    return a-b;
  });
  return out;
};
module.exports={trimLeft:trimLeft,trimRight:trimRight,parseRange:parseRange,
bookOf:bookOf,pageOf:pageOf,lineOf:lineOf,charOf:charOf,pageStart:pageStart,
bookLineOf:bookLineOf, layoutText:layoutText,isPunc:isPunc,
extractKPos:extractKPos,advanceLineChar:advanceLineChar,getParagraphBreaks:getParagraphBreaks};