Skip to content

Commit

Permalink
capture all terminators and quotes in the sentence (#360)
Browse files Browse the repository at this point in the history
* capture all terminators and quotes in the sentence

* fix negative position, add documentation and tests

* fix comments giving wrong semantics

* add test case coverage

* remove cursor namespace to avoid possible performance penalties

* while loop optimization
  • Loading branch information
Casheeew authored Dec 19, 2023
1 parent c661eaf commit 521e87d
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 16 deletions.
65 changes: 49 additions & 16 deletions ext/js/dom/document-util.js
Original file line number Diff line number Diff line change
Expand Up @@ -113,27 +113,45 @@ export class DocumentUtil {
const text = source.text();
const textLength = text.length;
const textEndAnchor = textLength - endLength;
let pos1 = startLength;
let pos2 = textEndAnchor;

/** Relative start position of the sentence (inclusive). */
let cursorStart = startLength;
/** Relative end position of the sentence (exclusive). */
let cursorEnd = textEndAnchor;

// Move backward
let quoteStack = [];
for (; pos1 > 0; --pos1) {
const c = text[pos1 - 1];
for (; cursorStart > 0; --cursorStart) {
// Check if the previous character should be included.
let c = text[cursorStart - 1];
if (c === '\n' && terminateAtNewlines) { break; }

if (quoteStack.length === 0) {
const terminatorInfo = terminatorMap.get(c);
let terminatorInfo = terminatorMap.get(c);
if (typeof terminatorInfo !== 'undefined') {
if (terminatorInfo[0]) { --pos1; }
// Include the previous character while it is a terminator character and is included at start.
while (terminatorInfo[0] && cursorStart > 0) {
--cursorStart;
if (cursorStart === 0) { break; }
c = text[cursorStart - 1];
terminatorInfo = terminatorMap.get(c);
if (typeof terminatorInfo === 'undefined') { break; }
}
break;
}
}

let quoteInfo = forwardQuoteMap.get(c);
if (typeof quoteInfo !== 'undefined') {
if (quoteStack.length === 0) {
if (quoteInfo[1]) { --pos1; }
// Include the previous character while it is a quote character and is included at start.
while (quoteInfo[1] && cursorStart > 0) {
--cursorStart;
if (cursorStart === 0) { break; }
c = text[cursorStart - 1];
quoteInfo = forwardQuoteMap.get(c);
if (typeof quoteInfo === 'undefined') { break; }
}
break;
} else if (quoteStack[0] === c) {
quoteStack.pop();
Expand All @@ -149,22 +167,37 @@ export class DocumentUtil {

// Move forward
quoteStack = [];
for (; pos2 < textLength; ++pos2) {
const c = text[pos2];
for (; cursorEnd < textLength; ++cursorEnd) {
// Check if the following character should be included.
let c = text[cursorEnd];
if (c === '\n' && terminateAtNewlines) { break; }

if (quoteStack.length === 0) {
const terminatorInfo = terminatorMap.get(c);
let terminatorInfo = terminatorMap.get(c);
if (typeof terminatorInfo !== 'undefined') {
if (terminatorInfo[1]) { ++pos2; }
// Include the following character while it is a terminator character and is included at end.
while (terminatorInfo[1] && cursorEnd < textLength) {
++cursorEnd;
if (cursorEnd === textLength) { break; }
c = text[cursorEnd];
terminatorInfo = terminatorMap.get(c);
if (typeof terminatorInfo === 'undefined') { break; }
}
break;
}
}

let quoteInfo = backwardQuoteMap.get(c);
if (typeof quoteInfo !== 'undefined') {
if (quoteStack.length === 0) {
if (quoteInfo[1]) { ++pos2; }
// Include the following character while it is a quote character and is included at end.
while (quoteInfo[1] && cursorEnd < textLength) {
++cursorEnd;
if (cursorEnd === textLength) { break; }
c = text[cursorEnd];
quoteInfo = forwardQuoteMap.get(c);
if (typeof quoteInfo === 'undefined') { break; }
}
break;
} else if (quoteStack[0] === c) {
quoteStack.pop();
Expand All @@ -179,13 +212,13 @@ export class DocumentUtil {
}

// Trim whitespace
for (; pos1 < startLength && this._isWhitespace(text[pos1]); ++pos1) { /* NOP */ }
for (; pos2 > textEndAnchor && this._isWhitespace(text[pos2 - 1]); --pos2) { /* NOP */ }
for (; cursorStart < startLength && this._isWhitespace(text[cursorStart]); ++cursorStart) { /* NOP */ }
for (; cursorEnd > textEndAnchor && this._isWhitespace(text[cursorEnd - 1]); --cursorEnd) { /* NOP */ }

// Result
return {
text: text.substring(pos1, pos2),
offset: startLength - pos1
text: text.substring(cursorStart, cursorEnd),
offset: startLength - cursorStart
};
}

Expand Down
32 changes: 32 additions & 0 deletions test/data/html/test-document1.html
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,38 @@ <h1>Yomitan Tests</h1>
<span>ありがとございます。ありがとございます。</span>
</div>

<div
class="test"
data-test-type="scan"
data-element-from-point-selector="span"
data-caret-range-from-point-selector="span"
data-start-node-selector="span"
data-start-offset="4"
data-end-node-selector="span"
data-end-offset="4"
data-result-type="TextSourceRange"
data-sentence-scan-extent="100"
data-sentence="ありがとございます。!?"
>
<span>ありがとございます。!?ありがとございます。!?</span>
</div>

<div
class="test"
data-test-type="scan"
data-element-from-point-selector="span"
data-caret-range-from-point-selector="span"
data-start-node-selector="span"
data-start-offset="4"
data-end-node-selector="span"
data-end-offset="4"
data-result-type="TextSourceRange"
data-sentence-scan-extent="100"
data-sentence="ありがとございます!!!"
>
<span>ありがとございます!!!ありがとございます!!!</span>
</div>

<div
class="test"
data-test-type="scan"
Expand Down

0 comments on commit 521e87d

Please sign in to comment.