Skip to content

Commit

Permalink
Merge "Add totalResources to index search"
Browse files (browse the repository at this point in the history)
  • Loading branch information
margaretha authored and Gerrit Code Review committed Aug 4, 2023
2 parents 0c8d83d + 7f08b35 commit 2da6b4c
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 42 deletions.
3 changes: 3 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
0.61.3 2023-07-11
- Add totalResources to results (diewald)

0.61.2 2023-04-05
- [bugfix] Fix pagebreak retrieval (margaretha, diewald)
- [feature] Support token lists for match infos (solved #88,
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

<groupId>de.ids_mannheim.korap</groupId>
<artifactId>Krill</artifactId>
<version>0.61.2</version>
<version>0.61.3</version>
<packaging>jar</packaging>

<name>Krill</name>
Expand Down
94 changes: 56 additions & 38 deletions src/main/java/de/ids_mannheim/korap/KrillIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,6 @@ public FieldDocument upsertDoc (FieldDocument doc) {
if (this.reader.getRefCount() == 0) {
// Retry update
// System.err.println("Retry update!");
break;
};
*/
Expand Down Expand Up @@ -1416,7 +1415,8 @@ public Result search (Krill ks) {
};

// Some initializations ...
int i = 0;
int i = 0; // matchcount
int j = 0; // matchdoccount
int startIndex = kr.getStartIndex();
int count = kr.getItemsPerPage();
int hits = kr.getItemsPerPage() + startIndex;
Expand Down Expand Up @@ -1464,7 +1464,6 @@ public Result search (Krill ks) {
if (DEBUG)
log.trace("Rewritten query is {}", query.toString());


// TODO: run this in a separate thread
for (LeafReaderContext atomic : this.reader().leaves()) {

Expand All @@ -1479,9 +1478,8 @@ public Result search (Krill ks) {
*/
final FixedBitSet bitset = collection.bits(atomic);

if (bitset.nextSetBit(0) == DocIdSetIterator.NO_MORE_DOCS) {
if (bitset.nextSetBit(0) == DocIdSetIterator.NO_MORE_DOCS)
continue;
};

final PositionsToOffset pto = snippets ? new PositionsToOffset(atomic, field) : null;

Expand All @@ -1497,11 +1495,14 @@ public Result search (Krill ks) {

if (DEBUG)
log.trace("Match Nr {}/{}", i, count);

// There are no more spans to find
if (!spans.next())
break;

// Increment resource counter
itemsPerResourceCounter++;

// Timeout!
if (tthread.getTime() > timeout) {
kr.setTimeExceeded(true);
Expand All @@ -1511,30 +1512,38 @@ public Result search (Krill ks) {

localDocID = spans.doc();

// Count hits per resource
if (itemsPerResource > 0) {
// IDS are identical
if (localDocID == oldLocalDocID
|| oldLocalDocID == -1) {

// IDS are identical
if (localDocID == oldLocalDocID
|| oldLocalDocID == -1) {
if (itemsPerResourceCounter++ >= itemsPerResource) {
// Count hits per resource
if (itemsPerResource > 0) {

// End of resourcecounter is reached
if (itemsPerResourceCounter > itemsPerResource) {

// Skip to next resource
if (spans.skipTo(localDocID + 1) != true) {
break;
}
else {
itemsPerResourceCounter = 1;
localDocID = spans.doc();
};

itemsPerResourceCounter = 1;
localDocID = spans.doc();
};
}
}

// Reset counter
else
itemsPerResourceCounter = 0;
// localDoc is new
else
itemsPerResourceCounter = 1;

oldLocalDocID = localDocID;
};

if (itemsPerResourceCounter == 1)
j++;

oldLocalDocID = localDocID;


// The next matches are not yet part of the result
if (startIndex > i)
continue;
Expand Down Expand Up @@ -1612,34 +1621,42 @@ public Result search (Krill ks) {
break;
};

// Count hits per resource
if (itemsPerResource > 0) {
localDocID = spans.doc();
// Increment resource counter
itemsPerResourceCounter++;

localDocID = spans.doc();

if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
break;
if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
break;

// IDS are identical
if (localDocID == oldLocalDocID
|| oldLocalDocID == -1) {
if (localDocID == -1)
break;
// IDS are identical
if (localDocID == oldLocalDocID
|| oldLocalDocID == -1) {

if (localDocID == -1)
break;

// Count hits per resource
if (itemsPerResource > 0) {

if (itemsPerResourceCounter++ >= itemsPerResource) {
// End of resourcecounter is reached
if (itemsPerResourceCounter > itemsPerResource) {
if (spans.skipTo(localDocID + 1) != true) {
break;
};
itemsPerResourceCounter = 1;
localDocID = spans.doc();
};
}
}
// Reset counter
else
itemsPerResourceCounter = 1;

// Reset counter
else
itemsPerResourceCounter = 0;

oldLocalDocID = localDocID;
};
if (itemsPerResourceCounter == 1)
j++;

oldLocalDocID = localDocID;
i++;
};
atomicMatches.clear();
Expand All @@ -1649,6 +1666,7 @@ public Result search (Krill ks) {
kr.setItemsPerResource(itemsPerResource);

kr.setTotalResults(cutoff ? (long) -1 : (long) i);
kr.setTotalResources(cutoff ? (long) -1 : (long) j);
}

catch (IOException e) {
Expand Down
31 changes: 28 additions & 3 deletions src/test/java/de/ids_mannheim/korap/search/TestKrill.java
Original file line number Diff line number Diff line change
Expand Up @@ -440,8 +440,12 @@ public void searchJSONitemsPerResource () throws IOException {
String json = getJsonString(getClass()
.getResource("/queries/bsp-itemsPerResource.jsonld").getFile());

Krill ks = new Krill(json);
Result kr = ks.apply(ki);
Krill ks;
Result kr;
KrillMeta meta;

ks = new Krill(json);
kr = ks.apply(ki);
assertEquals(kr.getTotalResults(), 10);
assertEquals(0, kr.getStartIndex());
assertEquals(20, kr.getItemsPerPage());
Expand All @@ -452,6 +456,7 @@ public void searchJSONitemsPerResource () throws IOException {
assertEquals("WPD_AAA.00002", kr.getMatch(7).getDocID());
assertEquals("WPD_AAA.00002", kr.getMatch(8).getDocID());
assertEquals("WPD_AAA.00004", kr.getMatch(9).getDocID());
assertEquals(kr.getTotalResources(), 3);

ks = new Krill(json);
ks.getMeta().setItemsPerResource(1);
Expand All @@ -463,6 +468,7 @@ public void searchJSONitemsPerResource () throws IOException {
assertEquals("WPD_AAA.00004", kr.getMatch(2).getDocID());

assertEquals(kr.getTotalResults(), 3);
assertEquals(kr.getTotalResources(), 3);
assertEquals(0, kr.getStartIndex());
assertEquals(20, kr.getItemsPerPage());

Expand All @@ -478,11 +484,12 @@ public void searchJSONitemsPerResource () throws IOException {
assertEquals("WPD_AAA.00004", kr.getMatch(4).getDocID());

assertEquals(kr.getTotalResults(), 5);
assertEquals(kr.getTotalResources(), 3);
assertEquals(0, kr.getStartIndex());
assertEquals(20, kr.getItemsPerPage());

ks = new Krill(json);
KrillMeta meta = ks.getMeta();
meta = ks.getMeta();
meta.setItemsPerResource(1);
meta.setStartIndex(1);
meta.setCount(1);
Expand All @@ -492,10 +499,28 @@ public void searchJSONitemsPerResource () throws IOException {
assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());

assertEquals(kr.getTotalResults(), 3);
assertEquals(kr.getTotalResources(), 3);
assertEquals(1, kr.getStartIndex());
assertEquals(1, kr.getItemsPerPage());

assertEquals((short) 1, kr.getItemsPerResource());

ks = new Krill(json);
meta = ks.getMeta();
meta.setItemsPerResource(2);
meta.setStartIndex(2);
meta.setCount(1);

kr = ks.apply(ki);

assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());

assertEquals(kr.getTotalResults(), 5);
assertEquals(kr.getTotalResources(), 3);
assertEquals(2, kr.getStartIndex());
assertEquals(1, kr.getItemsPerPage());


};


Expand Down

0 comments on commit 2da6b4c

Please sign in to comment.