Skip to content

Commit

Permalink
Merge pull request #111 from studentinovisad/as/fix/search-relevance
Browse files Browse the repository at this point in the history
fix(search): include identifier (index) in order by relevance
  • Loading branch information
aleksasiriski authored Dec 29, 2024
2 parents f77081b + 26b8ceb commit af9046f
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 12 deletions.
17 changes: 13 additions & 4 deletions src/lib/server/db/employee.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ export async function getEmployees(
entryBuilding: employeeEntry.building,
exitTimestamp: max(employeeExit.timestamp),
leastDistance: sqlLeast([
sqlLevenshteinDistance(sqlConcat([employee.fname], ' '), nonEmptySearchQuery),
sqlLevenshteinDistance(sqlConcat([employee.lname], ' '), nonEmptySearchQuery),
sqlLevenshteinDistance(sqlConcat([employee.identifier]), nonEmptySearchQuery),
sqlLevenshteinDistance(sqlConcat([employee.fname]), nonEmptySearchQuery),
sqlLevenshteinDistance(sqlConcat([employee.lname]), nonEmptySearchQuery),
sqlLevenshteinDistance(
sqlConcat([employee.fname, employee.lname], ' '),
nonEmptySearchQuery
Expand All @@ -69,7 +70,11 @@ export async function getEmployees(
sqlConcat([employee.lname, employee.fname], ' '),
nonEmptySearchQuery
)
]).as('least_distance')
]).as('least_distance'),
leastDistanceIdentifier: sqlLevenshteinDistance(
sqlConcat([employee.identifier]),
nonEmptySearchQuery
).as('least_distance_identifier')
})
.from(employee)
.leftJoin(maxEntrySubquery, eq(maxEntrySubquery.employeeId, employee.id))
Expand Down Expand Up @@ -107,7 +112,11 @@ export async function getEmployees(
maxEntrySubquery.maxEntryTimestamp,
employeeEntry.building
)
.orderBy(({ leastDistance, identifier }) => [leastDistance, identifier])
.orderBy(({ leastDistance, leastDistanceIdentifier, identifier }) => [
leastDistance,
leastDistanceIdentifier,
identifier
])
.limit(limit)
.offset(offset)
: await db
Expand Down
37 changes: 33 additions & 4 deletions src/lib/server/db/fuzzysearch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ type FuzzySearchFiltersOptions = {
substr?: boolean;
};

/*
 * Per-operation costs forwarded to the SQL LEVENSHTEIN call built by
 * sqlLevenshtein and sqlLevenshteinDistance (passed as its third, fourth and
 * fifth arguments, in that order).
 */
type LevenshteinOptions = {
// Cost argument for an insertion
insertCost: number;
// Cost argument for a deletion
deleteCost: number;
// Cost argument for a substitution
substitutionCost: number;
};

/*
 * Costs used by sqlLevenshtein and sqlLevenshteinDistance when the caller does
 * not pass explicit options. The weights are deliberately unequal
 * (insert < substitution < delete).
 * NOTE(review): presumably this favours column values that extend the typed
 * query over ones that require removing typed characters — confirm against the
 * database's LEVENSHTEIN source/target semantics.
 */
const defaultLevenshteinOptions: LevenshteinOptions = {
insertCost: 1,
deleteCost: 3,
substitutionCost: 2
};

export function fuzzySearchFilters(
dbFields: Column[],
searchQuery: string,
Expand Down Expand Up @@ -45,6 +57,14 @@ export function fuzzySearchFilters(
* Returns the sql for concatenating multiple columns with a separator using CONCAT_WS
*/
export function sqlConcat(cols: Column[], separator?: string): SQL<Column> {
if (cols.length === 0) {
throw new Error('Passed columns length is 0');
}

if (cols.length === 1) {
return sql<Column>`${cols[0]}`;
}

const sqlCols = cols
.map((col) => sql<Column>`${col}`)
.reduce((prev, curr) => sql`${prev}, ${curr}`);
Expand All @@ -64,13 +84,22 @@ export function sqlLeast(cols: SQL<number>[]): SQL<number> {
/*
* Returns the sql for determining if the levenshtein distance is less than or equal to the passed distance
*/
export function sqlLevenshtein(col: SQL<Column>, input: string, distance: number): SQL<boolean> {
return sql<boolean>`LEVENSHTEIN(LOWER(${col}), LOWER(${input})) <= ${distance}`;
/**
 * Builds a boolean SQL fragment that compares the weighted Levenshtein
 * distance between the lower-cased search input and the lower-cased column
 * expression against an upper bound.
 *
 * @param col - SQL expression (a column or a concatenation of columns) to compare with
 * @param input - search text; interpolated as the first LEVENSHTEIN argument
 * @param distance - inclusive upper bound for the computed distance
 * @param opts - insert/delete/substitution costs; defaults to `defaultLevenshteinOptions`
 * @returns SQL fragment of the form `LEVENSHTEIN(...) <= distance`
 */
export function sqlLevenshtein(
  col: SQL<Column>,
  input: string,
  distance: number,
  opts: LevenshteinOptions = defaultLevenshteinOptions
): SQL<boolean> {
  const { insertCost, deleteCost, substitutionCost } = opts;

  // The input goes first and the column second; with unequal costs the
  // argument order affects the result, so keep it as is.
  return sql<boolean>`LEVENSHTEIN(LOWER(${input}), LOWER(${col}), ${insertCost}, ${deleteCost}, ${substitutionCost}) <= ${distance}`;
}

/*
* Returns the sql for getting the levenshtein distance
*/
export function sqlLevenshteinDistance(col: SQL<Column>, input: string): SQL<number> {
return sql<number>`LEVENSHTEIN(LOWER(${col}), LOWER(${input}))`;
/**
 * Builds a numeric SQL fragment that evaluates to the weighted Levenshtein
 * distance between the lower-cased search input and the lower-cased column
 * expression.
 *
 * @param col - SQL expression (a column or a concatenation of columns) to compare with
 * @param input - search text; interpolated as the first LEVENSHTEIN argument
 * @param opts - insert/delete/substitution costs; defaults to `defaultLevenshteinOptions`
 * @returns SQL fragment of the form `LEVENSHTEIN(...)`
 */
export function sqlLevenshteinDistance(
  col: SQL<Column>,
  input: string,
  opts: LevenshteinOptions = defaultLevenshteinOptions
): SQL<number> {
  const { insertCost, deleteCost, substitutionCost } = opts;

  // Same argument order as sqlLevenshtein: input first, column second.
  return sql<number>`LEVENSHTEIN(LOWER(${input}), LOWER(${col}), ${insertCost}, ${deleteCost}, ${substitutionCost})`;
}
17 changes: 13 additions & 4 deletions src/lib/server/db/student.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ export async function getStudents(
entryBuilding: studentEntry.building,
exitTimestamp: max(studentExit.timestamp),
leastDistance: sqlLeast([
sqlLevenshteinDistance(sqlConcat([student.fname], ' '), nonEmptySearchQuery),
sqlLevenshteinDistance(sqlConcat([student.lname], ' '), nonEmptySearchQuery),
sqlLevenshteinDistance(sqlConcat([student.index]), nonEmptySearchQuery),
sqlLevenshteinDistance(sqlConcat([student.fname]), nonEmptySearchQuery),
sqlLevenshteinDistance(sqlConcat([student.lname]), nonEmptySearchQuery),
sqlLevenshteinDistance(
sqlConcat([student.fname, student.lname], ' '),
nonEmptySearchQuery
Expand All @@ -69,7 +70,11 @@ export async function getStudents(
sqlConcat([student.lname, student.fname], ' '),
nonEmptySearchQuery
)
]).as('least_distance')
]).as('least_distance'),
leastDistanceIdentifier: sqlLevenshteinDistance(
sqlConcat([student.index]),
nonEmptySearchQuery
).as('least_distance_identifier')
})
.from(student)
.leftJoin(maxEntrySubquery, eq(maxEntrySubquery.studentId, student.id))
Expand Down Expand Up @@ -105,7 +110,11 @@ export async function getStudents(
maxEntrySubquery.maxEntryTimestamp,
studentEntry.building
)
.orderBy(({ leastDistance, index }) => [leastDistance, index])
.orderBy(({ leastDistance, leastDistanceIdentifier, index }) => [
leastDistance,
leastDistanceIdentifier,
index
])
.limit(limit)
.offset(offset)
: await db
Expand Down

0 comments on commit af9046f

Please sign in to comment.