Skip to content

Commit

Permalink
Merge pull request #333 from ubermichael/main
Browse files Browse the repository at this point in the history
Experiment with similar titles
  • Loading branch information
ubermichael authored Aug 8, 2022
2 parents 53d4b11 + 255c272 commit 5075038
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 2 deletions.
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"twig/cssinliner-extra": "^3.3",
"twig/extra-bundle": "^2.12|^3.0",
"twig/inky-extra": "^3.3",
"twig/intl-extra": "^3.4",
"twig/markdown-extra": "^3.3",
"twig/string-extra": "^3.0",
"twig/twig": "^2.12|^3.0",
Expand Down
71 changes: 70 additions & 1 deletion composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion src/Controller/TitleController.php
Original file line number Diff line number Diff line change
Expand Up @@ -313,14 +313,17 @@ public function importMarcAction(Request $request, EstcMarcImporter $importer, $
* @Template
*
* @return array<string,mixed> */
public function showAction(Title $title, SourceLinker $linker) {
public function showAction(Title $title, SourceLinker $linker, TitleRepository $repo) {
    // Anonymous visitors may only see titles that passed a final attempt or a final check.
    if ( ! $this->getUser() && ! $title->getFinalattempt() && ! $title->getFinalcheck()) {
        throw new AccessDeniedHttpException('This title has not been verified and is not available to the public.');
    }

    // The show template only renders the similar-titles table inside an
    // is_granted('ROLE_USER') block, so skip the full-text search and the
    // levenshtein filtering for everyone else instead of discarding the
    // result at render time.
    $similar = $this->isGranted('ROLE_USER') ? $repo->moreLike($title) : [];

    return [
        'title' => $title,
        'linker' => $linker,
        'similar' => $similar,
    ];
}

Expand Down
39 changes: 39 additions & 0 deletions src/Repository/TitleRepository.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
use Doctrine\ORM\Query;
use Doctrine\ORM\QueryBuilder;
use Doctrine\Persistence\ManagerRegistry;
use Symfony\Component\Console\Input\InputOption;

/**
* Title Repository.
Expand Down Expand Up @@ -90,6 +91,44 @@ public function typeaheadQuery($q) {
return $qb->getQuery()->execute();
}

/**
 * Find titles whose text resembles the given title.
 *
 * Candidates come from a full text MATCH ... AGAINST query in BOOLEAN mode
 * (only rows scoring above 5.0 are kept, best score first) and are then
 * filtered by a normalised levenshtein similarity, which must exceed 0.3.
 * The title itself is never included in the result.
 *
 * @param Title $title
 *
 * @return array<int,array<string,mixed>> one entry per similar title with
 *                                        the keys 'title' (Title), 'score'
 *                                        (full text score) and 'lev'
 *                                        (similarity, where 1.0 is identical)
 */
public function moreLike(Title $title) {
    $text = (string) $title->getTitle();

    // In BOOLEAN mode the characters + - < > ( ) ~ * " @ are search
    // operators, not text. Titles routinely contain them, which can exclude
    // terms by accident or make the server reject the expression outright
    // (NOTE(review): InnoDB is documented to raise a syntax error for some
    // operator combinations - confirm against the storage engine in use).
    // They are ASCII, so a byte-wise replacement is safe for UTF-8 input.
    $terms = trim((string) preg_replace('/[+\-<>()~*"@]+/', ' ', $text));
    if ('' === $terms) {
        // Nothing left to search for, so nothing can match.
        return [];
    }

    $qb = $this->createQueryBuilder('title');
    $qb->addSelect('MATCH(title.title) AGAINST (:title BOOLEAN) AS score');
    $qb->setParameter('title', $terms);
    $qb->andHaving('score > 5.0');
    $qb->orderBy('score', 'desc');
    $result = $qb->getQuery()->execute();

    // MySQL's full text indexing is good, but not good enough for this. It
    // finds a lot of false positives, so filter them out with a quick
    // levenshtein().
    $similar = [];
    // No mb_substr here, as levenshtein() is hard-limited to 255 bytes!
    $needle = substr($text, 0, 255);
    $needleLength = strlen($needle);

    foreach ($result as $row) {
        // Each row is a mixed result: the entity at index 0 plus the score.
        $candidate = $row[0];
        if ($candidate->getId() === $title->getId()) {
            continue;
        }
        // No mb_substr here, as levenshtein() is hard-limited to 255 bytes!
        $other = substr((string) $candidate->getTitle(), 0, 255);

        // levenshtein() counts bytes, so normalise by the byte length too.
        // Dividing by a character count (mb_strlen) understates the
        // similarity of non-ASCII titles and can even turn it negative.
        // $needle is never empty here because $terms was not, so the
        // divisor is always at least 1.
        $longest = max($needleLength, strlen($other));
        $lev = 1.0 - levenshtein($needle, $other) / $longest;
        if ($lev > 0.3) {
            $similar[] = [
                'title' => $candidate,
                'score' => $row['score'],
                'lev' => $lev,
            ];
        }
    }

    return $similar;
}

/**
* Build a complex search query from form data.
*
Expand Down
70 changes: 70 additions & 0 deletions templates/title/show.html.twig
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,76 @@
{% endembed %}
</div>

{# Similar titles, as computed by the controller. Only shown to logged in users. #}
{% if is_granted('ROLE_USER') %}
    <div class='similarity'>
        <h2>Similar Titles</h2>
        {% set count = similar | length %}
        {% if count > 0 %}
            <p><i>These titles are selected from the database automatically,
                based on text analysis. The list may be incomplete or contain
                unrelated titles.</i></p>
            <table class='table'>
                <thead>
                    <tr>
                        <th>ID</th>
                        <th>Title</th>
                        <th>Contributors</th>
                        <th>Firms</th>
                        <th>Date</th>
                        <th>Edition</th>
                        {% if is_granted('ROLE_ADMIN') %}
                            <th></th>
                        {% endif %}
                    </tr>
                </thead>
                <tbody>
                    {# Do not reuse the name `title` inside the loop: a variable that
                       exists before a Twig loop keeps the value assigned inside it,
                       so the rest of this page would go on to describe the last
                       similar title instead of the one being shown. #}
                    {% for row in similar %}
                        {% set similarTitle = row.title %}
                        <tr>
                            <td>
                                <a href="{{ path('title_show', { 'id': similarTitle.id }) }}">
                                    {{ similarTitle.id }}
                                </a>
                            </td>
                            <td>
                                <a href="{{ path('title_show', { 'id': similarTitle.id }) }}">
                                    {{ similarTitle.title }}
                                </a>
                            </td>
                            <td>
                                {% for tr in similarTitle.titleRoles|slice(0,title_index_authors) %}
                                    <a href="{{ path('person_show', {'id': tr.person.id}) }}">{{ tr.person.lastName }}, {{ tr.person.firstName }}</a>
                                    ({{ tr.role }})
                                    <br/>
                                {% endfor %}
                                {% if similarTitle.titleRoles|length > title_index_authors %}and {{ similarTitle.titleRoles|length - title_index_authors }} more.{% endif %}
                            </td>
                            <td>
                                {% for tfr in similarTitle.titleFirmRoles|slice(0,title_index_authors) %}
                                    <a href="{{ path('firm_show', {'id': tfr.firm.id}) }}">{{ tfr.firm.name }}</a>
                                    ({{ tfr.firmrole }})
                                    <br/>
                                {% endfor %}
                                {% if similarTitle.titleFirmRoles|length > title_index_authors %}and {{ similarTitle.titleFirmRoles|length - title_index_authors }} more.{% endif %}
                            </td>
                            <td class="numeric">{{ similarTitle.pubdate }}</td>
                            <td class="numeric">{{ similarTitle.editionNumber }}</td>
                            {% if is_granted('ROLE_ADMIN') %}
                                {# Diagnostics for admins: full text score, then levenshtein similarity. #}
                                <td class='numeric'>
                                    {{ row.score|format_number({fraction_digit: 1}) }}<br>
                                    {{ row.lev|format_number(style='percent') }}
                                </td>
                            {% endif %}
                        </tr>
                    {% endfor %}
                </tbody>
            </table>
        {% else %}
            <p><i>No similar titles found in the database.</i></p>
        {% endif %}
    </div>
{% endif %}

{% set relations = title.relatedTitles|merge(title.titlesRelated) %}
{% if relations|length > 0 %}
<div>
Expand Down

0 comments on commit 5075038

Please sign in to comment.