Skip to content

Commit

Permalink
assign: fix author can_claim check
Browse files Browse the repository at this point in the history
  • Loading branch information
drjova committed Jun 14, 2024
1 parent 8604af5 commit 976b3bc
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 23 deletions.
51 changes: 29 additions & 22 deletions backend/inspirehep/assign/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#
# inspirehep is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

import structlog
from flask import request
from inspire_dojson.utils import get_recid_from_ref
Expand All @@ -14,6 +15,7 @@
from invenio_pidstore.models import PersistentIdentifier
from invenio_records.models import RecordMetadata
from sqlalchemy.orm.exc import NoResultFound
from unidecode import unidecode

from inspirehep.accounts.api import get_current_user_orcid
from inspirehep.records.api import AuthorsRecord
Expand Down Expand Up @@ -108,30 +110,35 @@ def can_claim(data, author_profile_recid):
if not lit_record:
return False

author_parsed_name = ParsedName.loads(current_author_profile["name"]["value"])
author_names = {
current_author_profile["name"]["value"],
author_parsed_name.last,
str(author_parsed_name), # removes ',' and puts it in normal order
}
author_names.update(
[
author_name.split(",")[0]
for author_name in get_value(
current_author_profile, "name.name_variants", []
)
]
)
def get_last_names(name):
    """Return the set of transliterated last names extracted from ``name``.

    Three shapes of input are distinguished:

    * a name the parser reports as having a single part (e.g. "Smith"):
      that part is read from ``first`` and returned on its own;
    * a name without a comma (e.g. "Smith Davis"): every
      whitespace-separated token is treated as a last name;
    * any other name (e.g. "Smith Davis, J."): the parser's ``last_list``
      is used.

    Every returned name is passed through ``unidecode`` so that accented
    and unaccented spellings compare equal.
    """
    parsed = ParsedName.loads(name)

    # Corner case for a single name (i.e. "Smith").
    if len(parsed) == 1:
        return {unidecode(parsed.first)}

    # Corner case for full names without a comma: all tokens are treated
    # as last names (i.e. "Smith Davis").
    candidates = parsed.last_list if "," in name else name.split()
    return {unidecode(candidate) for candidate in candidates}

author_last_names = set()
author_last_names.update(get_last_names(current_author_profile["name"]["value"]))
for variant in get_value(current_author_profile, "name.name_variants", []):
author_last_names.update(get_last_names(variant))

lit_author = get_author_by_recid(lit_record, int(author_profile_recid))
lit_author_parsed_name = ParsedName.loads(lit_author.get("full_name", ""))
lit_author_names = {
lit_author.get("full_name", ""),
lit_author_parsed_name.last,
str(lit_author_parsed_name),
}
lit_author_last_names = set()
if lit_author:
lit_author_last_names.update(get_last_names(lit_author.get("full_name", "")))

return lit_author_names & author_names
return bool(author_last_names & lit_author_last_names)


def _check_names_compability(lit_record, author_parsed_name, last_names_only=False):
Expand All @@ -141,7 +148,7 @@ def _check_names_compability(lit_record, author_parsed_name, last_names_only=Fal
author_name_to_compare = (
author_parsed_name.last
if last_names_only
else f"{ author_parsed_name.last}, {author_parsed_name.first}".strip(", ")
else f"{author_parsed_name.last}, {author_parsed_name.first}".strip(", ")
)
matched_authors_recids = [
recid
Expand Down
2 changes: 1 addition & 1 deletion backend/inspirehep/assign/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def assign_different_profile(args):

for literature_id in literature_ids:
record = LiteratureRecord.get_record_by_pid_value(literature_id)
if record.get("curated") and not is_from_author_stub:
if record.get("curated_relation") and not is_from_author_stub:
literature_ids_already_claimed.append(literature_id)
if not can_claim(record, from_author_recid):
literature_ids_not_compatible_name.append(literature_id)
Expand Down
50 changes: 50 additions & 0 deletions backend/tests/unit/assign/test_assign_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2019 CERN.
#
# inspirehep is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

import pytest
from mock import patch

from inspirehep.assign.utils import can_claim


@pytest.mark.parametrize(
    "authors_full_name, profile_name, name_variants, expected",
    [
        ("Smith, John", "Smith, J.", [], True),
        ("Smith, J.", "Smith, John", [], True),
        ("Smith Davis, J.", "Smith, Robert", [], True),
        ("Davis, J.", "Smith Davis, P.", [], True),
        ("Smith, J.", "Davis, Smith", [], False),
        ("Smïth, J.", "Smith, J.", [], True),
        ("Smith Davis", "Smith, J.", [], True),
        ("Smith, J.", "Smith", [], True),
        # The profile's main name does not match, but one of its variants does.
        ("Smith, J.", "Davis, J.", ["Smith, John"], True),
    ],
)
@patch("inspirehep.assign.utils._get_current_user_author_profile")
@patch("inspirehep.assign.utils._get_lit_record_from_db")
@patch("inspirehep.assign.utils.get_author_by_recid")
def test_can_claim(
    mock_get_author_by_recid,
    mock_get_lit_record_from_db,
    mock_get_current_user_author_profile,
    authors_full_name,
    profile_name,
    name_variants,
    expected,
):
    """Check ``can_claim`` for several paper-author / profile name pairs.

    The current user's profile, the literature record lookup and the
    literature author lookup are all patched, so only the name comparison
    is exercised. ``patch`` decorators are applied bottom-up, which is why
    the mock arguments appear in the reverse order of the decorators.
    """
    # ``can_claim`` reads the variants from the "name.name_variants" path, so
    # they must be nested under "name" for the parameter to have any effect.
    mock_get_current_user_author_profile.return_value = {
        "name": {"value": profile_name, "name_variants": name_variants},
    }
    mock_get_lit_record_from_db.return_value = {"control_number": 123}
    mock_get_author_by_recid.return_value = {"full_name": authors_full_name}

    data = {"control_number": 123}
    author_profile_recid = 1

    result = can_claim(data, author_profile_recid)
    assert result == expected

0 comments on commit 976b3bc

Please sign in to comment.