From 38400478d9910d70e3065ad9ab7445ec2d8ea4c2 Mon Sep 17 00:00:00 2001 From: DonHaul Date: Fri, 22 Nov 2024 17:01:06 +0100 Subject: [PATCH] hep: add backend for data and software * ref: cern-sis/issues-inspire/issues/611 --- backend/data/records/data/1170584.json | 12 ++++ backend/data/records/data/1177718.json | 12 ++++ backend/data/records/data/1177719.json | 12 ++++ backend/data/records/data/1177720.json | 12 ++++ backend/data/records/data/1177724.json | 12 ++++ backend/inspirehep/config.py | 5 +- backend/inspirehep/records/api/data.py | 19 +----- backend/inspirehep/records/config.py | 33 +++++++++- .../inspirehep/records/marshmallow/data/ui.py | 19 ++++++ .../records/serializers/json/data.py | 49 ++++++++++++++ backend/inspirehep/search/facets.py | 6 ++ backend/inspirehep/sitemap/collections.py | 6 ++ backend/inspirehep/submissions/loaders.py | 2 + .../submissions/marshmallow/__init__.py | 1 + .../submissions/marshmallow/data.py | 26 ++++++++ backend/inspirehep/submissions/views.py | 19 ++++++ .../minters/test_minters_control_number.py | 2 +- .../records/views/test_views_data.py | 38 ++++++++++- .../submissions/test_submissions_views.py | 65 +++++++++++++++++++ 19 files changed, 325 insertions(+), 25 deletions(-) create mode 100644 backend/data/records/data/1170584.json create mode 100644 backend/data/records/data/1177718.json create mode 100644 backend/data/records/data/1177719.json create mode 100644 backend/data/records/data/1177720.json create mode 100644 backend/data/records/data/1177724.json create mode 100644 backend/inspirehep/records/marshmallow/data/ui.py create mode 100644 backend/inspirehep/records/serializers/json/data.py create mode 100644 backend/inspirehep/submissions/marshmallow/data.py diff --git a/backend/data/records/data/1170584.json b/backend/data/records/data/1170584.json new file mode 100644 index 0000000000..4e936d08b5 --- /dev/null +++ b/backend/data/records/data/1170584.json @@ -0,0 +1,12 @@ +{ + 
"$schema":"https://inspirehep.net/schemas/records/data.json", + "_collections":[ + "Data" + ], + "control_number":1170584, + "deleted":false, + "legacy_version":"20120831055015.0", + "self":{ + "$ref":"https://inspirehep.net/api/data/1170584" + } +} diff --git a/backend/data/records/data/1177718.json b/backend/data/records/data/1177718.json new file mode 100644 index 0000000000..5a95b1a7bc --- /dev/null +++ b/backend/data/records/data/1177718.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://inspirehep.net/schemas/records/data.json", + "_collections": [ + "Data" + ], + "control_number": 1177718, + "deleted": false, + "legacy_version": "20120831060601.0", + "self": { + "$ref": "https://inspirehep.net/api/data/1177718" + } +} diff --git a/backend/data/records/data/1177719.json b/backend/data/records/data/1177719.json new file mode 100644 index 0000000000..9be75926a9 --- /dev/null +++ b/backend/data/records/data/1177719.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://inspirehep.net/schemas/records/data.json", + "_collections": [ + "Data" + ], + "control_number": 1177719, + "deleted": false, + "legacy_version": "20120831060602.0", + "self": { + "$ref": "https://inspirehep.net/api/data/1177719" + } +} diff --git a/backend/data/records/data/1177720.json b/backend/data/records/data/1177720.json new file mode 100644 index 0000000000..163bd8f279 --- /dev/null +++ b/backend/data/records/data/1177720.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://inspirehep.net/schemas/records/data.json", + "_collections": [ + "Data" + ], + "control_number": 1177720, + "deleted": false, + "legacy_version": "20120831060602.0", + "self": { + "$ref": "https://inspirehep.net/api/data/1177720" + } +} diff --git a/backend/data/records/data/1177724.json b/backend/data/records/data/1177724.json new file mode 100644 index 0000000000..bd6eb12f3b --- /dev/null +++ b/backend/data/records/data/1177724.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://inspirehep.net/schemas/records/data.json", + "_collections": [ 
+    "Data"
+  ],
+  "control_number": 1177724,
+  "deleted": false,
+  "legacy_version": "20120831060602.0",
+  "self": {
+    "$ref": "https://inspirehep.net/api/data/1177724"
+  }
+}
diff --git a/backend/inspirehep/config.py b/backend/inspirehep/config.py
index bd2b11d3c5..307fe7d363 100644
--- a/backend/inspirehep/config.py
+++ b/backend/inspirehep/config.py
@@ -99,6 +99,7 @@
     "Jobs": "jobs",
     "Institutions": "institutions",
     "Experiments": "experiments",
+    "Data": "data",
 }
 
 NON_PRIVATE_LITERATURE_COLLECTIONS = {
diff --git a/backend/inspirehep/records/api/data.py b/backend/inspirehep/records/api/data.py
index 84289f208a..313843c11b 100644
--- a/backend/inspirehep/records/api/data.py
+++ b/backend/inspirehep/records/api/data.py
@@ -16,22 +16,5 @@ class DataRecord(CitationMixin, InspireRecord):
     pid_type = "dat"
     pidstore_handler = PidStoreData
 
-    @classmethod
-    def create(
-        cls,
-        data,
-        *args,
-        **kwargs,
-    ):
-        record = super().create(data, **kwargs)
-        record.update_refs_in_citation_table()
-        return record
-
-    def update(
-        self,
-        data,
-        *args,
-        **kwargs,
-    ):
-        super().update(data)
+    def update_record_relationships(self):
         self.update_refs_in_citation_table()
diff --git a/backend/inspirehep/records/config.py b/backend/inspirehep/records/config.py
index 1f5f919540..203103e47f 100644
--- a/backend/inspirehep/records/config.py
+++ b/backend/inspirehep/records/config.py
@@ -58,6 +58,7 @@
     must_match_all_or_missing_filter,
     range_author_count_filter,
     records_conferences,
+    records_data,
     records_experiments,
     records_hep,
records_hep_cataloger,
@@ -503,6 +504,22 @@ def default_handler(error):
         "search_factory_imp": (
             "inspirehep.search.factories.search:search_factory_with_aggs"
         ),
+        "create_permission_factory_imp": SessionSuperuserPermission,
+        "update_permission_factory_imp": SessionCatalogerPermission,
     }
+)
+
+DATA_FACETS = deepcopy(DATA)
+DATA_FACETS.update(
+    {
+        "default_endpoint_prefix": False,
+        "search_factory_imp": (
+            "inspirehep.search.factories.search:search_factory_only_with_aggs"
+        ),
+        "list_route": "/data/facets/",
+        "search_serializers": {
+            "application/json": f"{INSPIRE_SERIALIZERS}:facets_json_response_search"
+        },
+    }
 )
 
@@ -608,6 +625,7 @@ def default_handler(error):
     "conferences": CONFERENCES,
     "conferences_facets": CONFERENCES_FACETS,
     "data": DATA,
+    "data_facets": DATA_FACETS,
     "institutions": INSTITUTIONS,
     "seminars": SEMINARS,
     "seminars_facets": SEMINARS_FACETS,
@@ -632,6 +650,7 @@ def default_handler(error):
     "subject": must_match_all_or_missing_filter("facet_inspire_categories", "Unknown"),
     "arxiv_categories": must_match_all_filter("facet_arxiv_categories"),
     "experiments": must_match_all_filter("facet_experiment"),
+    "data": must_match_all_filter("facet_data"),
     "rpp": filter_from_filters_aggregation(hep_rpp(order=1)),
 }
 
@@ -655,7 +674,9 @@ def default_handler(error):
     "accessibility": accessibility_filter(),
 }
 
+DATA_FILTERS = {}
+
 EXPERIMENTS_FILTERS = {
     "experiments": must_match_all_filter("facet_inspire_classification"),
     "institution": must_match_all_filter("institutions.value"),
 }
@@ -672,6 +693,7 @@ def default_handler(error):
     "records-conferences": records_conferences,
     "records-seminars": records_seminars,
     "records-experiments": records_experiments,
+    "records-data": records_data,
     "hep-experiment-papers": hep_experiment_papers,
 }
 CATALOGER_RECORDS_REST_FACETS = deepcopy(RECORDS_REST_FACETS)
@@ -726,6 +748,15 @@ def default_handler(error):
             "order": 2,
         },
     },
+    "records-data": {
+        "bestmatch": {"title": "Best match", "fields":
["-_score"], "order": 1}, + "mostrecent": { + "title": "Most Recent", + "fields": ["legacy_name"], + "default_order": "asc", + "order": 2, + }, + }, "records-conferences": { "dateasc": {"title": "Date ascending", "fields": ["opening_date"], "order": 1}, "datedesc": { diff --git a/backend/inspirehep/records/marshmallow/data/ui.py b/backend/inspirehep/records/marshmallow/data/ui.py new file mode 100644 index 0000000000..0caceb2e8a --- /dev/null +++ b/backend/inspirehep/records/marshmallow/data/ui.py @@ -0,0 +1,19 @@ +# +# Copyright (C) 2019 CERN. +# +# inspirehep is free software; you can redistribute it and/or modify it under +# the terms of the MIT License; see LICENSE file for more details. + +from inspirehep.records.marshmallow.data.base import DataPublicSchema + + +class DataBaseSchema(DataPublicSchema): + pass + + +class DataDetailSchema(DataBaseSchema): + pass + + +class DataListSchema(DataBaseSchema): + pass diff --git a/backend/inspirehep/records/serializers/json/data.py b/backend/inspirehep/records/serializers/json/data.py new file mode 100644 index 0000000000..8818fce64e --- /dev/null +++ b/backend/inspirehep/records/serializers/json/data.py @@ -0,0 +1,49 @@ +# +# Copyright (C) 2019 CERN. +# +# inspirehep is free software; you can redistribute it and/or modify it under +# the terms of the MIT License; see LICENSE file for more details. 
+ +from inspirehep.accounts.api import is_superuser_or_cataloger_logged_in +from inspirehep.records.marshmallow.base import wrap_schema_class_with_metadata +from inspirehep.records.marshmallow.data.base import ( + DataAdminSchema, + DataPublicSchema, +) +from inspirehep.records.marshmallow.data.ui import ( + DataDetailSchema, + DataListSchema, +) +from inspirehep.serializers import ( + ConditionalMultiSchemaJSONSerializer, + JSONSerializer, + record_responsify, + search_responsify, +) + +data_json = ConditionalMultiSchemaJSONSerializer( + [ + ( + lambda _: is_superuser_or_cataloger_logged_in(), + wrap_schema_class_with_metadata(DataAdminSchema), + ), + (None, wrap_schema_class_with_metadata(DataPublicSchema)), + ] +) + +data_json_response = record_responsify(data_json, "application/json") + +data_json_response_search = search_responsify(data_json, "application/json") + +data_json_detail = JSONSerializer(wrap_schema_class_with_metadata(DataDetailSchema)) +data_json_detail_response = record_responsify( + data_json_detail, "application/vnd+inspire.record.ui+json" +) + +data_json_list = JSONSerializer( + wrap_schema_class_with_metadata(DataListSchema), + index_name="records-data", +) +data_json_list_response = search_responsify( + data_json_list, "application/vnd+inspire.record.ui+json" +) diff --git a/backend/inspirehep/search/facets.py b/backend/inspirehep/search/facets.py index 8d26091e8c..85d06984b4 100644 --- a/backend/inspirehep/search/facets.py +++ b/backend/inspirehep/search/facets.py @@ -641,3 +641,9 @@ def records_experiments(order=None): **experiment_institution_aggregation(order=next(order)), }, } + + +def records_data(order=None): + if order is None: + order = count(start=1) + return {"filters": {**current_app.config["DATA_FILTERS"]}, "aggs": {}} diff --git a/backend/inspirehep/sitemap/collections.py b/backend/inspirehep/sitemap/collections.py index 515d9ff4af..b2f5985a5d 100644 --- a/backend/inspirehep/sitemap/collections.py +++ 
b/backend/inspirehep/sitemap/collections.py @@ -9,6 +9,7 @@ from inspirehep.search.api import ( AuthorsSearch, ConferencesSearch, + DataSearch, ExperimentsSearch, InstitutionsSearch, JobsSearch, @@ -42,6 +43,10 @@ def experiments(): return ExperimentsSearch() +def data(): + return DataSearch() + + def institutions(): return InstitutionsSearch() @@ -54,5 +59,6 @@ def get_indexable_record_searches(): conferences(), seminars(), experiments(), + data(), institutions(), ] diff --git a/backend/inspirehep/submissions/loaders.py b/backend/inspirehep/submissions/loaders.py index 8a15322df4..d3c9d84374 100644 --- a/backend/inspirehep/submissions/loaders.py +++ b/backend/inspirehep/submissions/loaders.py @@ -13,6 +13,7 @@ from inspirehep.submissions.marshmallow import ( Author, Conference, + Data, Experiment, Institution, Job, @@ -54,5 +55,6 @@ def json_loader(): literature_v1 = inspire_submission_marshmallow_loader(Literature) seminar_v1 = inspire_submission_marshmallow_loader(Seminar) experiment_v1 = inspire_submission_marshmallow_loader(Experiment) +data_v1 = inspire_submission_marshmallow_loader(Data) institution_v1 = inspire_submission_marshmallow_loader(Institution) journal_v1 = inspire_submission_marshmallow_loader(Journal) diff --git a/backend/inspirehep/submissions/marshmallow/__init__.py b/backend/inspirehep/submissions/marshmallow/__init__.py index eed21440da..bdb162fd1c 100644 --- a/backend/inspirehep/submissions/marshmallow/__init__.py +++ b/backend/inspirehep/submissions/marshmallow/__init__.py @@ -11,6 +11,7 @@ from .author import Author from .conferences import Conference from .experiments import Experiment +from .data import Data from .institutions import Institution from .job import Job from .literature import Literature diff --git a/backend/inspirehep/submissions/marshmallow/data.py b/backend/inspirehep/submissions/marshmallow/data.py new file mode 100644 index 0000000000..a75706001e --- /dev/null +++ b/backend/inspirehep/submissions/marshmallow/data.py @@ 
-0,0 +1,29 @@
+from flask import url_for
+from inspire_utils.record import get_value
+from marshmallow import Schema, fields, post_load
+
+from inspirehep.submissions.errors import InvalidDataError
+
+
+class Data(Schema):
+    """Submission form schema that builds a data record payload."""
+
+    legacy_name = fields.Raw()
+    project_type = fields.Raw()
+
+    @post_load
+    def build_data(self, data):
+        """Return the record dict, or raise InvalidDataError if a field is empty."""
+        if get_value(data, "legacy_name") and get_value(data, "project_type"):
+            return {
+                "_collections": ["Data"],
+                "$schema": url_for(
+                    "invenio_jsonschemas.get_schema",
+                    schema_path="records/data.json",
+                    _external=True,
+                ),
+                "legacy_name": get_value(data, "legacy_name"),
+                "project_type": get_value(data, "project_type"),
+            }
+
+        raise InvalidDataError("Data is missing a value or values.")
diff --git a/backend/inspirehep/submissions/views.py b/backend/inspirehep/submissions/views.py
index ce1194e5e0..fdd34ffe3b 100644
--- a/backend/inspirehep/submissions/views.py
+++ b/backend/inspirehep/submissions/views.py
@@ -32,6 +32,7 @@
 from inspirehep.records.api import (
     AuthorsRecord,
     ConferencesRecord,
+    DataRecord,
     ExperimentsRecord,
     InstitutionsRecord,
     JobsRecord,
@@ -323,6 +324,27 @@ def post(self):
         return (jsonify({"control_number": record["control_number"]}), 201)
 
 
+class DataSubmissionsResource(BaseSubmissionsResource):
+    """Create data records from the submission form (catalogers only)."""
+
+    decorators = [login_required_with_roles([Roles.cataloger.value])]
+
+    def load_data_from_request(self):
+        """Load the request body with the data submission loader."""
+        # Imported here: this change adds no module-level alias for data_v1.
+        from inspirehep.submissions.loaders import data_v1
+
+        return data_v1()
+
+    def post(self):
+        """Adds new data record"""
+
+        data = self.load_data_from_request()
+        record = DataRecord(data=data).create(data)
+        db.session.commit()
+        return (jsonify({"control_number": record["control_number"]}), 201)
+
+
 class SeminarSubmissionsResource(BaseSubmissionsResource):
     def post(self):
         """Adds new conference record"""
@@ -661,6 +683,9 @@ def post(self):
 )
 blueprint.add_url_rule("/experiments", view_func=experiment_submission_view)
 
+data_submission_view = DataSubmissionsResource.as_view("data_submission_view")
+blueprint.add_url_rule("/data", view_func=data_submission_view)
+
conference_submission_view = ConferenceSubmissionsResource.as_view( "conference_submissions_view" ) diff --git a/backend/tests/integration/pidstore/minters/test_minters_control_number.py b/backend/tests/integration/pidstore/minters/test_minters_control_number.py index 0d521dc4db..fd700e456f 100644 --- a/backend/tests/integration/pidstore/minters/test_minters_control_number.py +++ b/backend/tests/integration/pidstore/minters/test_minters_control_number.py @@ -221,6 +221,7 @@ def test_control_number_data_with_control_number(inspire_app): data = record.json DataMinter.mint(record.id, data) + expected_pid_value = str(data["control_number"]) expected_pid_type = "dat" expected_pid_object_uuid = record.id @@ -237,7 +238,6 @@ def test_control_number_data_without_control_number(inspire_app): data = record.json DataMinter.mint(record.id, data) - expected_pid_value = str(data["control_number"]) expected_pid_type = "dat" expected_pid_object_uuid = record.id diff --git a/backend/tests/integration/records/views/test_views_data.py b/backend/tests/integration/records/views/test_views_data.py index 9d6881a41b..81a91b9ddf 100644 --- a/backend/tests/integration/records/views/test_views_data.py +++ b/backend/tests/integration/records/views/test_views_data.py @@ -3,7 +3,8 @@ # # inspirehep is free software; you can redistribute it and/or modify it under # the terms of the MIT License; see LICENSE file for more details. 
-from helpers.utils import create_record, create_record_factory +from helpers.providers.faker import faker +from helpers.utils import create_record, create_record_factory, create_user_and_token def test_data_application_json_get(inspire_app): @@ -18,7 +19,24 @@ def test_data_application_json_get(inspire_app): assert expected_status_code == response_status_code -def test_data_application_json_put(inspire_app): +def test_data_application_json_put_with_token(inspire_app): + record = create_record("dat") + record_control_number = record["control_number"] + token = create_user_and_token() + + expected_status_code = 200 + + headers = {"Authorization": "BEARER " + token.access_token, "If-Match": '"0"'} + with inspire_app.test_client() as client: + response = client.put( + f"/data/{record_control_number}", headers=headers, json=record + ) + response_status_code = response.status_code + + assert expected_status_code == response_status_code + + +def test_data_application_json_put_without_token(inspire_app): record = create_record_factory("dat", with_indexing=True) record_control_number = record.json["control_number"] @@ -42,7 +60,21 @@ def test_data_application_json_delete(inspire_app): assert expected_status_code == response_status_code -def test_data_application_json_post(inspire_app): +def test_data_application_json_post_with_token(inspire_app): + expected_status_code = 201 + token = create_user_and_token() + + headers = {"Authorization": "BEARER " + token.access_token} + rec_data = faker.record("data") + + with inspire_app.test_client() as client: + response = client.post("/data", headers=headers, json=rec_data) + + response_status_code = response.status_code + assert expected_status_code == response_status_code + + +def test_data_application_json_post_without_token(inspire_app): expected_status_code = 401 with inspire_app.test_client() as client: response = client.post("/data") diff --git a/backend/tests/integration/submissions/test_submissions_views.py 
b/backend/tests/integration/submissions/test_submissions_views.py index ad8847f62d..333ad81f0e 100644 --- a/backend/tests/integration/submissions/test_submissions_views.py +++ b/backend/tests/integration/submissions/test_submissions_views.py @@ -24,6 +24,7 @@ from inspirehep.records.api import ( AuthorsRecord, ConferencesRecord, + DataRecord, ExperimentsRecord, InstitutionsRecord, JobsRecord, @@ -2241,6 +2242,70 @@ def test_new_experiment_submission_with_no_cataloger_role(inspire_app): assert response.status_code == 403 +REQUIRED_DATA_RECORD_DATA = { + "$schema": "http://localhost:8080/schemas/records/data.json", + "legacy_name": "TileCal17", + "_collections": ["Data"], + "project_type": ["data_type1"], +} +DATA_FORM_DATA = {"legacy_name": "TileCal17", "project_type": ["data_type1"]} + + +@pytest.mark.parametrize( + ("form_data", "expected_record_data"), + [(deepcopy(DATA_FORM_DATA), REQUIRED_DATA_RECORD_DATA)], +) +def test_new_data_submission(form_data, expected_record_data, inspire_app): + user = create_user(role=Roles.cataloger.value) + with inspire_app.test_client() as client: + login_user_via_session(client, email=user.email) + response = client.post( + "/submissions/data", + content_type="application/json", + data=orjson.dumps({"data": form_data}), + ) + assert response.status_code == 201 + + payload = orjson.loads(response.data) + data_id = payload["control_number"] + data_record = DataRecord.get_record_by_pid_value(data_id) + data_record_data = { + key: value + for (key, value) in data_record.items() + if key in expected_record_data + } + assert data_record_data == expected_record_data + + +def test_new_data_submission_with_empty_data( + inspire_app, +): + form_data = {} + user = create_user(role=Roles.cataloger.value) + with inspire_app.test_client() as client: + login_user_via_session(client, email=user.email) + response = client.post( + "/submissions/data", + content_type="application/json", + data=orjson.dumps({"data": form_data}), + ) + assert 
response.status_code == 400 + assert response.json["message"][0] == "Data is missing a value or values." + + +def test_new_data_submission_with_no_cataloger_role(inspire_app): + form_data = {} + user = create_user() + with inspire_app.test_client() as client: + login_user_via_session(client, email=user.email) + response = client.post( + "/submissions/data", + content_type="application/json", + data=orjson.dumps({"data": form_data}), + ) + assert response.status_code == 403 + + REQUIRED_INSTITUTION_RECORD_DATA = { "$schema": "http://localhost:5000/schemas/records/institutions.json", "_collections": ["Institutions"],