diff --git a/indexd/index/blueprint.py b/indexd/index/blueprint.py index 6a009182..eb2f2323 100644 --- a/indexd/index/blueprint.py +++ b/indexd/index/blueprint.py @@ -184,7 +184,8 @@ def post_index_record(): ''' Create a new record. ''' - try: jsonschema.validate(flask.request.json, POST_RECORD_SCHEMA) + try: + jsonschema.validate(flask.request.json, POST_RECORD_SCHEMA) except jsonschema.ValidationError as err: raise UserError(err) @@ -197,6 +198,7 @@ def post_index_record(): hashes = flask.request.json['hashes'] file_name = flask.request.json.get('file_name') metadata = flask.request.json.get('metadata') + urls_metadata = flask.request.json.get('urls_metadata') version = flask.request.json.get('version') baseid = flask.request.json.get('baseid') @@ -206,6 +208,7 @@ def post_index_record(): size=size, file_name=file_name, metadata=metadata, + urls_metadata=urls_metadata, version=version, urls=urls, acl=acl, @@ -239,6 +242,7 @@ def put_index_record(record): urls = flask.request.json.get('urls') acl = flask.request.json.get('acl') metadata = flask.request.json.get('metadata') + urls_metadata = flask.request.json.get('urls_metadata') did, baseid, rev = blueprint.index_driver.update( record, @@ -248,6 +252,7 @@ def put_index_record(record): urls=urls, acl=acl, metadata=metadata, + urls_metadata=urls_metadata, ) ret = { @@ -291,9 +296,10 @@ def add_index_record_version(record): urls = flask.request.json['urls'] acl = flask.request.json.get('acl', []) hashes = flask.request.json['hashes'] - file_name = flask.request.json.get('file_name', None) - metadata = flask.request.json.get('metadata', None) - version = flask.request.json.get('version', None) + file_name = flask.request.json.get('file_name') + metadata = flask.request.json.get('metadata') + urls_metadata = flask.request.json.get('urls_metadata') + version = flask.request.json.get('version') did, baseid, rev = blueprint.index_driver.add_version( record, @@ -304,6 +310,7 @@ def add_index_record_version(record): acl=acl, file_name=file_name, metadata=metadata, + urls_metadata=urls_metadata, version=version, hashes=hashes, ) diff --git a/indexd/index/driver.py b/indexd/index/driver.py index 6bc5425d..0c38540b 100644 --- a/indexd/index/driver.py +++ b/indexd/index/driver.py @@ -37,7 +37,10 @@ def hashes_to_urls(self, size, hashes, start=0, limit=100): raise NotImplementedError('TODO') @abc.abstractmethod - def add(self, form, did=None, size=None, urls=None, hashes=None, file_name=None, metadata=None, version=None): + def add( + self, form, did=None, size=None, urls=None, + hashes=None, file_name=None, metadata=None, + urls_metadata=None, version=None): ''' Creates record for given data. ''' @@ -53,7 +56,7 @@ def get(self, did): @abc.abstractmethod def update(self, did, rev, urls=None, file_name=None, - version=None, metadata=None): + urls_metadata=None, version=None, metadata=None): ''' Updates record with new values. ''' @@ -69,7 +72,8 @@ def delete(self, did, rev): @abc.abstractmethod def add_version( self, did, form, size=None, - file_name=None, metadata=None, urls=None, hashes=None, version=None): + file_name=None, metadata=None, urls=None, + urls_metadata=None, hashes=None, version=None): ''' Add a record version given did ''' diff --git a/indexd/index/drivers/alchemy.py b/indexd/index/drivers/alchemy.py index f6579940..c8dc7442 100644 --- a/indexd/index/drivers/alchemy.py +++ b/indexd/index/drivers/alchemy.py @@ -1,5 +1,6 @@ -import uuid import datetime +from future.utils import iteritems +import uuid from cdislogging import get_logger from contextlib import contextmanager @@ -82,6 +83,37 @@ class IndexRecord(Base): cascade='all, delete-orphan', ) + def to_document_dict(self): + """ + Get the full index document + """ + urls = [u.url for u in self.urls] + acl = [u.ace for u in self.acl] + hashes = {h.hash_type: h.hash_value for h in self.hashes} + metadata = {m.key: m.value for m in self.index_metadata} + + urls_metadata = { + u.url: {m.key: m.value for m in u.url_metadata} for u in self.urls} + created_date = self.created_date.isoformat() + updated_date = self.updated_date.isoformat() + + return { + 'did': self.did, + 'baseid': self.baseid, + 'rev': self.rev, + 'size': self.size, + 'file_name': self.file_name, + 'version': self.version, + 'urls': urls, + 'urls_metadata': urls_metadata, + 'acl': acl, + 'hashes': hashes, + 'metadata': metadata, + 'form': self.form, + 'created_date': created_date, + "updated_date": updated_date, + } + class IndexRecordUrl(Base): """ @@ -104,9 +136,9 @@ class IndexRecordUrl(Base): class IndexRecordACE(Base): - ''' + """ index record access control entry representation. - ''' + """ __tablename__ = 'index_record_ace' @@ -121,7 +153,7 @@ class IndexRecordACE(Base): class IndexRecordMetadata(Base): """ - Table to track current database's schema version + Metadata attached to index document """ __tablename__ = 'index_record_metadata' @@ -134,7 +166,7 @@ class IndexRecordMetadata(Base): class IndexRecordUrlMetadata(Base): """ - Table to track current database's schema version + Metadata attached to url """ __tablename__ = 'index_record_url_metadata' @@ -164,6 +196,20 @@ class IndexRecordHash(Base): ) +def create_urls_metadata(urls_metadata, record, session): + """ + create url metadata record in database + """ + urls = {u.url for u in record.urls} + for url, url_metadata in iteritems(urls_metadata): + if url not in urls: + raise UserError( + 'url {} in urls_metadata does not exist'.format(url)) + for k, v in iteritems(url_metadata): + session.add(IndexRecordUrlMetadata( + url=url, key=k, value=v, did=record.did)) + + class SQLAlchemyIndexDriver(IndexDriverABC): """ SQLAlchemy implementation of index driver. @@ -315,13 +361,15 @@ def add(self, size=None, file_name=None, metadata=None, + urls_metadata=None, version=None, urls=None, acl=None, hashes=None, baseid=None): """ - Creates a new record given size, urls, acl, hashes, metadata, file name and version + Creates a new record given size, urls, acl, hashes, metadata, + urls_metadata file name and version if did is provided, update the new record with the did otherwise create it """ @@ -334,6 +382,8 @@ def add(self, if metadata is None: metadata = {} + if urls_metadata is None: + urls_metadata = {} with self.session as session: record = IndexRecord() @@ -374,11 +424,11 @@ def add(self, key=m_key, value=m_value ) for m_key, m_value in metadata.items()] - session.merge(base_version) try: session.add(record) + create_urls_metadata(urls_metadata, record, session) session.commit() except IntegrityError: raise UserError('did "{did}" already exists'.format(did=record.did), 400) @@ -403,45 +453,11 @@ def get(self, did): raise NoRecordFound('no record found') except MultipleResultsFound: raise MultipleRecordsFound('multiple records found') - did = record.did - baseid = record.baseid - rev = record.rev - - form = record.form - size = record.size - - file_name = record.file_name - version = record.version - - urls = [u.url for u in record.urls] - acl = [u.ace for u in record.acl] - hashes = {h.hash_type: h.hash_value for h in record.hashes} - metadata = {m.key: m.value for m in record.index_metadata} - - created_date = record.created_date.isoformat() - updated_date = record.updated_date.isoformat() - - ret = { - 'did': did, - 'baseid': baseid, - 'rev': rev, - 'size': size, - 'file_name': file_name, - 'version': version, - 'urls': urls, - 'acl': acl, - 'hashes': hashes, - 'metadata': metadata, - 'form': form, - 'created_date': created_date, - "updated_date": updated_date, - } - - return ret + return record.to_document_dict() def update(self, did, rev, urls=None, acl=None, file_name=None, - version=None, metadata=None): + version=None, metadata=None, urls_metadata=None): """ Updates an existing record with new values. """ @@ -487,6 +503,13 @@ def update(self, value=m_value ) for m_key, m_value in metadata.items()] + if urls_metadata is not None: + for url in record.urls: + for url_metadata in url.url_metadata: + session.delete(url_metadata) + + create_urls_metadata(urls_metadata, record, session) + if file_name is not None: record.file_name = file_name @@ -526,6 +549,7 @@ def add_version(self, size=None, file_name=None, metadata=None, + urls_metadata=None, version=None, urls=None, acl=None, @@ -541,6 +565,8 @@ def add_version(self, hashes = {} if metadata is None: metadata = {} + if urls_metadata is None: + urls_metadata = {} with self.session as session: query = session.query(IndexRecord).filter_by(did=current_did) @@ -587,6 +613,7 @@ def add_version(self, try: session.add(record) + create_urls_metadata(urls_metadata, record, session) session.commit() except IntegrityError: raise UserError('{did} already exists'.format(did=did), 400) @@ -618,37 +645,8 @@ def get_all_versions(self, did): records = query.filter(IndexRecord.baseid == baseid).all() for idx, record in enumerate(records): - rev = record.rev - did = record.did - baseid = record.baseid - form = record.form - - size = record.size - file_name = record.file_name - version = record.version - urls = [u.url for u in record.urls] - acl = [u.ace for u in record.acl] - hashes = {h.hash_type: h.hash_value for h in record.hashes} - metadata = {m.key: m.value for m in record.index_metadata} - - created_date = record.created_date.isoformat() - updated_date = record.updated_date.isoformat() - - ret[idx] = { - 'did': did, - 'baseid': baseid, - 'rev': rev, - 'size': size, - 'file_name': file_name, - 'metadata': metadata, - 'version': version, - 'urls': urls, - 'acl': acl, - 'hashes': hashes, - 'form': form, - 'created_date': created_date, - 'updated_date': updated_date, - } + + ret[idx] = record.to_document_dict() return ret @@ -676,42 +674,7 @@ def get_latest_version(self, did): if (not record): raise NoRecordFound('no record found') - - rev = record.rev - did = record.did - baseid = record.baseid - - form = record.form - size = record.size - file_name = record.file_name - - metadata = {m.key: m.value for m in record.index_metadata} - version = record.version - - urls = [u.url for u in record.urls] - acl = [u.ace for u in record.acl] - hashes = {h.hash_type: h.hash_value for h in record.hashes} - - created_date = record.created_date.isoformat() - updated_date = record.updated_date.isoformat() - - ret = { - 'did': did, - 'baseid': baseid, - 'rev': rev, - 'size': size, - 'file_name': file_name, - 'metadata': metadata, - 'version': version, - 'urls': urls, - 'acl': acl, - 'hashes': hashes, - 'form': form, - 'created_date': created_date, - 'updated_date': updated_date, - } - - return ret + return record.to_document_dict() def health_check(self): """ diff --git a/indexd/index/schema.py b/indexd/index/schema.py index d53c5f9f..dac52fd3 100644 --- a/indexd/index/schema.py +++ b/indexd/index/schema.py @@ -34,6 +34,10 @@ "description": "optional metadata of the object", "type": "object" }, + "urls_metadata": { + "description": "optional urls metadata of the object", + "type": "object", + }, "version": { "description": "optional version string of the object", "type": "string", @@ -145,6 +149,9 @@ "metadata": { "type": "object" }, + "urls_metadata": { + "type": "object" + }, "rev": { "type": "string", "pattern": "^[0-9a-f]{8}$", diff --git a/openapis/swagger.yaml b/openapis/swagger.yaml index 2f342a27..5658e673 100644 --- a/openapis/swagger.yaml +++ b/openapis/swagger.yaml @@ -549,7 +549,9 @@ definitions: file_name: type: string metadata: - type: string + type: object + urls_metadata: + type: object version: type: string urls: @@ -588,7 +590,9 @@ definitions: file_name: type: string metadata: - type: string + type: object + urls_metadata: + type: object version: type: string urls: @@ -612,6 +616,8 @@ definitions: type: string metadata: type: object + urls_metadata: + type: object version: type: string urls: @@ -747,6 +753,9 @@ definitions: metadata: type: object description: user defined optional metadata + urls_metadata: + type: object + description: url level optional metadata SystemVersionOutputRef: type: object properties: diff --git a/setup.py b/setup.py index ebdef83c..0daa977a 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,8 @@ 'jsonschema==2.5.1', 'sqlalchemy==1.0.8', 'sqlalchemy-utils>=0.32.21', - 'psycopg2>=2.7', + 'psycopg2>=2.7', + 'future>=0.16.0,<1.0.0', 'cdislogging', 'indexclient', 'doiclient', diff --git a/tests/test_client.py b/tests/test_client.py index 395e7c13..f73166fb 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -6,7 +6,7 @@ from indexd.index.blueprint import ACCEPTABLE_HASHES -def get_doc(has_metadata=True, has_baseid=False): +def get_doc(has_metadata=True, has_baseid=False, has_urls_metadata=False): doc = { 'form': 'object', 'size': 123, @@ -17,6 +17,9 @@ def get_doc(has_metadata=True, has_baseid=False): doc['metadata'] = {'project_id': 'bpa-UChicago'} if has_baseid: doc['baseid'] = 'e044a62c-fd60-4203-b1e5-a62d1005f027' + if has_urls_metadata: + doc['urls_metadata'] = { + 's3://endpointurl/bucket/key': {'file_state': 'uploaded'}} return doc @@ -50,6 +53,20 @@ def test_index_list_with_params(client, user): assert r_2.json['did'] in r.json['ids'] +def test_urls_metadata(swg_index_client): + data = get_doc(has_urls_metadata=True) + result = swg_index_client.add_entry(data) + + doc = swg_index_client.get_entry(result.did) + assert doc.urls_metadata == data['urls_metadata'] + + updated = {'urls_metadata': {data['urls'][0]: {'test': 'b'}}} + swg_index_client.update_entry(doc.did, rev=doc.rev, body=updated) + + doc = swg_index_client.get_entry(result.did) + assert doc.urls_metadata == updated['urls_metadata'] + + def test_index_create(swg_index_client): data = get_doc(has_baseid=True)