From e0320122875c1cec2bef8d17a3aa3f869828d932 Mon Sep 17 00:00:00 2001 From: ian-hoyle Date: Tue, 11 Jun 2024 11:09:50 +0100 Subject: [PATCH] Tdrd 215 validate metadata using schema validation library (#73) use latest tdr-metadata-validation using schema validation --- project/Dependencies.scala | 4 +- .../draftmetadatavalidator/CSVHandler.scala | 14 ++++++ .../draftmetadatavalidator/Lambda.scala | 15 ++++--- src/test/resources/invalid-sample.csv | 4 +- .../CSVHandlerSpec.scala | 45 ++++++++++++++++++- .../draftmetadatavalidator/LambdaSpec.scala | 17 +++++-- 6 files changed, 86 insertions(+), 13 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index b5fb8c4..c6e338f 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -1,4 +1,4 @@ -import sbt._ +import sbt.* object Dependencies { @@ -7,7 +7,7 @@ object Dependencies { lazy val scalaCsv = "com.github.tototoshi" %% "scala-csv" % "1.3.10" lazy val scalaTest = "org.scalatest" %% "scalatest" % "3.2.15" - lazy val metadataValidation = "uk.gov.nationalarchives" %% "tdr-metadata-validation" % "0.0.21" + lazy val metadataValidation = "uk.gov.nationalarchives" %% "tdr-metadata-validation" % "0.0.27" lazy val generatedGraphql = "uk.gov.nationalarchives" %% "tdr-generated-graphql" % "0.0.372" lazy val graphqlClient = "uk.gov.nationalarchives" %% "tdr-graphql-client" % "0.0.144" lazy val authUtils = "uk.gov.nationalarchives" %% "tdr-auth-utils" % "0.0.187" diff --git a/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandler.scala b/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandler.scala index 338b59d..932d7bd 100644 --- a/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandler.scala +++ b/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandler.scala @@ -23,6 +23,20 @@ class CSVHandler { FileData(allRowsWithHeader, fileRows) } + /** Reads a CSV file into a list of FileRows The FileRow.fileName is the 
identifier for the row and has been used to store the UUID in the loadCSV def above (expecting the UUID to be + * in the last column). Which identifier to use is still to be decided. The FileRow metadata key (header) is unaltered and the value is maintained as a string. + * @param filePath + * path to csv + * @return + * List of FileRows + */ + def loadCSV(filePath: String): List[FileRow] = { + val reader = CSVReader.open(filePath) + val all: Seq[Map[String, String]] = reader.allWithHeaders() + val fileRows = all.map(row => FileRow(row("UUID"), row.map(columnHeaderValue => Metadata(columnHeaderValue._1, columnHeaderValue._2)).toList)) + fileRows.toList + } + def writeCsv(rows: List[List[String]], filePath: String): Unit = { val bas = new ByteArrayOutputStream() val writer = CSVWriter.open(bas) diff --git a/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/Lambda.scala b/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/Lambda.scala index 0225229..0ad9805 100644 --- a/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/Lambda.scala +++ b/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/Lambda.scala @@ -2,7 +2,7 @@ package uk.gov.nationalarchives.draftmetadatavalidator import cats.effect.IO import com.amazonaws.services.lambda.runtime.{Context, RequestHandler} -import com.amazonaws.services.lambda.runtime.events.{APIGatewayProxyResponseEvent} +import com.amazonaws.services.lambda.runtime.events.APIGatewayProxyResponseEvent import graphql.codegen.GetCustomMetadata.customMetadata.CustomMetadata import graphql.codegen.GetCustomMetadata.{customMetadata => cm} import graphql.codegen.GetDisplayProperties.displayProperties.DisplayProperties @@ -24,7 +24,8 @@ import uk.gov.nationalarchives.draftmetadatavalidator.ApplicationConfig._ import uk.gov.nationalarchives.draftmetadatavalidator.Lambda.{DraftMetadata, getFilePath} import uk.gov.nationalarchives.tdr.GraphQLClient import uk.gov.nationalarchives.tdr.keycloak.{KeycloakUtils, 
TdrKeycloakDeployment} -import uk.gov.nationalarchives.tdr.validation.Metadata +import uk.gov.nationalarchives.tdr.validation.{FileRow, Metadata} +import uk.gov.nationalarchives.tdr.validation.schema.MetadataValidationJsonSchema import java.net.URI import java.sql.Timestamp @@ -76,12 +77,16 @@ class Lambda extends RequestHandler[java.util.Map[String, Object], APIGatewayPro for { customMetadata <- graphQlApi.getCustomMetadata(draftMetadata.consignmentId, clientSecret) displayProperties <- graphQlApi.getDisplayProperties(draftMetadata.consignmentId, clientSecret) - metadataValidator = MetadataValidationUtils.createMetadataValidation(customMetadata) result <- { val csvHandler = new CSVHandler() val filePath = getFilePath(draftMetadata) - val fileData = csvHandler.loadCSV(filePath, getMetadataNames(displayProperties, customMetadata)) - val errors = metadataValidator.validateMetadata(fileData.fileRows) + // The CSV is loaded twice because validation and CSV writing currently use different row representations. + // What matters is that .fileName is the key used to match errors to the rows written. + // Currently using last column UUID. 
If it is decided to use the UUID the 'fileName' attribute + // should be renamed + val fileData: FileData = csvHandler.loadCSV(filePath, getMetadataNames(displayProperties, customMetadata)) + val fileRows: List[FileRow] = csvHandler.loadCSV(filePath) + val errors = MetadataValidationJsonSchema.validate(fileRows) if (errors.values.exists(_.nonEmpty)) { val updatedFileRows = "Error" :: fileData.fileRows.map(file => { errors(file.fileName).map(p => s"${p.propertyName}: ${p.errorCode}").mkString(" | ") diff --git a/src/test/resources/invalid-sample.csv b/src/test/resources/invalid-sample.csv index bb1ea83..80ccb7a 100644 --- a/src/test/resources/invalid-sample.csv +++ b/src/test/resources/invalid-sample.csv @@ -1,4 +1,4 @@ Filename,Filepath,Date last modified,Closure status,Closure Start Date,Closure Period,FOI exemption code,FOI decision asserted,Is the title sensitive for the public?,Add alternative title without the file extension,Description,Is the description sensitive for the public?,Alternative description,Language,Date of the record,Translated title of record,Former reference,UUID -test3.txt,test/test3.txt,2024-03-26,Closed,,,,,No,,hhhhh,No,,English,,,,a060c57d-1639-4828-9a7a-67a7c64dbf6c -test1.txt,test/test1.txt,2024-03-26,Closed,1990-01-01,12,27(1)|27(2),1990-01-01,Yes,asd,hello,No,,English,,,,cbf2cba5-f1dc-45bd-ae6d-2b042336ce6c +test3.txt,test/test3.txt,12/2/2345,Closed,,,,,No,,hhhhh,No,,English,,,,a060c57d-1639-4828-9a7a-67a7c64dbf6c +test1.txt,test/test1.txt,2024-03-26,Closed,1990-01-01,12,78|27(1)|27(2),1990-01-01,Yes,asd,hello,No,,English,,,,cbf2cba5-f1dc-45bd-ae6d-2b042336ce6c test2.txt,test/test2.txt,2024-03-26,Open,,,,,No,,sfsdfd,No,,English,,,,c4d5e0f1-f6e1-4a77-a7c0-a4317404da00 diff --git a/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandlerSpec.scala b/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandlerSpec.scala index a555314..8ce9d7d 100644 --- 
a/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandlerSpec.scala +++ b/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandlerSpec.scala @@ -13,7 +13,7 @@ class CSVHandlerSpec extends AnyFlatSpec with BeforeAndAfterEach { val filePath: String = getClass.getResource("/sample-for-csv-handler.csv").getPath val metadataNames: List[String] = List("ClosureStatus", "ClosurePeriod") - "loadCSV" should "read the file and return FileData with all the rows" in { + "loadCSV with path and metadata names" should "read the file and return FileData with all the rows" in { val csvHandler = new CSVHandler val fileData = csvHandler.loadCSV(filePath, metadataNames) @@ -34,6 +34,49 @@ class CSVHandlerSpec extends AnyFlatSpec with BeforeAndAfterEach { fileData should be(expected) } + "loadCSV with path " should "read the file and return FileRows" in { + val csvHandler = new CSVHandler + val fileRows = csvHandler.loadCSV(filePath) + + val expected = List( + FileRow( + "16b2f65c-ec50-494b-824b-f8c08e6b575c", + List( + Metadata("Closure status", "Closed"), + Metadata("UUID", "16b2f65c-ec50-494b-824b-f8c08e6b575c"), + Metadata("Closure Period", "10"), + Metadata("Filename", "file1.jpg"), + Metadata("Date last modified", "2020-05-29"), + Metadata("Filepath", "aa/file.jpg") + ) + ), + FileRow( + "18449d9b-6a86-40b4-8855-b872a79bebad", + List( + Metadata("Closure status", "Open"), + Metadata("UUID", "18449d9b-6a86-40b4-8855-b872a79bebad"), + Metadata("Closure Period", ""), + Metadata("Filename", "file2.jpg"), + Metadata("Date last modified", "2020-05-29"), + Metadata("Filepath", "aa/file.jpg") + ) + ), + FileRow( + "61b49923-daf7-4140-98f1-58ba6cbed61f", + List( + Metadata("Closure status", "Open"), + Metadata("UUID", "61b49923-daf7-4140-98f1-58ba6cbed61f"), + Metadata("Closure Period", ""), + Metadata("Filename", "file3.jpg"), + Metadata("Date last modified", "2020-05-29"), + Metadata("Filepath", "aa/file.jpg") + ) + ) + ) + + fileRows should be(expected) 
+ } + "writeCsv" should "read the file and return FileData with all the rows" in { val csvHandler = new CSVHandler diff --git a/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/LambdaSpec.scala b/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/LambdaSpec.scala index 0d250ab..3a10195 100644 --- a/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/LambdaSpec.scala +++ b/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/LambdaSpec.scala @@ -1,14 +1,14 @@ package uk.gov.nationalarchives.draftmetadatavalidator import com.amazonaws.services.lambda.runtime.Context -import com.amazonaws.services.lambda.runtime.events.APIGatewayProxyRequestEvent import com.github.tomakehurst.wiremock.client.WireMock.{aResponse, get, put, urlEqualTo} -import com.github.tomakehurst.wiremock.stubbing.StubMapping +import com.github.tomakehurst.wiremock.http.RequestMethod +import com.github.tomakehurst.wiremock.stubbing.{ServeEvent, StubMapping} import org.mockito.MockitoSugar.mock import org.scalatest.matchers.should.Matchers.{convertToAnyShouldWrapper, equal} import java.nio.file.{Files, Paths} -import scala.jdk.CollectionConverters.MapHasAsJava +import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsJava} class LambdaSpec extends ExternalServicesSpec { @@ -48,5 +48,16 @@ class LambdaSpec extends ExternalServicesSpec { val input = Map("consignmentId" -> consignmentId).asJava val response = new Lambda().handleRequest(input, mockContext) response.getStatusCode should equal(200) + + val s3Interactions: Iterable[ServeEvent] = wiremockS3.getAllServeEvents.asScala.filter(serveEvent => serveEvent.getRequest.getMethod == RequestMethod.PUT).toList + s3Interactions.size shouldBe 1 + + val csvWriteEvent = s3Interactions.head + val expectedCSVHeader = + "Filename,Filepath,Date last modified,Closure status,Closure Start Date,Closure Period,FOI exemption code,FOI decision asserted,Is the title sensitive for the public?,Add alternative title 
without the file extension,Description,Is the description sensitive for the public?,Alternative description,Language,Date of the record,Translated title of record,Former reference,UUID,Error" + val expectedCSVRow1 = "test3.txt,test/test3.txt,12/2/2345,Closed,,,,,No,,hhhhh,No,,English,,,,a060c57d-1639-4828-9a7a-67a7c64dbf6c,date_last_modified: format.date" + val csvLines = csvWriteEvent.getRequest.getBodyAsString.split("\\n") + csvLines(0).strip() shouldBe expectedCSVHeader + csvLines(1).strip() shouldBe expectedCSVRow1 } }