Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/edan verifier utility #531

Draft
wants to merge 19 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ export enum eVocabularyID {
eWorkflowTypeCookJob,
eWorkflowTypeIngestion,
eWorkflowTypeUpload,
eWorkflowTypeVerifier, // ADDED: EM (2023-01-28)
eWorkflowStepTypeStart,
eWorkflowEventIngestionUploadAssetVersion,
eWorkflowEventIngestionIngestObject,
Expand Down
1 change: 1 addition & 0 deletions server/cache/VocabularyCache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ export class VocabularyCache {
case 'Cook Job': eVocabEnum = COMMON.eVocabularyID.eWorkflowTypeCookJob; break;
case 'Ingestion': eVocabEnum = COMMON.eVocabularyID.eWorkflowTypeIngestion; break;
case 'Upload': eVocabEnum = COMMON.eVocabularyID.eWorkflowTypeUpload; break;
case 'Verifier': eVocabEnum = COMMON.eVocabularyID.eWorkflowTypeVerifier; break;
}
} break;

Expand Down
2 changes: 2 additions & 0 deletions server/db/api/WorkflowReport.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export class WorkflowReport extends DBC.DBObject<WorkflowReportBase> implements
idWorkflow!: number;
MimeType!: string;
Data!: string;
Name!: string | null;
EMaslowskiQ marked this conversation as resolved.
Show resolved Hide resolved

constructor(input: WorkflowReportBase) {
super(input);
Expand Down Expand Up @@ -43,6 +44,7 @@ export class WorkflowReport extends DBC.DBObject<WorkflowReportBase> implements
Workflow: { connect: { idWorkflow }, },
MimeType,
Data,
Name: this.Name,
EMaslowskiQ marked this conversation as resolved.
Show resolved Hide resolved
},
}) ? true : /* istanbul ignore next */ false;
} catch (error) /* istanbul ignore next */ {
Expand Down
1 change: 1 addition & 0 deletions server/db/prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,7 @@ model WorkflowReport {
idWorkflow Int
MimeType String @mariasql.VarChar(256)
Data String @mariasql.LongText
Name String? @mariasql.VarChar(255)
Workflow Workflow @relation(fields: [idWorkflow], references: [idWorkflow], onDelete: NoAction, onUpdate: NoAction, map: "fk_workflowreport_workflow1")

@@index([idWorkflow], map: "fk_workflowreport_workflow1")
Expand Down
Binary file modified server/db/sql/models/Packrat.mwb
Binary file not shown.
Binary file added server/db/sql/models/Packrat.mwb.bak
Binary file not shown.
6 changes: 5 additions & 1 deletion server/db/sql/scripts/Packrat.ALTER.sql
Original file line number Diff line number Diff line change
Expand Up @@ -483,4 +483,8 @@ UPDATE Unit SET ARKPrefix = 'uj5' WHERE Abbreviation = 'OCIO';
-- 2022-11-11 Jon
ALTER TABLE ModelSceneXref MODIFY COLUMN `NAME` varchar(512) DEFAULT NULL;

-- 2022-11-11 Deployed to Staging and Production
-- 2022-11-11 Deployed to Staging and Production

-- 2023-01-29 Eric
-- Register the 'Verifier' workflow type alongside the other idVocabularySet 22 terms (mirrors Packrat.DATA.sql).
INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (22, 4, 'Verifier');
-- Optional display/file name for a workflow report. The column belongs to WorkflowReport (see the Prisma
-- model); identifiers must be quoted with backticks, since single quotes denote a string literal.
ALTER TABLE WorkflowReport ADD COLUMN `Name` varchar(255) DEFAULT NULL;
1 change: 1 addition & 0 deletions server/db/sql/scripts/Packrat.DATA.sql
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (23, 1, 'Ingest
INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (23, 2, 'Ingestion: Ingest Object');
INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (22, 2, 'Ingestion');
INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (22, 3, 'Upload');
INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (22, 4, 'Verifier');
INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (18, 2, 'Image');
INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (24, 1, 'mm');
INSERT INTO Vocabulary (idVocabularySet, SortOrder, Term) VALUES (24, 2, 'cm');
Expand Down
1 change: 1 addition & 0 deletions server/db/sql/scripts/Packrat.SCHEMA.sql
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,7 @@ CREATE TABLE IF NOT EXISTS `WorkflowReport` (
`idWorkflow` int(11) NOT NULL,
`MimeType` varchar(256) NOT NULL,
`Data` longtext NOT NULL,
`Name` varchar(256) DEFAULT NULL,
PRIMARY KEY (`idWorkflowReport`)
) ENGINE=InnoDB DEFAULT CHARSET=UTF8MB4;

Expand Down
6 changes: 6 additions & 0 deletions server/http/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import { Downloader, download } from './routes/download';
import { errorhandler } from './routes/errorhandler';
import { WebDAVServer } from './routes/WebDAVServer';

import * as Verifiers from './routes/verifiers';
EMaslowskiQ marked this conversation as resolved.
Show resolved Hide resolved

import express, { Request, Express, RequestHandler } from 'express';
import cors from 'cors';
import { ApolloServer } from 'apollo-server-express';
Expand Down Expand Up @@ -87,6 +89,10 @@ export class HttpServer {
this.app.get(`${Downloader.httpRoute}*`, HttpServer.idRequestMiddleware2);
this.app.get(`${Downloader.httpRoute}*`, download);

// endpoints for verifiers.
this.app.get('/verifier',Verifiers.routeRequest); // catch in case of misuse
this.app.get('/verifier/:id', Verifiers.routeRequest);

const WDSV: WebDAVServer | null = await WebDAVServer.server();
if (WDSV) {
this.app.use(WebDAVServer.httpRoute, HttpServer.idRequestMiddleware2);
Expand Down
99 changes: 99 additions & 0 deletions server/http/routes/dataQueries.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/* eslint-disable @typescript-eslint/no-unused-vars */

import { Request, Response } from 'express';
import * as H from '../../utils/helpers';
import * as LOG from '../../utils/logger';
import * as DBAPI from '../../db';

import GraphQLApi from '../../graphql';
import { GetSystemObjectDetailsInput, GetSystemObjectDetailsResult } from '../../types/graphql';

/**
 * Dispatches a data-query HTTP request to the handler named by the `:id` route parameter.
 *
 * Supported values: `systemObject`. A missing parameter is answered with HTTP 400 and an
 * unsupported one with HTTP 404; both are logged.
 *
 * @param request  incoming express request; `request.params.id` selects the query to run
 * @param response express response the selected handler (or the error message) is written to
 */
export async function routeRequest(request: Request, response: Response): Promise<void> {
    const detailsToReturn: string | undefined = request.params.id;

    // if nothing then complain
    if (detailsToReturn === undefined) {
        LOG.error('HTTP request: incorrect usage of endpoint', LOG.LS.eHTTP);
        response.status(400).send('Request failed. Incorrect use of endpoint. Be sure to include what you are looking for');
        return;
    }

    // handle the proper type
    switch (detailsToReturn) {
        case 'systemObject':
            await getSystemObjectDetails(request, response);
            return;

        default:
            LOG.error(`HTTP request: unsupported request (${detailsToReturn})`, LOG.LS.eHTTP);
            // the caller-supplied path segment is echoed back, so send it as plain text rather than
            // express' default text/html to keep it from being interpreted as markup
            response.status(404).type('text/plain').send(`Request failed. Unsupported request/path (${detailsToReturn})`);
    }
}

// convenience routine for getting system object details to be used with routes.
// NOTE: not connected as it should not be 'live' until an API is created and protected
/**
 * Fetches every SystemObject from the DB, resolves its details through the GraphQL API and
 * returns the results as a downloadable file containing one JSON document per line.
 *
 * Query parameters:
 *  - `limit`    maximum number of objects to return (default 10000; non-numeric values fall back to the default)
 *  - `objectId` when > 0, only the system object with this id is returned (default -1 = all)
 *
 * @param req      express request carrying the query parameters above
 * @param response express response receiving either the attachment or a status message
 */
async function getSystemObjectDetails(req: Request, response: Response): Promise<void> {

    // grab our config options from query params (explicit radix; NaN falls back to the default)
    const parsedLimit: number = parseInt(String(req.query.limit), 10);
    const subjectLimit: number = Number.isNaN(parsedLimit) ? 10000 : parsedLimit;
    const parsedObjectId: number = parseInt(String(req.query.objectId), 10);
    const systemObjectId: number = Number.isNaN(parsedObjectId) ? -1 : parsedObjectId;

    // fetch all system objects from Packrat DB to get list of IDs
    const systemObjects: DBAPI.SystemObject[] | null = await DBAPI.SystemObject.fetchAll(); /* istanbul ignore if */
    if (!systemObjects) {
        sendResponseMessage(response, false, 'could not get system objects from DB');
        return;
    }
    if (systemObjects.length <= 0) {
        sendResponseMessage(response, false, 'no system objects found in DB');
        return;
    }
    LOG.info(`Getting SystemObject Details processing ${systemObjects.length} ids`, LOG.LS.eGQL);

    // a single API instance serves every lookup; no need to rebuild it per object
    const graphQLApi = new GraphQLApi(true);

    // loop through system objects and collect the details of each one
    const output: string[] = [];
    for (const systemObject of systemObjects) {

        // the limit counts returned results (not scanned rows) so that an objectId filter can
        // still match an object located past the first `subjectLimit` rows
        if (output.length >= subjectLimit) break;

        const idSystemObject: number = systemObject.fetchID();
        if (systemObjectId > 0 && idSystemObject !== systemObjectId) continue;

        const input: GetSystemObjectDetailsInput = { idSystemObject };
        const results: GetSystemObjectDetailsResult = await graphQLApi.getSystemObjectDetails(input);

        // TODO: get asset details and inject into above results on 'asset' field
        //       getAssetDetailsForSystemObject()

        // store our results
        output.push(H.Helpers.JSONStringify(results));
    }

    // if we have results then return them as a file, overwriting any message
    if (output.length > 0) {
        const name = 'SystemObjectDetails_' + new Date().toISOString().split('T')[0];
        response.setHeader('Content-disposition', `attachment; filename=${name}.json`);
        response.set('Content-Type', 'text/json');
        response.statusMessage = 'Gathering system object details SUCCEEDED!';
        response.status(200).send(output.join('\n'));
        return;
    }

    const message = 'Getting system object details succeeded, but nothing to return.';
    LOG.info(message, LOG.LS.eGQL);
    sendResponseMessage(response, true, message);
}

/**
 * Logs the outcome of a data query and sends the same text back to the client.
 * Successes are logged at info level and failures at error level.
 *
 * @param response express response to write the message to
 * @param success  whether the operation succeeded
 * @param message  detail appended to the SUCCEEDED/FAILED prefix
 */
function sendResponseMessage(response: Response, success: boolean, message: string): void {
    const text = `Getting data from database ${(success) ? 'SUCCEEDED' : 'FAILED'}: ${message}`;
    if (success)
        LOG.info(text, LOG.LS.eGQL);
    else
        LOG.error(text, LOG.LS.eGQL);
    response.send(text);
}
14 changes: 13 additions & 1 deletion server/http/routes/download.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,19 @@ export class Downloader {
const mimeType: string = WFReports[0].MimeType;
const idWorkflowReport: number = WFReports[0].idWorkflowReport;

this.response.setHeader('Content-disposition', `inline; filename=WorkflowReport.${idWorkflowReport}.htm`);
// get/set our filename depending on if it's present or not
let filename: string = `WorkflowReport.${idWorkflowReport}`;
if(WFReports[0].Name) filename = WFReports[0].Name;

// add our extension based on mimeType
switch(mimeType) {
case 'text/html': filename += '.htm'; break;
case 'text/csv': filename += '.csv'; break;
default: filename += '.htm';
EMaslowskiQ marked this conversation as resolved.
Show resolved Hide resolved
}

// set our properties and configure the response
this.response.setHeader('Content-disposition', `inline; filename=${filename}`);
if (mimeType)
this.response.setHeader('Content-type', mimeType);
let first: boolean = true;
Expand Down
182 changes: 182 additions & 0 deletions server/http/routes/verifiers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
/* eslint-disable @typescript-eslint/no-unused-vars */

import { Request, Response } from 'express';
import * as COL from '../../collections/interface/';
import * as WF from '../../workflow/interface';
import * as REP from '../../report/interface';
import * as COMMON from '@dpo-packrat/common';
import * as H from '../../utils/helpers';
import * as LOG from '../../utils/logger';
import * as WFV from '../../workflow/impl/Packrat/WorkflowVerifier';
import * as DBAPI from '../../db';
import { ASL, LocalStore } from '../../utils/localStore';

/**
 * Dispatches a verifier HTTP request to the verifier named by the `:id` route parameter.
 *
 * Supported values: `edan`. A missing parameter is answered with HTTP 400 and an
 * unsupported one with HTTP 404; both are logged.
 *
 * @param request  incoming express request; `request.params.id` selects the verifier to run
 * @param response express response the verifier (or the error message) is written to
 */
export async function routeRequest(request: Request, response: Response): Promise<void> {
    const verifierToRun: string | undefined = request.params.id;

    // if nothing then complain
    if (verifierToRun === undefined) {
        LOG.error('HTTP request: incorrect usage of endpoint', LOG.LS.eHTTP);
        response.status(400).send('Request failed. Incorrect use of endpoint. Be sure to include which verifier to use.');
        return;
    }

    // handle the proper type
    switch (verifierToRun) {
        case 'edan':
            await verifyEdanWorkflow(request, response);
            return;

        default:
            LOG.error(`HTTP request: unsupported verify type (${verifierToRun})`, LOG.LS.eHTTP);
            // the caller-supplied path segment is echoed back, so send it as plain text rather than
            // express' default text/html to keep it from being interpreted as markup
            response.status(404).type('text/plain').send(`Request failed. Unsupported verify type/path (${verifierToRun})`);
    }
}

// TODO: progressively build up report so that if requested before done it returns partial results
//       requires changing verifier to append after each subject, connecting tightly to workflow logic
// TODO: fork verifier(s) so it does not use the same event loop as the main server improving performance
//       https://nodejs.org/api/child_process.html
// TODO: support server side events (SSE) to provide notifications to client on progress
/**
 * Creates, configures and runs the EDAN verifier workflow, stores its CSV output in the
 * workflow report and answers with either the CSV itself or a page linking to the report.
 *
 * Query parameters:
 *  - `returnFile` 'true' to receive the CSV as an attachment instead of a download link
 *  - `details`    'true' to enable detailed logs in the verifier
 *  - `limit`      maximum number of subjects (default 10000; non-numeric values fall back to the default)
 *  - `objectId`   restrict to a single system object (default -1)
 *
 * @param req      express request carrying the query parameters above
 * @param response express response receiving the CSV, the link markup or a failure message
 */
async function verifyEdanWorkflow(req: Request, response: Response): Promise<void> {
    LOG.info('(Workflows) Verifying EDAN Records from endpoint...', LOG.LS.eGQL);

    const workflowEngine: WF.IWorkflowEngine | null = await WF.WorkflowFactory.getInstance();
    if (!workflowEngine) {
        sendResponseMessage(response, false, 'verifiers createWorkflow could not load WorkflowEngine');
        return;
    }

    // create our workflow (but don't start) and add it to the DB
    const wfParams: WF.WorkflowParameters = {
        eWorkflowType: COMMON.eVocabularyID.eWorkflowTypeVerifier,
        //idSystemObject: undefined, // not operating on SystemObjects
        //idProject: TODO: populate with idProject
        //idUserInitiator: this.user?.idUser, // not getting user at this point (but should when behind wall)
        autoStart: false // don't start the workflow because we need to configure it
    };
    const workflow: WF.IWorkflow | null = await workflowEngine.create(wfParams);
    if (!workflow) {
        sendResponseMessage(response, false, `unable to create EDAN Verifier workflow: ${H.Helpers.JSONStringify(wfParams)}`);
        return;
    }

    // grab our config options from query params
    // REVIEW (Collaborator): The theory here is that you would pass these additional parameters to the
    //   workflow via WorkflowParameters.parameters, which has type any, and can be populated with the
    //   config object needed by your specific workflow. If you do this, then you can simply
    //   autoStart: true, instead of doing a two-step process of creating, then starting, the workflow.
    // REVIEW (Author): I was thinking this could be a default property vs. something specific to just
    //   the EDAN verifier. I'll look at how WorkflowEngine references this and update.
    // REVIEW (Collaborator): The key point here is that the workflow parameters exist to supply standard
    //   params, plus workflow-specific params. You can and should take advantage of that to pass all of
    //   the params into the workflow, instead of having to create it, then configure it, then start it.
    //   Just create it with the required params! If you do this, then you can probably autoStart: true it.
    const returnFile: boolean = req.query.returnFile === 'true';
    const detailedLogs: boolean = req.query.details === 'true';
    // explicit radix; missing or non-numeric values fall back to the defaults instead of NaN
    const parsedLimit: number = parseInt(String(req.query.limit), 10);
    const subjectLimit: number = Number.isNaN(parsedLimit) ? 10000 : parsedLimit;
    const parsedObjectId: number = parseInt(String(req.query.objectId), 10);
    const systemObjectId: number = Number.isNaN(parsedObjectId) ? -1 : parsedObjectId;

    // cast it to our verifier type (TODO: catch fails) and configure
    const verifierWorkflow = workflow as WFV.WorkflowVerifier;
    verifierWorkflow.config = {
        collection: COL.CollectionFactory.getInstance(),
        detailedLogs,
        subjectLimit,
        systemObjectId
    };

    // start our workflow
    // TODO: check during execution for it timing out
    // REVIEW (Collaborator): With just 3K subjects, this will complete in minutes (5-10?). When Packrat
    //   has 300K subjects, this will need to be an offline process, run asynchronously. Why not just plan
    //   for that future now, and return an HTTP response of "EDAN Verifier Job Started" ... combined with
    //   emailing a link to the report upon completion?
    const workflowResult: H.IOResults = await verifierWorkflow.start();
    if (!workflowResult || workflowResult.success === false) {
        sendResponseMessage(response, false, 'EDAN Verifier workflow failed to start. ' + workflowResult?.error);
        return;
    }

    // get/create our report
    const iReport: REP.IReport | null = await REP.ReportFactory.getReport();
    if (!iReport) {
        sendResponseMessage(response, false, 'EDAN Verifier workflow failed to get report.');
        return;
    }

    // get the local store, our current report ID and fetch it
    // WHY: can the IReport higher up expose the id for fetch or grabbing the ID?
    const LS: LocalStore = await ASL.getOrCreateStore();
    const idWorkflowReport: number | undefined = LS.getWorkflowReportID();
    if (!idWorkflowReport) {
        sendResponseMessage(response, false, 'could not get workflow report ID');
        return;
    }

    // get our report from the DB and configure
    const workflowReport = await DBAPI.WorkflowReport.fetch(idWorkflowReport);
    if (!workflowReport) {
        sendResponseMessage(response, false, `unable to fetch report with id ${idWorkflowReport}`);
        return;
    }

    // without CSV output there is nothing to deliver: report the actual problem and record it
    const csvOutput = verifierWorkflow.result?.csvOutput;
    if (!csvOutput) {
        const error: string = 'Error with verifier result';
        sendResponseMessage(response, false, error);
        workflowReport.Data = error;
        await workflowReport.update();
        return;
    }

    // store the CSV in our report; awaited so the link handed out below points at persisted data
    const now: string = new Date().toISOString().split('T')[0];
    workflowReport.MimeType = 'text/csv';
    workflowReport.Data = csvOutput;
    workflowReport.Name = 'EDANVerifier_Results_' + now;
    await workflowReport.update();

    // if we return the file then do so, overwriting any message
    if (returnFile) {
        response.setHeader('Content-disposition', `attachment; filename=${workflowReport.Name}.csv`);
        response.set('Content-Type', 'text/csv');
        response.statusMessage = 'Verifying EDAN records SUCCEEDED!';
        response.status(200).send(csvOutput);
        return;
    }

    // create our download URL for future use, derived from the incoming request so the link also
    // works when the server is not reached via localhost (falls back to the local dev address)
    const host: string = req.get('host') || 'localhost:4000';
    const workflowReportURL: string = `${req.protocol}://${host}/download?idWorkflowReport=${idWorkflowReport}`;
    LOG.info(`EDAN verifier SUCCEEDED!! (${workflowReportURL})`, LOG.LS.eGQL);
    sendResponseMessage(response, true, getResponseMarkup(true, 'Download Report', workflowReportURL));
}

/**
 * Logs the verifier outcome and writes the response body.
 *
 * On success the message is sent as-is. On failure it is prefixed with a FAILED banner and
 * sent with HTTP 500, so clients can detect the failure without parsing the body.
 *
 * @param response express response to write to
 * @param success  whether the verifier run succeeded
 * @param message  body to send on success, or the failure detail
 */
function sendResponseMessage(response: Response, success: boolean, message: string): void {
    if (success) {
        LOG.info(`EDAN Verifier SUCCEEDED: ${message}`, LOG.LS.eGQL);
        response.send(message);
        return;
    }

    LOG.error(`EDAN Verifier FAILED: ${message}`, LOG.LS.eGQL);
    response.status(500).send(`Verifying EDAN records FAILED: ${message}`);
}

/** Escapes the characters that are significant in HTML text and attribute values. */
function escapeHtml(text: string): string {
    return text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}

/**
 * Builds the small branded HTML card returned to the browser.
 *
 * @param success when true the card shows `message` as a link to `url`; otherwise it shows an
 *                ERROR banner followed by `message`
 * @param message link text (success) or error detail (failure); omitted values render as empty
 * @param url     link target, only used when `success` is true; omitted values render as empty
 * @returns the HTML markup; `message` and `url` are HTML-escaped before insertion
 */
function getResponseMarkup(success: boolean, message?: string, url?: string): string {
    // escape caller-supplied text so it cannot break out of the attribute/element it is placed in
    const safeMessage: string = escapeHtml(message ?? '');
    const safeUrl: string = escapeHtml(url ?? '');

    const body: string = success
        ? `<a href="${safeUrl}" style="color: #0079C4; font-weight: bold; text-decoration: none;">${safeMessage}</a>`
        : '<span style="font-size:1.5rem; color:red;">ERROR</span>' + `<p>${safeMessage}</p>`;

    return [
        '<div style="display: flex;align-items: center;flex-direction: column;margin-top:3rem;">',
        '<div style="width: 15rem;border-radius: 1rem;background-color: #0079C4;display: flex;flex-direction: column;">',
        '<img src="https://smithsonian.github.io/dpo-packrat/images/logo-name.png" style="width: 100%;object-fit: none;">',
        '<div style="border: 1px solid #0079c4;text-align: center;font-size: 1rem;background-color: white;border-radius: 0 0 1rem 1rem;border-top: 0px;">',
        body,
        '</div>',
        '</div>',
        '</div>',
    ].join('');
}
Loading