diff --git a/README.md b/README.md index 862ad0585..53620da17 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Transit Data Manager -The core application for Conveyal's transit data tools suite. +The core application for IBI Group's transit data tools suite. ## Documentation diff --git a/configurations/default/env.yml.tmp b/configurations/default/env.yml.tmp index eb5769962..119b399a6 100644 --- a/configurations/default/env.yml.tmp +++ b/configurations/default/env.yml.tmp @@ -15,5 +15,5 @@ SPARKPOST_EMAIL: email@example.com GTFS_DATABASE_URL: jdbc:postgresql://localhost/catalogue # GTFS_DATABASE_USER: # GTFS_DATABASE_PASSWORD: -#MONGO_URI: mongodb://mongo-host:27017 +#MONGO_HOST: mongo-host:27017 MONGO_DB_NAME: catalogue diff --git a/pom.xml b/pom.xml index 54c227b4d..6486d52ad 100644 --- a/pom.xml +++ b/pom.xml @@ -270,7 +270,7 @@ com.github.conveyal gtfs-lib - 7.0.2 + 7.0.4 diff --git a/src/main/java/com/conveyal/datatools/common/status/MonitorableJob.java b/src/main/java/com/conveyal/datatools/common/status/MonitorableJob.java index a3e487cbd..d134b2a34 100644 --- a/src/main/java/com/conveyal/datatools/common/status/MonitorableJob.java +++ b/src/main/java/com/conveyal/datatools/common/status/MonitorableJob.java @@ -79,7 +79,8 @@ public enum JobType { MONITOR_SERVER_STATUS, MERGE_FEED_VERSIONS, RECREATE_BUILD_IMAGE, - UPDATE_PELIAS + UPDATE_PELIAS, + AUTO_PUBLISH_FEED_VERSION } public MonitorableJob(Auth0UserProfile owner, String name, JobType type) { diff --git a/src/main/java/com/conveyal/datatools/common/utils/SparkUtils.java b/src/main/java/com/conveyal/datatools/common/utils/SparkUtils.java index d93681227..522beca71 100644 --- a/src/main/java/com/conveyal/datatools/common/utils/SparkUtils.java +++ b/src/main/java/com/conveyal/datatools/common/utils/SparkUtils.java @@ -137,9 +137,8 @@ public static void logMessageAndHalt( if (statusCode >= 500) { LOG.error(message); - - // create report to notify bugsnag if configured - ErrorUtils.reportToBugsnag(e, request.attribute("user")); + Auth0UserProfile userProfile = request != null ? 
request.attribute("user") : null; + ErrorUtils.reportToBugsnag(e, userProfile); } JsonNode json = getObjectNode(message, statusCode, e); diff --git a/src/main/java/com/conveyal/datatools/manager/controllers/api/DeploymentController.java b/src/main/java/com/conveyal/datatools/manager/controllers/api/DeploymentController.java index 7ef59d772..7600bf69b 100644 --- a/src/main/java/com/conveyal/datatools/manager/controllers/api/DeploymentController.java +++ b/src/main/java/com/conveyal/datatools/manager/controllers/api/DeploymentController.java @@ -10,6 +10,7 @@ import com.conveyal.datatools.common.utils.aws.S3Utils; import com.conveyal.datatools.manager.auth.Auth0UserProfile; import com.conveyal.datatools.manager.jobs.DeployJob; +import com.conveyal.datatools.manager.jobs.PeliasUpdateJob; import com.conveyal.datatools.manager.models.Deployment; import com.conveyal.datatools.manager.models.EC2InstanceSummary; import com.conveyal.datatools.manager.models.FeedSource; @@ -485,6 +486,23 @@ private static String deploy (Request req, Response res) { return SparkUtils.formatJobMessage(job.jobId, "Deployment initiating."); } + /** + * Create a Pelias update job based on an existing, live deployment + */ + private static String peliasUpdate (Request req, Response res) { + Auth0UserProfile userProfile = req.attribute("user"); + Deployment deployment = getDeploymentWithPermissions(req, res); + Project project = Persistence.projects.getById(deployment.projectId); + if (project == null) { + logMessageAndHalt(req, 400, "Internal reference error. Deployment's project ID is invalid"); + } + + // Execute the pelias update job and keep track of it + PeliasUpdateJob peliasUpdateJob = new PeliasUpdateJob(userProfile, "Updating Local Places Index", deployment); + JobUtils.heavyExecutor.execute(peliasUpdateJob); + return SparkUtils.formatJobMessage(peliasUpdateJob.jobId, "Pelias update initiating."); + } + /** * Uploads a file from Spark request object to the s3 bucket of the deployment the Pelias Update Job is associated with. * Follows https://github.com/ibi-group/datatools-server/blob/dev/src/main/java/com/conveyal/datatools/editor/controllers/api/EditorController.java#L111 @@ -537,6 +555,7 @@ public static void register (String apiPrefix) { fullJson.addMixin(Deployment.class, Deployment.DeploymentWithEc2InstancesMixin.class); post(apiPrefix + "secure/deployments/:id/deploy/:target", DeploymentController::deploy, slimJson::write); + post(apiPrefix + "secure/deployments/:id/updatepelias", DeploymentController::peliasUpdate, slimJson::write); post(apiPrefix + "secure/deployments/:id/deploy/", ((request, response) -> { logMessageAndHalt(request, 400, "Must provide valid deployment target name"); return null; diff --git a/src/main/java/com/conveyal/datatools/manager/controllers/api/FeedSourceController.java b/src/main/java/com/conveyal/datatools/manager/controllers/api/FeedSourceController.java index c8420cbb6..ce162c215 100644 --- a/src/main/java/com/conveyal/datatools/manager/controllers/api/FeedSourceController.java +++ b/src/main/java/com/conveyal/datatools/manager/controllers/api/FeedSourceController.java @@ -252,13 +252,14 @@ private static FeedSource updateExternalFeedResource(Request req, Response res) } // Hold previous value for use when updating third-party resource String previousValue = prop.value; - // Update the property in our database. 
- ExternalFeedSourceProperty updatedProp = Persistence.externalFeedSourceProperties.updateField( - propertyId, "value", entry.getValue().asText()); - // Trigger an event on the external resource + // Update the property with the value to be submitted. + prop.value = entry.getValue().asText(); + + // Trigger an event on the external resource. + // After updating the external resource, we will update Mongo with values sent by the external resource. try { - externalFeedResource.propertyUpdated(updatedProp, previousValue, req.headers("Authorization")); + externalFeedResource.propertyUpdated(prop, previousValue, req.headers("Authorization")); } catch (IOException e) { logMessageAndHalt(req, 500, "Could not update external feed source", e); } diff --git a/src/main/java/com/conveyal/datatools/manager/controllers/api/FeedVersionController.java b/src/main/java/com/conveyal/datatools/manager/controllers/api/FeedVersionController.java index 73d22c009..eac4ba000 100644 --- a/src/main/java/com/conveyal/datatools/manager/controllers/api/FeedVersionController.java +++ b/src/main/java/com/conveyal/datatools/manager/controllers/api/FeedVersionController.java @@ -1,6 +1,7 @@ package com.conveyal.datatools.manager.controllers.api; import com.conveyal.datatools.common.utils.SparkUtils; +import com.conveyal.datatools.common.utils.aws.CheckedAWSException; import com.conveyal.datatools.common.utils.aws.S3Utils; import com.conveyal.datatools.manager.DataManager; import com.conveyal.datatools.manager.auth.Auth0UserProfile; @@ -8,7 +9,7 @@ import com.conveyal.datatools.manager.jobs.CreateFeedVersionFromSnapshotJob; import com.conveyal.datatools.manager.jobs.GisExportJob; import com.conveyal.datatools.manager.jobs.MergeFeedsJob; -import com.conveyal.datatools.manager.jobs.MergeFeedsType; +import com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType; import com.conveyal.datatools.manager.jobs.ProcessSingleFeedJob; import com.conveyal.datatools.manager.models.FeedDownloadToken; import com.conveyal.datatools.manager.models.FeedRetrievalMethod; @@ -44,7 +45,7 @@ import static com.conveyal.datatools.common.utils.SparkUtils.logMessageAndHalt; import static com.conveyal.datatools.manager.controllers.api.FeedSourceController.checkFeedSourcePermissions; import static com.mongodb.client.model.Filters.eq; -import static com.conveyal.datatools.manager.jobs.MergeFeedsType.REGIONAL; +import static com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType.REGIONAL; import static com.mongodb.client.model.Filters.in; import static spark.Spark.delete; import static spark.Spark.get; @@ -119,8 +120,8 @@ private static String createFeedVersionViaUpload(Request req, Response res) { LOG.info("Last modified: {}", new Date(newGtfsFile.lastModified())); // Check that the hashes of the feeds don't match, i.e. that the feed has changed since the last version. - // (as long as there is a latest version, i.e. the feed source is not completely new) - if (latestVersion != null && latestVersion.hash.equals(newFeedVersion.hash)) { + // (as long as there is a latest version, the feed source is not completely new) + if (newFeedVersion.isSameAs(latestVersion)) { // Uploaded feed matches latest. Delete GTFS file because it is a duplicate. 
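
Reviewer note: the duplicate-upload check above now delegates to `FeedVersion#isSameAs`, whose implementation is outside this excerpt. A minimal sketch of the null-safe hash comparison it presumably wraps, based on the inline check it replaces (class and field names abbreviated for illustration):

```java
// Illustrative sketch only; the real logic lives in FeedVersion and may differ.
class FeedVersionSketch {
    String id;
    String hash; // content hash of the GTFS zip, as used by the previous inline check

    /** Null-safe comparison of content hashes, replacing the previous latestVersion.hash.equals(...) check. */
    boolean isSameAs(FeedVersionSketch otherVersion) {
        return otherVersion != null && this.hash != null && this.hash.equals(otherVersion.hash);
    }
}
```
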
LOG.error("Upload version {} matches latest version {}.", newFeedVersion.id, latestVersion.id); newGtfsFile.delete(); @@ -260,30 +261,33 @@ private static FeedVersion publishToExternalResource (Request req, Response res) // notify any extensions of the change try { - for (String resourceType : DataManager.feedResources.keySet()) { - DataManager.feedResources.get(resourceType).feedVersionCreated(version, null); - } - if (!DataManager.isExtensionEnabled("mtc")) { - // update published version ID on feed source - Persistence.feedSources.updateField(version.feedSourceId, "publishedVersionId", version.namespace); - return version; - } else { - // NOTE: If the MTC extension is enabled, the parent feed source's publishedVersionId will not be updated to the - // version's namespace until the FeedUpdater has successfully downloaded the feed from the share S3 bucket. - Date publishedDate = new Date(); - // Set "sent" timestamp to now and reset "processed" timestamp (in the case that it had previously been - // published as the active version. - version.sentToExternalPublisher = publishedDate; - version.processedByExternalPublisher = null; - Persistence.feedVersions.replace(version.id, version); - return version; - } + publishToExternalResource(version); + return version; } catch (Exception e) { logMessageAndHalt(req, 500, "Could not publish feed.", e); return null; } } + public static void publishToExternalResource(FeedVersion version) throws CheckedAWSException { + for (String resourceType : DataManager.feedResources.keySet()) { + DataManager.feedResources.get(resourceType).feedVersionCreated(version, null); + } + if (!DataManager.isExtensionEnabled("mtc")) { + // update published version ID on feed source + Persistence.feedSources.updateField(version.feedSourceId, "publishedVersionId", version.namespace); + } else { + // NOTE: If the MTC extension is enabled, the parent feed source's publishedVersionId will not be updated to the + // version's namespace until the FeedUpdater has successfully downloaded the feed from the share S3 bucket. + Date publishedDate = new Date(); + // Set "sent" timestamp to now and reset "processed" timestamp (in the case that it had previously been + // published as the active version. + version.sentToExternalPublisher = publishedDate; + version.processedByExternalPublisher = null; + Persistence.feedVersions.replace(version.id, version); + } + } + /** * HTTP endpoint to initiate an export of a shapefile containing the stops or routes of one or * more feed versions. 
NOTE: the job ID returned must be used by the requester to download the diff --git a/src/main/java/com/conveyal/datatools/manager/controllers/api/ProjectController.java b/src/main/java/com/conveyal/datatools/manager/controllers/api/ProjectController.java index 193720fbb..d237b753e 100644 --- a/src/main/java/com/conveyal/datatools/manager/controllers/api/ProjectController.java +++ b/src/main/java/com/conveyal/datatools/manager/controllers/api/ProjectController.java @@ -25,7 +25,6 @@ import spark.Request; import spark.Response; -import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Set; @@ -35,7 +34,7 @@ import static com.conveyal.datatools.common.utils.SparkUtils.formatJobMessage; import static com.conveyal.datatools.common.utils.SparkUtils.logMessageAndHalt; import static com.conveyal.datatools.manager.DataManager.publicPath; -import static com.conveyal.datatools.manager.jobs.MergeFeedsType.REGIONAL; +import static com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType.REGIONAL; import static spark.Spark.delete; import static spark.Spark.get; import static spark.Spark.post; diff --git a/src/main/java/com/conveyal/datatools/manager/extensions/mtc/MtcFeedResource.java b/src/main/java/com/conveyal/datatools/manager/extensions/mtc/MtcFeedResource.java index 246c15ba7..4226897f6 100644 --- a/src/main/java/com/conveyal/datatools/manager/extensions/mtc/MtcFeedResource.java +++ b/src/main/java/com/conveyal/datatools/manager/extensions/mtc/MtcFeedResource.java @@ -11,19 +11,27 @@ import com.conveyal.datatools.manager.models.FeedVersion; import com.conveyal.datatools.manager.models.Project; import com.conveyal.datatools.manager.persistence.Persistence; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; import static com.conveyal.datatools.manager.models.ExternalFeedSourceProperty.constructId; +import static com.mongodb.client.model.Filters.eq; /** * This class implements the {@link ExternalFeedResource} interface for the MTC RTD database list of carriers (transit @@ -40,11 +48,12 @@ public class MtcFeedResource implements ExternalFeedResource { public static final Logger LOG = LoggerFactory.getLogger(MtcFeedResource.class); + public static final String TEST_AGENCY = "test-agency"; + public static final String AGENCY_ID_FIELDNAME = "AgencyId"; + public static final String RESOURCE_TYPE = "MTC"; private String rtdApi, s3Bucket, s3Prefix; - public static final String AGENCY_ID_FIELDNAME = "AgencyId"; - public static final String RESOURCE_TYPE = "MTC"; public MtcFeedResource() { rtdApi = DataManager.getExtensionPropertyAsText(RESOURCE_TYPE, "rtd_api"); s3Bucket = DataManager.getExtensionPropertyAsText(RESOURCE_TYPE, "s3_bucket"); @@ -139,7 +148,8 @@ public void feedSourceCreated(FeedSource source, String authHeader) throws Illeg } /** - * Sync an updated property with the RTD database. Note: if the property is AgencyId and the value was previously + * Sync a property with the RTD database, and syncs Mongo with data returned from RTD. 
+ * Note: if the property is AgencyId and the value was previously * null create/register a new carrier with RTD. */ @Override @@ -161,6 +171,9 @@ public void propertyUpdated( // Otherwise, this is just a standard prop update. writeCarrierToRtd(carrier, false, authHeader); } + + // Fetch the agency properties from RTD and update the Mongo records from that instead of what was sent to RTD. + fetchCarrierFromRtdAndUpdateMongo(source, carrier, authHeader); } /** @@ -181,11 +194,16 @@ public void feedVersionCreated( constructId(feedVersion.parentFeedSource(), this.getResourceType(), AGENCY_ID_FIELDNAME) ); - if(agencyIdProp == null || agencyIdProp.value.equals("null")) { + if (agencyIdProp == null || agencyIdProp.value == null || agencyIdProp.value.equals("null")) { LOG.error("Could not read {} for FeedSource {}", AGENCY_ID_FIELDNAME, feedVersion.feedSourceId); return; } + if (agencyIdProp.value.equals(TEST_AGENCY)) { + LOG.info("Skipping S3 upload for unit test."); + return; + } + String keyName = String.format("%s%s.zip", this.s3Prefix, agencyIdProp.value); LOG.info("Pushing to MTC S3 Bucket: s3://{}/{}", s3Bucket, keyName); File file = feedVersion.retrieveGtfsFile(); @@ -202,7 +220,6 @@ public void feedVersionCreated( * Update or create a carrier and its properties with an HTTP request to the RTD. */ private void writeCarrierToRtd(RtdCarrier carrier, boolean createNew, String authHeader) throws IOException { - try { ObjectMapper mapper = new ObjectMapper(); @@ -222,10 +239,103 @@ private void writeCarrierToRtd(RtdCarrier carrier, boolean createNew, String aut osw.write(carrierJson); osw.flush(); osw.close(); - LOG.info("RTD API response: {}/{}", connection.getResponseCode(), connection.getResponseMessage()); + LOG.info( + "RTD API {} response: {}/{}", + connection.getRequestMethod(), + connection.getResponseCode(), + connection.getResponseMessage() + ); + } catch (Exception e) { + LOG.error("Error writing to RTD", e); + throw e; + } + } + + /** + * Fetch agency properties from RTD and update the ExternalFeedSourceProperty collection in Mongo. + */ + private void fetchCarrierFromRtdAndUpdateMongo(FeedSource source, RtdCarrier carrier, String authHeader) throws IOException { + try { + URL rtdUrl = new URL(rtdApi + "/Carrier/" + carrier.AgencyId); + LOG.info("Fetching to RTD URL: {}", rtdUrl); + HttpURLConnection connection = (HttpURLConnection) rtdUrl.openConnection(); + + connection.setRequestMethod("GET"); + connection.setRequestProperty("Content-Type", "application/json"); + connection.setRequestProperty("Accept", "application/json"); + connection.setRequestProperty("Authorization", authHeader); + + BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream())); + String inputLine; + StringBuilder response = new StringBuilder(); + + while ((inputLine = in.readLine()) != null) { + response.append(inputLine); + } + in.close(); + + LOG.info("RTD API GET response: {}/{}", connection.getResponseCode(), connection.getResponseMessage()); + + // Parse the response and update Mongo. + ObjectMapper responseMapper = new ObjectMapper(); + JsonNode node = responseMapper.readTree(response.toString()); + updateMongoExternalFeedProperties(source, node); } catch (Exception e) { LOG.error("Error writing to RTD", e); throw e; } } + + /** + * Updates Mongo using the provided JSON object from RTD. 
+ */ + void updateMongoExternalFeedProperties(FeedSource source, JsonNode rtdResponse) { + String resourceType = this.getResourceType(); + Iterator> fieldsIterator = rtdResponse.fields(); + List rtdKeys = new ArrayList<>(); + + // Iterate over fields found in body and update external properties accordingly. + while (fieldsIterator.hasNext()) { + Map.Entry entry = fieldsIterator.next(); + ExternalFeedSourceProperty property = new ExternalFeedSourceProperty( + source, + resourceType, + entry.getKey(), + convertRtdString(entry.getValue().asText()) + ); + + // Update the attributes in Mongo. + ExternalFeedSourceProperty existingProperty = Persistence.externalFeedSourceProperties.getById( + property.id + ); + if (existingProperty != null) { + Persistence.externalFeedSourceProperties.updateField( + property.id, + "value", + property.value + ); + } else { + Persistence.externalFeedSourceProperties.create(property); + } + + // Hold the received attribute keys to delete the extra ones from Mongo that are assumed not used. + rtdKeys.add(property.name); + } + + // Get the attributes stored in Mongo, remove those not in the RTD response. + Persistence.externalFeedSourceProperties.getFiltered(eq("feedSourceId", source.id)) + .stream() + .filter(property -> !rtdKeys.contains(property.name)) + .forEach(property -> Persistence.externalFeedSourceProperties.removeById(property.id)); + } + + /** + * This method converts the RTD attribute value "null" to "" by MTC request, + * so that it is displayed in the UI under Mtc Properties as "(none)". + * @return An empty string if the provided string is the string "null", else the passed string itself. + */ + static String convertRtdString(String s) { + if ("null".equals(s)) return ""; + return s; + } } diff --git a/src/main/java/com/conveyal/datatools/manager/extensions/mtc/RtdCarrier.java b/src/main/java/com/conveyal/datatools/manager/extensions/mtc/RtdCarrier.java index 920d7ee32..b9521a7fa 100644 --- a/src/main/java/com/conveyal/datatools/manager/extensions/mtc/RtdCarrier.java +++ b/src/main/java/com/conveyal/datatools/manager/extensions/mtc/RtdCarrier.java @@ -100,11 +100,12 @@ private String getPropId(FeedSource source, String fieldName) { /** * Get the value stored in the database for a particular field. - * - * TODO: Are there cases where this might throw NPEs? */ private String getValueForField (FeedSource source, String fieldName) { - return Persistence.externalFeedSourceProperties.getById(getPropId(source, fieldName)).value; + ExternalFeedSourceProperty property = Persistence.externalFeedSourceProperties.getById( + getPropId(source, fieldName) + ); + return property != null ? 
property.value : null; } /** diff --git a/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java b/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java index c6adecb10..c62b056b5 100644 --- a/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java +++ b/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java @@ -1,23 +1,22 @@ package com.conveyal.datatools.manager.gtfsplus; -import com.conveyal.datatools.common.utils.Consts; import com.conveyal.datatools.manager.DataManager; import com.conveyal.datatools.manager.models.FeedVersion; import com.conveyal.datatools.manager.persistence.FeedStore; import com.conveyal.datatools.manager.persistence.Persistence; import com.conveyal.gtfs.GTFSFeed; +import com.csvreader.CsvReader; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ArrayNode; import org.apache.commons.io.input.BOMInputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.io.Serializable; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collection; import java.util.Enumeration; @@ -124,10 +123,15 @@ private static void validateTable( GTFSFeed gtfsFeed ) throws IOException { String tableId = specTable.get("id").asText(); + // Read in table data from input stream. - BufferedReader in = new BufferedReader(new InputStreamReader(inputStreamToValidate)); - String line = in.readLine(); - String[] inputHeaders = line.split(","); + CsvReader csvReader = new CsvReader(inputStreamToValidate, ',', StandardCharsets.UTF_8); + // Don't skip empty records (this is set to true by default on CsvReader. We want to check for empty records + // during table load, so that they are logged as validation issues (rows with wrong number of columns). + csvReader.setSkipEmptyRecords(false); + csvReader.readHeaders(); + + String[] inputHeaders = csvReader.getHeaders(); List fieldList = Arrays.asList(inputHeaders); JsonNode[] fieldsFound = new JsonNode[inputHeaders.length]; JsonNode specFields = specTable.get("fields"); @@ -144,24 +148,27 @@ private static void validateTable( issues.add(new ValidationIssue(tableId, fieldName, -1, "Required column missing.")); } } + // Iterate over each row and validate each field value. int rowIndex = 0; int rowsWithWrongNumberOfColumns = 0; - while ((line = in.readLine()) != null) { - String[] values = line.split(Consts.COLUMN_SPLIT, -1); + while (csvReader.readRecord()) { // First, check that row has the correct number of fields. - if (values.length != fieldsFound.length) { + int recordColumnCount = csvReader.getColumnCount(); + if (recordColumnCount != fieldsFound.length) { rowsWithWrongNumberOfColumns++; } // Validate each value in row. Note: we iterate over the fields and not values because a row may be missing // columns, but we still want to validate that missing value (e.g., if it is missing a required field). for (int f = 0; f < fieldsFound.length; f++) { // If value exists for index, use that. Otherwise, default to null to avoid out of bounds exception. - String val = f < values.length ? values[f] : null; + String val = f < recordColumnCount ? 
csvReader.get(f) : null; validateTableValue(issues, tableId, rowIndex, val, fieldsFound[f], gtfsFeed); } rowIndex++; } + csvReader.close(); + // Add issue for wrong number of columns after processing all rows. // Note: We considered adding an issue for each row, but opted for the single error approach because there's no // concept of a row-level issue in the UI right now. So we would potentially need to add that to the UI diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/AutoDeployJob.java b/src/main/java/com/conveyal/datatools/manager/jobs/AutoDeployJob.java index bac7fca7d..89a88785b 100644 --- a/src/main/java/com/conveyal/datatools/manager/jobs/AutoDeployJob.java +++ b/src/main/java/com/conveyal/datatools/manager/jobs/AutoDeployJob.java @@ -1,6 +1,5 @@ package com.conveyal.datatools.manager.jobs; -import com.conveyal.datatools.common.status.MonitorableJob; import com.conveyal.datatools.manager.auth.Auth0UserProfile; import com.conveyal.datatools.manager.models.Deployment; import com.conveyal.datatools.manager.models.FeedVersion; @@ -12,7 +11,6 @@ import org.slf4j.LoggerFactory; import java.util.Collection; -import java.util.Collections; import java.util.Date; import java.util.HashSet; import java.util.LinkedList; @@ -32,7 +30,7 @@ * If there are related feed fetches in progress auto deploy is skipped but the deployment's feed versions are still * advanced to the latest versions. */ -public class AutoDeployJob extends MonitorableJob { +public class AutoDeployJob extends MonitorableJobWithResourceLock { public static final Logger LOG = LoggerFactory.getLogger(AutoDeployJob.class); /** @@ -40,29 +38,30 @@ public class AutoDeployJob extends MonitorableJob { */ private final Project project; - /** - * A set of projects which have been locked by a instance of {@link AutoDeployJob} to prevent repeat - * deployments. - */ - private static final Set lockedProjects = Collections.synchronizedSet(new HashSet<>()); + private final Deployment deployment; /** * Auto deploy specific project. */ public AutoDeployJob(Project project, Auth0UserProfile owner) { - super(owner, "Auto Deploy Feed", JobType.AUTO_DEPLOY_FEED_VERSION); + super( + owner, + "Auto Deploy Feed", + JobType.AUTO_DEPLOY_FEED_VERSION, + project, + project.name + ); this.project = project; + deployment = Persistence.deployments.getById(project.pinnedDeploymentId); } @Override public void jobLogic() { - Deployment deployment = Persistence.deployments.getById(project.pinnedDeploymentId); // Define if project and deployment are candidates for auto deploy. if ( project.pinnedDeploymentId == null || deployment == null || - deployment.feedVersionIds.isEmpty() || - lockedProjects.contains(project.id) + deployment.feedVersionIds.isEmpty() ) { String message = String.format( "Project %s skipped for auto deployment as required criteria not met.", @@ -89,171 +88,157 @@ public void jobLogic() { return; } - try { - synchronized (lockedProjects) { - if (!lockedProjects.contains(project.id)) { - lockedProjects.add(project.id); - LOG.info("Auto deploy lock added for project id: {}", project.id); - } else { - LOG.warn("Unable to acquire lock for project {}", project.name); - status.fail(String.format("Project %s is locked for auto-deployments.", project.name)); - return; - } - } - LOG.info("Auto deploy task running for project {}", project.name); - - // Get the most recently used server. 
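
Reviewer note, circling back to the GTFS+ validation change above (replacing `line.split(",")` with `CsvReader`): a small self-contained example of why a real CSV parser matters, since `String#split` would break quoted fields that contain commas. The sample data is made up for illustration.

```java
import com.csvreader.CsvReader;

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;

public class CsvReaderExample {
    public static void main(String[] args) throws Exception {
        // A quoted field containing a comma: split(",") would report three columns for the data row.
        String csv = "stop_id,stop_name\n123,\"Main St, Downtown\"\n";
        CsvReader reader = new CsvReader(
            new ByteArrayInputStream(csv.getBytes(StandardCharsets.UTF_8)), ',', StandardCharsets.UTF_8);
        reader.setSkipEmptyRecords(false); // mirror the validator: keep empty rows so they can be flagged
        reader.readHeaders();
        while (reader.readRecord()) {
            // Prints "2 columns; stop_name=Main St, Downtown" because the quoted comma stays in one column.
            System.out.println(reader.getColumnCount() + " columns; stop_name=" + reader.get(1));
        }
        reader.close();
    }
}
```
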
- String latestServerId = deployment.latest().serverId; - OtpServer server = Persistence.servers.getById(latestServerId); - if (server == null) { - String message = String.format( - "Server with id %s no longer exists. Skipping deployment for project %s.", - latestServerId, - project.name - ); - LOG.warn(message); - status.fail(message); - return; - } + // Super class handles lock management and will trigger innerJobLogic. + super.jobLogic(); + } - // Analyze and update feed versions in deployment. - Collection updatedFeedVersionIds = new LinkedList<>(); - List latestVersionsWithCriticalErrors = new LinkedList<>(); - List previousFeedVersions = deployment.retrieveFeedVersions(); - boolean shouldWaitForNewFeedVersions = false; + @Override + protected void innerJobLogic() { + LOG.info("Auto deploy task running for project {}", project.name); - // Production ready feed versions. - List pinnedFeedVersions = deployment.retrievePinnedFeedVersions(); - Set pinnedFeedSourceIds = new HashSet<>( - pinnedFeedVersions - .stream() - .map(pinnedFeedVersion -> pinnedFeedVersion.feedSource.id) - .collect(Collectors.toList()) + // Get the most recently used server. + String latestServerId = deployment.latest().serverId; + OtpServer server = Persistence.servers.getById(latestServerId); + if (server == null) { + String message = String.format( + "Server with id %s no longer exists. Skipping deployment for project %s.", + latestServerId, + project.name ); + LOG.warn(message); + status.fail(message); + return; + } - // Iterate through each feed version for deployment. - for ( - Deployment.SummarizedFeedVersion currentDeploymentFeedVersion : previousFeedVersions - ) { - // Retrieve the latest feed version associated with the feed source of the current - // feed version set for the deployment. - FeedVersion latestFeedVersion = currentDeploymentFeedVersion.feedSource.retrieveLatest(); - // Make sure the latest feed version is not going to supersede a pinned feed version. - if (pinnedFeedSourceIds.contains(latestFeedVersion.feedSourceId)) { - continue; - } - - // Update to the latest feed version. - updatedFeedVersionIds.add(latestFeedVersion.id); - - // Throttle this auto-deployment if needed. For projects that haven't yet been auto-deployed, don't - // wait and go ahead with the auto-deployment. But if the project has been auto-deployed before and - // if the latest feed version was created before the last time the project was auto-deployed and - // there are currently-active jobs that could result in an updated feed version being created, then - // this auto deployment should be throttled. - if ( - project.lastAutoDeploy != null && - latestFeedVersion.dateCreated.before(project.lastAutoDeploy) && - currentDeploymentFeedVersion.feedSource.hasJobsInProgress() - ) { - // Another job exists that should result in the creation of a new feed version which should then - // trigger an additional AutoDeploy job. - LOG.warn( - "Feed source {} contains an active job that should result in the creation of a new feed version. Auto deployment will be skipped until that version has fully processed.", - currentDeploymentFeedVersion.feedSource.name - ); - shouldWaitForNewFeedVersions = true; - } - - // Make sure the latest feed version has no critical errors. - if (latestFeedVersion.hasCriticalErrors()) { - latestVersionsWithCriticalErrors.add(latestFeedVersion); - } + // Analyze and update feed versions in deployment. 
+ Collection updatedFeedVersionIds = new LinkedList<>(); + List latestVersionsWithCriticalErrors = new LinkedList<>(); + List previousFeedVersions = deployment.retrieveFeedVersions(); + boolean shouldWaitForNewFeedVersions = false; + + // Production ready feed versions. + List pinnedFeedVersions = deployment.retrievePinnedFeedVersions(); + Set pinnedFeedSourceIds = new HashSet<>( + pinnedFeedVersions + .stream() + .map(pinnedFeedVersion -> pinnedFeedVersion.feedSource.id) + .collect(Collectors.toList()) + ); + + // Iterate through each feed version for deployment. + for ( + Deployment.SummarizedFeedVersion currentDeploymentFeedVersion : previousFeedVersions + ) { + // Retrieve the latest feed version associated with the feed source of the current + // feed version set for the deployment. + FeedVersion latestFeedVersion = currentDeploymentFeedVersion.feedSource.retrieveLatest(); + // Make sure the latest feed version is not going to supersede a pinned feed version. + if (pinnedFeedSourceIds.contains(latestFeedVersion.feedSourceId)) { + continue; } - // Skip auto-deployment for this project if Data Tools should wait for a job that should create a new - // feed version to complete. - if (shouldWaitForNewFeedVersions) { - status.completeSuccessfully("Auto-Deployment will wait for new feed versions to be created from jobs in-progress"); - return; - } + // Update to the latest feed version. + updatedFeedVersionIds.add(latestFeedVersion.id); - // Skip auto-deployment for this project if any of the feed versions contained critical errors. - if (latestVersionsWithCriticalErrors.size() > 0) { - StringBuilder errorMessageBuilder = new StringBuilder( - String.format("Auto deployment for project %s has %s feed(s) with critical errors:", - project.name, - latestVersionsWithCriticalErrors.size()) + // Throttle this auto-deployment if needed. For projects that haven't yet been auto-deployed, don't + // wait and go ahead with the auto-deployment. But if the project has been auto-deployed before and + // if the latest feed version was created before the last time the project was auto-deployed and + // there are currently-active jobs that could result in an updated feed version being created, then + // this auto deployment should be throttled. + if ( + project.lastAutoDeploy != null && + latestFeedVersion.dateCreated.before(project.lastAutoDeploy) && + currentDeploymentFeedVersion.feedSource.hasJobsInProgress() + ) { + // Another job exists that should result in the creation of a new feed version which should then + // trigger an additional AutoDeploy job. + LOG.warn( + "Feed source {} contains an active job that should result in the creation of a new feed version. Auto deployment will be skipped until that version has fully processed.", + currentDeploymentFeedVersion.feedSource.name ); - for (FeedVersion version : latestVersionsWithCriticalErrors) { - errorMessageBuilder.append( - String.format( - "%s (version %s), ", - version.parentFeedSource().name, - version.id - ) - ); - } - String message = errorMessageBuilder.toString(); - LOG.warn(message); - if (!project.autoDeployWithCriticalErrors) { - NotifyUsersForSubscriptionJob.createNotification( - "deployment-updated", - project.id, - message - ); - status.fail(message); - return; - } + shouldWaitForNewFeedVersions = true; } - // Add all pinned feed versions to the list of feed versions to be deployed so that they aren't lost as part - // of this update. 
- for (Deployment.SummarizedFeedVersion pinnedFeedVersion : pinnedFeedVersions) { - updatedFeedVersionIds.add(pinnedFeedVersion.id); + // Make sure the latest feed version has no critical errors. + if (latestFeedVersion.hasCriticalErrors()) { + latestVersionsWithCriticalErrors.add(latestFeedVersion); } + } + + // Skip auto-deployment for this project if Data Tools should wait for a job that should create a new + // feed version to complete. + if (shouldWaitForNewFeedVersions) { + status.completeSuccessfully("Auto-Deployment will wait for new feed versions to be created from jobs in-progress"); + return; + } - // Check if the updated feed versions have any difference between the previous ones. If not, and if not - // doing a regularly scheduled update with street data, then don't bother starting a deploy job. - // TODO: add logic for street data update - Set previousFeedVersionIds = new HashSet<>( - previousFeedVersions.stream().map(feedVersion -> feedVersion.id).collect(Collectors.toList()) + // Skip auto-deployment for this project if any of the feed versions contained critical errors. + if (latestVersionsWithCriticalErrors.size() > 0) { + StringBuilder errorMessageBuilder = new StringBuilder( + String.format("Auto deployment for project %s has %s feed(s) with critical errors:", + project.name, + latestVersionsWithCriticalErrors.size()) ); - if ( - !updatedFeedVersionIds.stream() - .anyMatch(feedVersionId -> !previousFeedVersionIds.contains(feedVersionId)) - ) { - LOG.info("No updated feed versions to deploy for project {}.", project.name); - status.completeSuccessfully("No updated feed versions to deploy."); - return; + for (FeedVersion version : latestVersionsWithCriticalErrors) { + errorMessageBuilder.append( + String.format( + "%s (version %s), ", + version.parentFeedSource().name, + version.id + ) + ); } - - // Queue up the deploy job. - if (JobUtils.queueDeployJob(deployment, owner, server) != null) { - LOG.info("Last auto deploy date updated for project {}.", project.name); - // Update the deployment's feed version IDs with the latest (and pinned) feed versions. - deployment.feedVersionIds = updatedFeedVersionIds; - project.lastAutoDeploy = new Date(); - Persistence.deployments.replace(deployment.id, deployment); - Persistence.projects.replace(project.id, project); - status.completeSuccessfully("Auto deploy started new deploy job."); - } else { - String message = String.format( - "Auto-deployment to %s should occur after active deployment for project %s completes.", - server.name, - project.name + String message = errorMessageBuilder.toString(); + LOG.warn(message); + if (!project.autoDeployWithCriticalErrors) { + NotifyUsersForSubscriptionJob.createNotification( + "deployment-updated", + project.id, + message ); - LOG.info(message); - status.completeSuccessfully(message); + status.fail(message); + return; } - } catch (Exception e) { - status.fail( - String.format("Could not auto-deploy project %s!", project.name), - e + } + + // Add all pinned feed versions to the list of feed versions to be deployed so that they aren't lost as part + // of this update. + for (Deployment.SummarizedFeedVersion pinnedFeedVersion : pinnedFeedVersions) { + updatedFeedVersionIds.add(pinnedFeedVersion.id); + } + + // Check if the updated feed versions have any difference between the previous ones. If not, and if not + // doing a regularly scheduled update with street data, then don't bother starting a deploy job. 
+ // TODO: add logic for street data update + Set previousFeedVersionIds = new HashSet<>( + previousFeedVersions.stream().map(feedVersion -> feedVersion.id).collect(Collectors.toList()) + ); + if ( + !updatedFeedVersionIds.stream() + .anyMatch(feedVersionId -> !previousFeedVersionIds.contains(feedVersionId)) + ) { + LOG.info("No updated feed versions to deploy for project {}.", project.name); + status.completeSuccessfully("No updated feed versions to deploy."); + return; + } + + // Queue up the deploy job. + if (JobUtils.queueDeployJob(deployment, owner, server) != null) { + LOG.info("Last auto deploy date updated for project {}.", project.name); + // Update the deployment's feed version IDs with the latest (and pinned) feed versions. + deployment.feedVersionIds = updatedFeedVersionIds; + project.lastAutoDeploy = new Date(); + Persistence.deployments.replace(deployment.id, deployment); + Persistence.projects.replace(project.id, project); + status.completeSuccessfully("Auto deploy started new deploy job."); + } else { + String message = String.format( + "Auto-deployment to %s should occur after active deployment for project %s completes.", + server.name, + project.name ); - } finally { - lockedProjects.remove(project.id); - LOG.info("Auto deploy lock removed for project id: {}", project.id); + LOG.info(message); + status.completeSuccessfully(message); } } } diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/AutoPublishJob.java b/src/main/java/com/conveyal/datatools/manager/jobs/AutoPublishJob.java new file mode 100644 index 000000000..d198fc381 --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/AutoPublishJob.java @@ -0,0 +1,63 @@ +package com.conveyal.datatools.manager.jobs; + +import com.conveyal.datatools.manager.auth.Auth0UserProfile; +import com.conveyal.datatools.manager.controllers.api.FeedVersionController; +import com.conveyal.datatools.manager.gtfsplus.GtfsPlusValidation; +import com.conveyal.datatools.manager.models.FeedSource; +import com.conveyal.datatools.manager.models.FeedVersion; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Auto publish the latest feed versions of a feed source if: + * - there are no blocking validation errors, and. + * - the feed source is not locked/already being published by another instance of {@link AutoPublishJob}. + * This class assumes that feed attributes such as autoPublish and retrievalMethod + * have been checked. + */ +public class AutoPublishJob extends MonitorableJobWithResourceLock { + public static final Logger LOG = LoggerFactory.getLogger(AutoPublishJob.class); + + /** + * Auto-publish latest feed from specific feed source. + */ + public AutoPublishJob(FeedSource feedSource, Auth0UserProfile owner) { + super( + owner, + "Auto-Publish Feed", + JobType.AUTO_PUBLISH_FEED_VERSION, + feedSource, + feedSource.name + ); + } + + @Override + protected void innerJobLogic() throws Exception { + FeedSource feedSource = super.resource; + LOG.info("Auto-publish task running for feed source {}", feedSource.name); + + // Retrieve the latest feed version associated with the feed source of the current + // feed version set for the deployment. + FeedVersion latestFeedVersion = feedSource.retrieveLatest(); + + // Validate and check for blocking issues in the feed version to deploy. 
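
Reviewer note: both `AutoDeployJob` and the new `AutoPublishJob` now extend `MonitorableJobWithResourceLock`, which is not included in this excerpt. A rough sketch of the contract implied by the lock-handling code removed from `AutoDeployJob` and by the five-argument super constructor calls; the class name, generics, and details below are assumptions, not the actual implementation.

```java
import com.conveyal.datatools.common.status.MonitorableJob;
import com.conveyal.datatools.manager.auth.Auth0UserProfile;
import com.conveyal.datatools.manager.models.Model;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

// Assumed shape only, inferred from the subclasses in this diff; the real class may differ.
public abstract class ResourceLockedJobSketch<T extends Model> extends MonitorableJob {
    private static final Set<String> lockedResources = Collections.synchronizedSet(new HashSet<>());
    protected final T resource;
    private final String resourceName;

    protected ResourceLockedJobSketch(Auth0UserProfile owner, String name, JobType type, T resource, String resourceName) {
        super(owner, name, type);
        this.resource = resource;
        this.resourceName = resourceName;
    }

    /** Subclasses implement the actual work; lock acquisition and release are handled here. */
    protected abstract void innerJobLogic() throws Exception;

    @Override
    public void jobLogic() {
        // Acquire a per-resource lock so two jobs never run concurrently against the same resource.
        if (!lockedResources.add(resource.id)) {
            status.fail(String.format("%s is locked by another instance of this job.", resourceName));
            return;
        }
        try {
            innerJobLogic();
        } catch (Exception e) {
            status.fail(String.format("Job failed for %s!", resourceName), e);
        } finally {
            // Always release the lock, mirroring the finally block removed from AutoDeployJob.
            lockedResources.remove(resource.id);
        }
    }
}
```

Scheduling an auto-publish run would then presumably mirror the other jobs in this PR, e.g. `JobUtils.heavyExecutor.execute(new AutoPublishJob(feedSource, userProfile));`.
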
+ if (latestFeedVersion.hasBlockingIssuesForPublishing()) { + status.fail("Could not publish this feed version because it contains blocking errors."); + } else { + try { + GtfsPlusValidation gtfsPlusValidation = GtfsPlusValidation.validate(latestFeedVersion.id); + if (!gtfsPlusValidation.issues.isEmpty()) { + status.fail("Could not publish this feed version because it contains GTFS+ blocking errors."); + } + } catch(Exception e) { + status.fail("Could not read GTFS+ zip file", e); + } + } + + // If validation successful, just execute the feed updating process. + if (!status.error) { + FeedVersionController.publishToExternalResource(latestFeedVersion); + LOG.info("Auto-published feed source {} to external resource.", feedSource.id); + } + } +} diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/DeployJob.java b/src/main/java/com/conveyal/datatools/manager/jobs/DeployJob.java index c21892cee..78ef55ea3 100644 --- a/src/main/java/com/conveyal/datatools/manager/jobs/DeployJob.java +++ b/src/main/java/com/conveyal/datatools/manager/jobs/DeployJob.java @@ -56,6 +56,7 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.Serializable; @@ -328,6 +329,82 @@ public void jobLogic () { return; } } + else if ("true".equals(System.getenv("RUN_E2E"))) { + // If running E2E tests, fire up an otp-runner graph build on the same machine. + try { + // Generate a basic otp-runner manifest + OtpRunnerManifest manifest = new OtpRunnerManifest(); + // add common settings + manifest.baseFolder = String.format("/tmp/%s/graphs", getTripPlannerString()); + manifest.baseFolderDownloads = new ArrayList<>(); + manifest.jarFile = getJarFileOnInstance(); + manifest.nonce = this.nonce; + manifest.otpRunnerLogFile = OTP_RUNNER_LOG_FILE; + manifest.otpVersion = isOtp2() + ? "2.x" + : "1.x"; + manifest.prefixLogUploadsWithInstanceId = true; + manifest.statusFileLocation = String.format("%s/%s", "/var/log", OTP_RUNNER_STATUS_FILE); + manifest.uploadOtpRunnerLogs = false; + manifest.buildGraph = true; + try { + if (deployment.feedVersionIds.size() > 0) { + // add OSM data + URL osmDownloadUrl = deployment.getUrlForOsmExtract(); + if (osmDownloadUrl != null) { + addUriAsBaseFolderDownload(manifest, osmDownloadUrl.toString()); + } + + // add GTFS data + for (String feedVersionId : deployment.feedVersionIds) { + CustomFile gtfsFile = new CustomFile(); + // OTP 2.x must have the string `gtfs` somewhere inside the filename, so prepend the filename + // with the string `gtfs-`. + gtfsFile.filename = String.format("gtfs-%s", feedVersionId); + gtfsFile.uri = S3Utils.getS3FeedUri(feedVersionId); + addCustomFileAsBaseFolderDownload(manifest, gtfsFile); + } + } + } catch (MalformedURLException e) { + status.fail("Failed to create base folder download URLs!", e); + return; + } + // The graph stays on this machine for e2e tests. + manifest.uploadGraph = false; + manifest.uploadGraphBuildLogs = false; + manifest.uploadGraphBuildReport = false; + // A new OTP instance should not be started. In E2E environments, + // there is already an OTP instance running in the background, + // and the test emulates updating the router graph in that OTP instance. + manifest.runServer = false; + + // Write manifest to temp file + // (CI directories are managed separately). 
+ String otpRunnerManifestFile = String.format("/tmp/%s/otp-runner-manifest.json", getTripPlannerString()); + File otpManifestFile = new File(otpRunnerManifestFile); + otpManifestFile.createNewFile(); + LOG.info("E2E otp-runner empty manifest file created."); + + try ( + FileWriter fw = new FileWriter(otpManifestFile) + ) { + ObjectMapper mapper = new ObjectMapper(); + mapper.setSerializationInclusion(JsonInclude.Include.NON_EMPTY); + fw.write(mapper.writeValueAsString(manifest)); + LOG.info("E2E otp-runner manifest file written."); + } catch (JsonProcessingException e) { + status.fail("Failed to create E2E manifest for otp-runner!", e); + return; + } + + // Run otp-runner with the manifest produced earlier. + Process p = Runtime.getRuntime().exec(String.format("otp-runner %s", otpRunnerManifestFile)); + p.waitFor(); + LOG.info("otp-runner exit code: {}", p.exitValue()); + } catch (IOException | InterruptedException e) { + e.printStackTrace(); + } + } // If there are no OTP targets (i.e. we're only deploying to S3), we're done. if(otpServer.internalUrl != null) { @@ -338,16 +415,6 @@ public void jobLogic () { status.baseUrl = otpServer.publicUrl; } - // Now that the build + deployment was successful, update Pelias - if (deployment.peliasUpdate) { - // Get log upload URI from deploy job - AmazonS3URI logUploadS3URI = getS3FolderURI(); - - // Execute the pelias update job and keep track of it - PeliasUpdateJob peliasUpdateJob = new PeliasUpdateJob(owner, "Updating Custom Geocoder Database", deployment, logUploadS3URI); - addNextJob(peliasUpdateJob); - } - status.completed = true; } diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/FeedUpdater.java b/src/main/java/com/conveyal/datatools/manager/jobs/FeedUpdater.java index d1b72933f..501eb56f4 100644 --- a/src/main/java/com/conveyal/datatools/manager/jobs/FeedUpdater.java +++ b/src/main/java/com/conveyal/datatools/manager/jobs/FeedUpdater.java @@ -13,6 +13,7 @@ import com.conveyal.datatools.manager.persistence.Persistence; import com.conveyal.datatools.manager.utils.HashUtils; import com.google.common.io.ByteStreams; +import org.bson.conversions.Bson; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,6 +36,9 @@ import static com.conveyal.datatools.common.utils.Scheduler.schedulerService; import static com.mongodb.client.model.Filters.and; import static com.mongodb.client.model.Filters.eq; +import static com.mongodb.client.model.Filters.lt; +import static com.mongodb.client.model.Filters.ne; +import static com.mongodb.client.model.Filters.or; /** * This class is used to schedule an {@link UpdateFeedsTask}, which will check the specified S3 bucket (and prefix) for @@ -52,16 +56,34 @@ * it in a “failed” folder, yet there is no check by Data Tools to see if the feed landed there. 
*/ public class FeedUpdater { + private static final String TEST_BUCKET = "test-bucket"; + private static final String TEST_COMPLETED_FOLDER = "test-completed"; + private static final Logger LOG = LoggerFactory.getLogger(FeedUpdater.class); + public static final String SENT_TO_EXTERNAL_PUBLISHER_FIELD = "sentToExternalPublisher"; + public static final String PROCESSED_BY_EXTERNAL_PUBLISHER_FIELD = "processedByExternalPublisher"; + private Map eTagForFeed; private final String feedBucket; private final String bucketFolder; - private static final Logger LOG = LoggerFactory.getLogger(FeedUpdater.class); + private final CompletedFeedRetriever completedFeedRetriever; + private List versionsToMarkAsProcessed; + private FeedUpdater(int updateFrequencySeconds, String feedBucket, String bucketFolder) { LOG.info("Setting feed update to check every {} seconds", updateFrequencySeconds); schedulerService.scheduleAtFixedRate(new UpdateFeedsTask(), 0, updateFrequencySeconds, TimeUnit.SECONDS); this.feedBucket = feedBucket; this.bucketFolder = bucketFolder; + this.completedFeedRetriever = new DefaultCompletedFeedRetriever(); + } + + /** + * Constructor used for tests. + */ + private FeedUpdater(CompletedFeedRetriever completedFeedRetriever) { + this.feedBucket = TEST_BUCKET; + this.bucketFolder = TEST_COMPLETED_FOLDER; + this.completedFeedRetriever = completedFeedRetriever; } /** @@ -72,6 +94,13 @@ public static FeedUpdater schedule(int updateFrequencySeconds, String s3Bucket, return new FeedUpdater(updateFrequencySeconds, s3Bucket, s3Prefix); } + /** + * Helper method used in tests to create a {@link FeedUpdater}. + */ + public static FeedUpdater createForTest(CompletedFeedRetriever completedFeedRetriever) { + return new FeedUpdater(completedFeedRetriever); + } + private class UpdateFeedsTask implements Runnable { public void run() { Map updatedTags; @@ -92,61 +121,66 @@ public void run() { * objects in order to keep data-tools application in sync with external processes (for example, MTC RTD). * @return map of feedIDs to eTag values */ - private Map checkForUpdatedFeeds() { + public Map checkForUpdatedFeeds() { if (eTagForFeed == null) { // If running the check for the first time, instantiate the eTag map. LOG.info("Running initial check for feeds on S3."); eTagForFeed = new HashMap<>(); } + + // The feed versions corresponding to entries in objectSummaries + // that need to be marked as processed should meet all conditions below: + // - sentToExternalPublisher is not null, + // - processedByExternalPublisher is null or before sentToExternalPublisher. 
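
Reviewer note: the Bson filter built just below encodes the two conditions listed in the comment above. An in-memory equivalent of the intended predicate (illustrative only, using the `FeedVersion` date fields this diff reads) makes the intent explicit:

```java
import java.util.Date;

/** Illustrative in-memory equivalent of the intent behind the Mongo filter below. */
class PublishPendingCheck {
    /** True if a version was sent to the external publisher and has not been processed since it was last sent. */
    static boolean needsProcessedTimestamp(Date sentToExternalPublisher, Date processedByExternalPublisher) {
        return sentToExternalPublisher != null
            && (processedByExternalPublisher == null
                || processedByExternalPublisher.before(sentToExternalPublisher));
    }
}
```
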
+ Bson query = and( + ne(SENT_TO_EXTERNAL_PUBLISHER_FIELD, null), + or( + eq(PROCESSED_BY_EXTERNAL_PUBLISHER_FIELD, null), + lt(PROCESSED_BY_EXTERNAL_PUBLISHER_FIELD, SENT_TO_EXTERNAL_PUBLISHER_FIELD) + ) + ); + versionsToMarkAsProcessed = Persistence.feedVersions.getFiltered(query) + .stream() + .map(v -> v.id) + .collect(Collectors.toList()); + LOG.debug("Checking for feeds on S3."); Map newTags = new HashMap<>(); // iterate over feeds in download_prefix folder and register to (MTC project) - ObjectListing gtfsList = null; - try { - gtfsList = S3Utils.getDefaultS3Client().listObjects(feedBucket, bucketFolder); - } catch (AmazonServiceException | CheckedAWSException e) { - LOG.error("Failed to list S3 Objects", e); + List objectSummaries = completedFeedRetriever.retrieveCompletedFeeds(); + if (objectSummaries == null) { return newTags; } - LOG.debug(eTagForFeed.toString()); - for (S3ObjectSummary objSummary : gtfsList.getObjectSummaries()) { + LOG.debug(eTagForFeed.toString()); + for (S3ObjectSummary objSummary : objectSummaries) { String eTag = objSummary.getETag(); String keyName = objSummary.getKey(); LOG.debug("{} etag = {}", keyName, eTag); - if (!eTagForFeed.containsValue(eTag)) { - // Don't add object if it is a dir - if (keyName.equals(bucketFolder)) continue; - String filename = keyName.split("/")[1]; - String feedId = filename.replace(".zip", ""); - // Skip object if the filename is null - if ("null".equals(feedId)) continue; + + // Don't add object if it is a dir + if (keyName.equals(bucketFolder)) continue; + String filename = keyName.split("/")[1]; + String feedId = filename.replace(".zip", ""); + FeedSource feedSource = getFeedSource(feedId); + if (feedSource == null) { + LOG.error("No feed source found for feed ID {}", feedId); + continue; + } + // Skip object if the filename is null + if ("null".equals(feedId)) continue; + + FeedVersion latestVersionSentForPublishing = getLatestVersionSentForPublishing(feedId, feedSource); + if (shouldMarkFeedAsProcessed(eTag, latestVersionSentForPublishing)) { try { - LOG.info("New version found for {} at s3://{}/{}. ETag = {}.", feedId, feedBucket, keyName, eTag); - FeedSource feedSource = null; - List properties = Persistence.externalFeedSourceProperties.getFiltered( - and(eq("value", feedId), eq("name", AGENCY_ID_FIELDNAME)) - ); - if (properties.size() > 1) { - StringBuilder b = new StringBuilder(); - properties.forEach(b::append); - LOG.warn("Found multiple feed sources for {}: {}", - feedId, - properties.stream().map(p -> p.feedSourceId).collect(Collectors.joining(","))); - } - for (ExternalFeedSourceProperty prop : properties) { - // FIXME: What if there are multiple props found for different feed sources. This could happen if - // multiple projects have been synced with MTC or if the ExternalFeedSourceProperty for a feed - // source is not deleted properly when the feed source is deleted. - feedSource = Persistence.feedSources.getById(prop.feedSourceId); + // Don't mark a feed version as published if previous published version is before sentToExternalPublisher. + if (!objSummary.getLastModified().before(latestVersionSentForPublishing.sentToExternalPublisher)) { + LOG.info("New version found for {} at s3://{}/{}. ETag = {}.", feedId, feedBucket, keyName, eTag); + updatePublishedFeedVersion(feedId, latestVersionSentForPublishing); + // TODO: Explore if MD5 checksum can be used to find matching feed version. 
+ // findMatchingFeedVersion(md5, feedId, feedSource); } - if (feedSource == null) { - LOG.error("No feed source found for feed ID {}", feedId); - continue; - } - updatePublishedFeedVersion(feedId, feedSource); - // TODO: Explore if MD5 checksum can be used to find matching feed version. - // findMatchingFeedVersion(md5, feedId, feedSource); + } catch (Exception e) { LOG.warn("Could not load feed " + keyName, e); } finally { @@ -162,38 +196,86 @@ private Map checkForUpdatedFeeds() { return newTags; } + /** + * Obtains the {@link FeedSource} for the given feed id (for MTC, that's the 2-letter agency code). + */ + private FeedSource getFeedSource(String feedId) { + FeedSource feedSource = null; + List properties = Persistence.externalFeedSourceProperties.getFiltered( + and(eq("value", feedId), eq("name", AGENCY_ID_FIELDNAME)) + ); + if (properties.size() > 1) { + LOG.warn("Found multiple feed sources for {}: {}. The published status on some feed versions will be incorrect.", + feedId, + properties.stream().map(p -> p.feedSourceId).collect(Collectors.joining(","))); + } + for (ExternalFeedSourceProperty prop : properties) { + // FIXME: What if there are multiple props found for different feed sources. This could happen if + // multiple projects have been synced with MTC or if the ExternalFeedSourceProperty for a feed + // source is not deleted properly when the feed source is deleted. + feedSource = Persistence.feedSources.getById(prop.feedSourceId); + } + return feedSource; + } + + /** + * @return true if the feed with the corresponding etag should be mark as processed, false otherwise. + */ + private boolean shouldMarkFeedAsProcessed(String eTag, FeedVersion publishedVersion) { + if (eTagForFeed.containsValue(eTag)) return false; + if (publishedVersion == null) return false; + + return versionsToMarkAsProcessed.contains(publishedVersion.id); + } + /** * Update the published feed version for the feed source. * @param feedId the unique ID used by MTC to identify a feed source - * @param feedSource the feed source for which a newly published version should be registered + * @param publishedVersion the feed version to be registered */ - private void updatePublishedFeedVersion(String feedId, FeedSource feedSource) { - // Collect the feed versions for the feed source. - Collection versions = feedSource.retrieveFeedVersions(); + private void updatePublishedFeedVersion(String feedId, FeedVersion publishedVersion) { try { - // Get the latest published version (if there is one). NOTE: This is somewhat flawed because it presumes - // that the latest published version is guaranteed to be the one found in the "completed" folder, but it - // could be that more than one versions were recently "published" and the latest published version was a bad - // feed that failed processing by RTD. - Optional lastPublishedVersionCandidate = versions - .stream() - .min(Comparator.comparing(v -> v.sentToExternalPublisher, Comparator.nullsLast(Comparator.reverseOrder()))); - if (lastPublishedVersionCandidate.isPresent()) { - FeedVersion publishedVersion = lastPublishedVersionCandidate.get(); + if (publishedVersion != null) { if (publishedVersion.sentToExternalPublisher == null) { LOG.warn("Not updating published version for {} (version was never sent to external publisher)", feedId); return; } // Set published namespace to the feed version and set the processedByExternalPublisher timestamp. 
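
Reviewer note on `getLatestVersionSentForPublishing`, defined a little further below: the `min(Comparator.comparing(v -> v.sentToExternalPublisher, Comparator.nullsLast(Comparator.reverseOrder())))` construct is easy to misread. Because the comparator is reversed, `min` actually selects the version with the most recent `sentToExternalPublisher`, and `nullsLast` keeps never-sent versions from being chosen ahead of sent ones. A small self-contained illustration over bare dates (the timestamps are made up):

```java
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.List;
import java.util.Optional;

public class LatestSentVersionExample {
    public static void main(String[] args) {
        // Sent timestamps for three hypothetical versions; null means the version was never sent to RTD.
        List<Date> sentDates = Arrays.asList(new Date(1000L), null, new Date(5000L));
        Optional<Date> latestSent = sentDates.stream()
            .min(Comparator.nullsLast(Comparator.reverseOrder()));
        // Prints the Date built from 5000L: the most recently sent version wins, nulls are never preferred.
        System.out.println(latestSent.orElse(null));
    }
}
```
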
LOG.info("Latest published version (sent at {}) for {} is {}", publishedVersion.sentToExternalPublisher, feedId, publishedVersion.id); - Persistence.feedVersions.updateField(publishedVersion.id, "processedByExternalPublisher", new Date()); - Persistence.feedSources.updateField(feedSource.id, "publishedVersionId", publishedVersion.namespace); + Persistence.feedVersions.updateField(publishedVersion.id, PROCESSED_BY_EXTERNAL_PUBLISHER_FIELD, new Date()); + Persistence.feedSources.updateField(publishedVersion.feedSourceId, "publishedVersionId", publishedVersion.namespace); } else { - LOG.error("No published versions found for {} ({} id={})", feedId, feedSource.name, feedSource.id); + LOG.error( + "No published versions found for {} ({} id={})", + feedId, + publishedVersion.parentFeedSource().name, + publishedVersion.feedSourceId + ); } + } catch (Exception e) { + e.printStackTrace(); + LOG.error("Error encountered while updating the latest published version for {}", feedId); + } + } + + /** + * Get the latest published version (if there is one). NOTE: This is somewhat flawed because it presumes + * that the latest published version is guaranteed to be the one found in the "completed" folder, but it + * could be that more than one versions were recently "published" and the latest published version was a bad + * feed that failed processing by RTD. + */ + private static FeedVersion getLatestVersionSentForPublishing(String feedId, FeedSource feedSource) { + try { + // Collect the feed versions for the feed source. + Collection versions = feedSource.retrieveFeedVersions(); + Optional lastPublishedVersionCandidate = versions + .stream() + .min(Comparator.comparing(v -> v.sentToExternalPublisher, Comparator.nullsLast(Comparator.reverseOrder()))); + return lastPublishedVersionCandidate.orElse(null); } catch (Exception e) { e.printStackTrace(); LOG.error("Error encountered while checking for latest published version for {}", feedId); + return null; } } @@ -238,4 +320,26 @@ private FeedVersion findMatchingFeedVersion( return matchingVersion; } + /** + * Helper interface for fetching a list of feeds deemed production-complete. + */ + public interface CompletedFeedRetriever { + List retrieveCompletedFeeds(); + } + + /** + * Implements the default behavior for above interface. 
+ */ + public class DefaultCompletedFeedRetriever implements CompletedFeedRetriever { + @Override + public List retrieveCompletedFeeds() { + try { + ObjectListing gtfsList = S3Utils.getDefaultS3Client().listObjects(feedBucket, bucketFolder); + return gtfsList.getObjectSummaries(); + } catch (CheckedAWSException e) { + LOG.error("Failed to list S3 Objects", e); + return null; + } + } + } } diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/MergeFeedsJob.java b/src/main/java/com/conveyal/datatools/manager/jobs/MergeFeedsJob.java index 53b0e8f9f..ab683b090 100644 --- a/src/main/java/com/conveyal/datatools/manager/jobs/MergeFeedsJob.java +++ b/src/main/java/com/conveyal/datatools/manager/jobs/MergeFeedsJob.java @@ -6,122 +6,66 @@ import com.conveyal.datatools.manager.DataManager; import com.conveyal.datatools.manager.auth.Auth0UserProfile; import com.conveyal.datatools.manager.gtfsplus.tables.GtfsPlusTable; -import com.conveyal.datatools.manager.models.FeedRetrievalMethod; +import com.conveyal.datatools.manager.jobs.feedmerge.FeedMergeContext; +import com.conveyal.datatools.manager.jobs.feedmerge.FeedToMerge; +import com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsResult; +import com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType; +import com.conveyal.datatools.manager.jobs.feedmerge.MergeLineContext; +import com.conveyal.datatools.manager.jobs.feedmerge.MergeStrategy; import com.conveyal.datatools.manager.models.FeedSource; import com.conveyal.datatools.manager.models.FeedVersion; import com.conveyal.datatools.manager.models.Project; import com.conveyal.datatools.manager.persistence.Persistence; -import com.conveyal.gtfs.error.NewGTFSError; -import com.conveyal.gtfs.error.NewGTFSErrorType; -import com.conveyal.gtfs.loader.Field; -import com.conveyal.gtfs.loader.ReferenceTracker; +import com.conveyal.datatools.manager.utils.ErrorUtils; +import com.conveyal.gtfs.loader.Feed; import com.conveyal.gtfs.loader.Table; -import com.csvreader.CsvReader; +import com.conveyal.gtfs.model.StopTime; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Lists; +import org.bson.codecs.pojo.annotations.BsonIgnore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.supercsv.io.CsvListWriter; -import org.supercsv.prefs.CsvPreference; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; -import java.io.OutputStreamWriter; -import java.time.LocalDate; -import java.time.temporal.ChronoUnit; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; -import java.util.Comparator; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; -import java.util.Objects; import java.util.Set; -import java.util.UUID; import java.util.stream.Collectors; -import java.util.zip.ZipEntry; -import java.util.zip.ZipFile; import java.util.zip.ZipOutputStream; -import static com.conveyal.datatools.manager.jobs.MergeFeedsType.SERVICE_PERIOD; -import static com.conveyal.datatools.manager.jobs.MergeFeedsType.REGIONAL; +import static com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType.SERVICE_PERIOD; +import static com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType.REGIONAL; +import static com.conveyal.datatools.manager.jobs.feedmerge.MergeStrategy.CHECK_STOP_TIMES; import static 
com.conveyal.datatools.manager.models.FeedRetrievalMethod.REGIONAL_MERGE; -import static com.conveyal.datatools.manager.models.FeedRetrievalMethod.SERVICE_PERIOD_MERGE; -import static com.conveyal.datatools.manager.utils.StringUtils.getCleanName; -import static com.conveyal.gtfs.loader.DateField.GTFS_DATE_FORMATTER; -import static com.conveyal.gtfs.loader.Field.getFieldIndex; +import static com.conveyal.datatools.manager.utils.MergeFeedUtils.*; /** * This job handles merging two or more feed versions according to logic specific to the specified merge type. - * The current merge types handled here are: + * The merge types handled here are: * - {@link MergeFeedsType#REGIONAL}: this is essentially a "dumb" merge. For each feed version, each primary key is * scoped so that there is no possibility that it will conflict with other IDs * found in any other feed version. Note: There is absolutely no attempt to merge * entities based on either expected shared IDs or entity location (e.g., stop * coordinates). - * - {@link MergeFeedsType#SERVICE_PERIOD}: this strategy is defined in detail at https://github.com/conveyal/datatools-server/issues/185, - * but in essence, this strategy attempts to merge a current and future feed into + * - {@link MergeFeedsType#SERVICE_PERIOD}: + * this strategy is defined in detail at https://github.com/conveyal/datatools-server/issues/185, + * but in essence, this strategy attempts to merge an active and future feed into * a combined file. For certain entities (specifically stops and routes) it uses * alternate fields as primary keys (stop_code and route_short_name) if they are * available. There is some complexity related to this in {@link #constructMergedTable(Table, List, ZipOutputStream)}. * Another defining characteristic is to prefer entities defined in the "future" - * file if there are matching entities in the current file. + * file if there are matching entities in the active file. * Future merge strategies could be added here. For example, some potential customers have mentioned a desire to - * prefer entities from the "current" version, so that entities edited in Data Tools would override the values found + * prefer entities from the active version, so that entities edited in Data Tools would override the values found * in the "future" file, which may have limited data attributes due to being exported from scheduling software with * limited GTFS support. - * - * Reproduced from https://github.com/conveyal/datatools-server/issues/185 on 2019/04/23: - * - * 1. When a new GTFS+ feed is loaded in TDM, check as part of the loading and validation process if - * the dataset is for a future date. (If all services start in the future, consider the dataset - * to be for the future). - * 2. If it is a future dataset, automatically notify the user that the feed needs to be merged with - * most recent active version or a selected one in order to further process the feed. - * 3. Use the chosen version to merge the future feed. The merging process needs to be efficient so - * that the user doesn’t need to wait more than a tolerable time. - * 4. The merge process shall compare the current and future datasets, validate the following rules - * and generate the Merge Validation Report: - * i. Merging will be based on route_short_name in the current and future datasets. All matching - * route_short_names between the datasets shall be considered same route. Any route_short_name - * in active data not present in the future will be appended to the future routes file. - * ii. 
Future feed_info.txt file should get priority over active feed file when difference is - * identified. - * iii. When difference is found in agency.txt file between active and future feeds, the future - * agency.txt file data should be used. Possible issue with missing agency_id referenced by routes - * iv. When stop_code is included, stop merging will be based on that. If stop_code is not - * included, it will be based on stop_id. All stops in future data will be carried forward and - * any stops found in active data that are not in the future data shall be appended. If one - * of the feed is missing stop_code, merge fails with a notification to the user with - * suggestion that the feed with missing stop_code must be fixed with stop_code. - * v. If any service_id in the active feed matches with the future feed, it should be modified - * and all associated trip records must also be changed with the modified service_id. - * If a service_id from the active calendar has both the start_date and end_date in the - * future, the service shall not be appended to the merged file. Records in trips, - * calendar_dates, and calendar_attributes referencing this service_id shall also be - * removed/ignored. Stop_time records for the ignored trips shall also be removed. - * If a service_id from the active calendar has only the end_date in the future, the end_date - * shall be set to one day prior to the earliest start_date in future dataset before appending - * the calendar record to the merged file. - * trip_ids between active and future datasets must not match. If any trip_id is found to be - * matching, the merge should fail with appropriate notification to user with the cause of the - * failure. Notification should include all matched trip_ids. - * vi. New shape_ids in the future datasets should be appended in the merged feed. - * vii. Merging fare_attributes will be based on fare_id in the current and future datasets. All - * matching fare_ids between the datasets shall be considered same fare. Any fare_id in active - * data not present in the future will be appended to the future fare_attributes file. - * viii. All fare rules from the future dataset will be included. Any identical fare rules from - * the current dataset will be discarded. Any fare rules unique to the current dataset will be - * appended to the future file. - * ix. All transfers.txt entries with unique stop pairs (from - to) from both the future and - * current datasets will be included in the merged file. Entries with duplicate stop pairs from - * the current dataset will be discarded. - * x. All GTFS+ files should be merged based on how the associated base GTFS file is merged. For - * example, directions for routes that are not in the future routes.txt file should be appended - * to the future directions.txt file in the merged feed. */ public class MergeFeedsJob extends FeedSourceJob { @@ -129,32 +73,33 @@ public class MergeFeedsJob extends FeedSourceJob { public static final ObjectMapper mapper = new ObjectMapper(); private final Set feedVersions; private final FeedSource feedSource; - private final ReferenceTracker referenceTracker = new ReferenceTracker(); - public MergeFeedsResult mergeFeedsResult; + public final MergeFeedsResult mergeFeedsResult; private final String filename; public final String projectId; public final MergeFeedsType mergeType; private File mergedTempFile = null; - /** - * If {@link MergeFeedsJob} storeNewVersion variable is true, a new version will be created from the merged GTFS - * dataset. 
Otherwise, this will be null throughout the life of the job. - */ final FeedVersion mergedVersion; - public boolean failOnDuplicateTripId = true; + @JsonIgnore @BsonIgnore + public Set sharedTripIdsWithInconsistentSignature = new HashSet<>(); + @JsonIgnore @BsonIgnore + public Set sharedTripIdsWithConsistentSignature = new HashSet<>(); + @JsonIgnore @BsonIgnore + public Set serviceIdsToCloneRenameAndExtend = new HashSet<>(); + @JsonIgnore @BsonIgnore + public Set serviceIdsFromActiveFeedToTerminateEarly = new HashSet<>(); - public MergeFeedsJob(Auth0UserProfile owner, Set feedVersions, String file, MergeFeedsType mergeType) { - this(owner, feedVersions, file, mergeType, true); - } + private List sharedConsistentTripAndCalendarIds = new ArrayList<>(); + + // Variables used for a service period merge. + private FeedMergeContext feedMergeContext; /** * @param owner user ID that initiated job * @param feedVersions set of feed versions to merge * @param file resulting merge filename (without .zip) * @param mergeType the type of merge to perform {@link MergeFeedsType} - * @param storeNewVersion whether to store merged feed as new version */ - public MergeFeedsJob(Auth0UserProfile owner, Set feedVersions, String file, - MergeFeedsType mergeType, boolean storeNewVersion) { + public MergeFeedsJob(Auth0UserProfile owner, Set feedVersions, String file, MergeFeedsType mergeType) { super(owner, mergeType.equals(REGIONAL) ? "Merging project feeds" : "Merging feed versions", JobType.MERGE_FEED_VERSIONS); this.feedVersions = feedVersions; @@ -169,7 +114,7 @@ public MergeFeedsJob(Auth0UserProfile owner, Set feedVersions, Stri // Grab parent feed source depending on merge type. FeedSource regionalFeedSource = null; // If storing a regional merge as a new version, find the feed source designated by the project. - if (mergeType.equals(REGIONAL) && storeNewVersion) { + if (mergeType.equals(REGIONAL)) { regionalFeedSource = Persistence.feedSources.getById(project.regionalFeedSourceId); // Create new feed source if this is the first regional merge. if (regionalFeedSource == null) { @@ -186,26 +131,33 @@ public MergeFeedsJob(Auth0UserProfile owner, Set feedVersions, Stri this.feedSource = mergeType.equals(REGIONAL) ? regionalFeedSource : feedVersions.iterator().next().parentFeedSource(); - FeedRetrievalMethod retrievalMethod = mergeType.equals(REGIONAL) - ? REGIONAL_MERGE - : SERVICE_PERIOD_MERGE; + // Assuming job is successful, mergedVersion will contain the resulting feed version. // Merged version will be null if the new version should not be stored. - this.mergedVersion = storeNewVersion ? new FeedVersion(this.feedSource, retrievalMethod) : null; + this.mergedVersion = getMergedVersion(this, true); this.mergeFeedsResult = new MergeFeedsResult(mergeType); } + @BsonIgnore @JsonIgnore + public Set getFeedVersions() { + return this.feedVersions; + } + /** * The final stage handles clean up (deleting temp file) and adding the next job to process the * new merged version (assuming the merge did not fail). */ + @Override public void jobFinished() { // Delete temp file to ensure it does not cause storage bloat. Note: merged file has already been stored // permanently. - if (!mergedTempFile.delete()) { - // FIXME: send to bugsnag? - LOG.error( + try { + Files.delete(mergedTempFile.toPath()); + } catch (IOException e) { + logAndReportToBugsnag( + e, "Merged feed file {} not deleted. 
This may contribute to storage space shortages.", - mergedTempFile.getAbsolutePath()); + mergedTempFile.getAbsolutePath() + ); } } @@ -213,65 +165,78 @@ public void jobFinished() { * Primary job logic handles collecting and sorting versions, creating a merged table for all versions, and writing * the resulting zip file to storage. */ - @Override public void jobLogic() throws IOException, CheckedAWSException { + @Override + public void jobLogic() { // Create temp zip file to add merged feed content to. - mergedTempFile = File.createTempFile(filename, null); - mergedTempFile.deleteOnExit(); - // Create the zipfile. - ZipOutputStream out = new ZipOutputStream(new FileOutputStream(mergedTempFile)); - LOG.info("Created merge file: " + mergedTempFile.getAbsolutePath()); - List feedsToMerge = collectAndSortFeeds(feedVersions); + try { + mergedTempFile = File.createTempFile(filename, null); + } catch (IOException e) { - // Determine which tables to merge (only merge GTFS+ tables for MTC extension). - final List tablesToMerge = - Arrays.stream(Table.tablesInOrder) - .filter(Table::isSpecTable) - .collect(Collectors.toList()); - if (DataManager.isExtensionEnabled("mtc")) { - // Merge GTFS+ tables only if MTC extension is enabled. We should do this for both - // regional and MTC merge strategies. - tablesToMerge.addAll(Arrays.asList(GtfsPlusTable.tables)); + String message = "Error creating temp file for feed merge."; + logAndReportToBugsnag(e, message); + status.fail(message, e); } - int numberOfTables = tablesToMerge.size(); - // Loop over GTFS tables and merge each feed one table at a time. - for (int i = 0; i < numberOfTables; i++) { - Table table = tablesToMerge.get(i); - if (mergeType.equals(REGIONAL) && table.name.equals(Table.FEED_INFO.name)) { - // It does not make sense to include the feed_info table when performing a - // regional feed merge because this file is intended to contain data specific to - // a single agency feed. - // TODO: Perhaps future work can generate a special feed_info file for the merged - // file. - LOG.warn("Skipping feed_info table for regional merge."); - continue; + + // Create the zipfile with try with resources so that it is always closed. + try (ZipOutputStream out = new ZipOutputStream(new FileOutputStream(mergedTempFile))) { + LOG.info("Created merge file: {}", mergedTempFile.getAbsolutePath()); + feedMergeContext = new FeedMergeContext(feedVersions, owner); + + // Determine which tables to merge (only merge GTFS+ tables for MTC extension). + final List
tablesToMerge = getTablesToMerge(); + int numberOfTables = tablesToMerge.size(); + + // Before initiating the merge process, get the merge strategy to use, which runs some pre-processing to + // check for id conflicts for certain tables (e.g., trips and calendars). + if (mergeType.equals(SERVICE_PERIOD)) { + determineMergeStrategy(); + + // Failure condition "if a single trip signature does not match the merge process shall stop with the following + // error message along with matching trip_ids with differing trip signatures." + Set tripIdsWithInconsistentSignature = getSharedTripIdsWithInconsistentSignature(); + if (!tripIdsWithInconsistentSignature.isEmpty()) { + mergeFeedsResult.tripIdsToCheck.addAll(tripIdsWithInconsistentSignature); + failMergeJob( + "Trips in the new feed have differing makeup from matching trips in active feed. " + + "If a trip characteristic has changed, a new trip_id must be assigned." + ); + return; + } } - if (table.name.equals(Table.PATTERNS.name) || table.name.equals(Table.PATTERN_STOP.name)) { - LOG.warn("Skipping editor-only table {}.", table.name); - continue; + + // Loop over GTFS tables and merge each feed one table at a time. + for (int i = 0; i < numberOfTables; i++) { + Table table = tablesToMerge.get(i); + if (shouldSkipTable(table.name)) continue; + double percentComplete = Math.round((double) i / numberOfTables * 10000d) / 100d; + status.update("Merging " + table.name, percentComplete); + // Perform the merge. + LOG.info("Writing {} to merged feed", table.name); + int mergedLineNumber = constructMergedTable(table, feedMergeContext.feedsToMerge, out); + if (mergedLineNumber == 0) { + LOG.warn("Skipping {} table. No entries found in zip files.", table.name); + } else if (mergedLineNumber == -1) { + LOG.error("Merge {} table failed!", table.name); + } } - double percentComplete = Math.round((double) i / numberOfTables * 10000d) / 100d; - status.update("Merging " + table.name, percentComplete); - // Perform the merge. - LOG.info("Writing {} to merged feed", table.name); - int mergedLineNumber = constructMergedTable(table, feedsToMerge, out); - if (mergedLineNumber == 0) { - LOG.warn("Skipping {} table. No entries found in zip files.", table.name); - } else if (mergedLineNumber == -1) { - LOG.error("Merge {} table failed!", table.name); + } catch (IOException e) { + String message = "Error creating output stream for feed merge."; + logAndReportToBugsnag(e, message); + status.fail(message, e); + } finally { + try { + feedMergeContext.close(); + } catch (IOException e) { + logAndReportToBugsnag(e, "Error closing FeedMergeContext object"); } } - // Close output stream for zip file. - out.close(); - if (mergeFeedsResult.failed) { - // Fail job if the merge result indicates something went wrong. - status.fail("Merging feed versions failed."); - } else { + if (!mergeFeedsResult.failed) { // Store feed locally and (if applicable) upload regional feed to S3. storeMergedFeed(); status.completeSuccessfully("Merged feed created successfully."); } LOG.info("Feed merge is complete."); - if (mergedVersion != null && !status.error && !mergeFeedsResult.failed) { + if (shouldLoadAsNewFeed()) { mergedVersion.inputVersions = feedVersions.stream().map(FeedVersion::retrieveId).collect(Collectors.toSet()); // Handle the processing of the new version when storing new version (note: s3 upload is handled within this job). 
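The status updates inside the table loop above round the completed fraction to two decimal places; a tiny arithmetic sketch of that calculation (the table count is illustrative):

public class MergeProgressExample {
    public static void main(String[] args) {
        int numberOfTables = 14;
        for (int i = 0; i < numberOfTables; i++) {
            // fraction * 10000, rounded, then divided by 100 gives a percentage
            // with two decimal places (0.0, 7.14, 14.29, ...).
            double percentComplete = Math.round((double) i / numberOfTables * 10000d) / 100d;
            System.out.printf("merging table %d of %d -> %.2f%%%n", i + 1, numberOfTables, percentComplete);
        }
    }
}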
// We must add this job in jobLogic (rather than jobFinished) because jobFinished is called after this job's @@ -281,38 +246,77 @@ public void jobFinished() { } /** - * Collect zipFiles for each feed version before merging tables. - * Note: feed versions are sorted by first calendar date so that future dataset is iterated over first. This is - * required for the MTC merge strategy which prefers entities from the future dataset over past entities. + * Obtains trip ids whose entries in the stop_times table differ between the active and future feed. */ - private List collectAndSortFeeds(Set feedVersions) { - return feedVersions.stream().map(version -> { - try { - return new FeedToMerge(version); - } catch (Exception e) { - LOG.error("Could not create zip file for version: {}", version.version); - return null; - } - }).filter(Objects::nonNull).filter(entry -> entry.version.validationResult != null - && entry.version.validationResult.firstCalendarDate != null) - // MTC-specific sort mentioned in above comment. - // TODO: If another merge strategy requires a different sort order, a merge type check should be added. - .sorted(Comparator.comparing(entry -> entry.version.validationResult.firstCalendarDate, - Comparator.reverseOrder())).collect(Collectors.toList()); + private Set getSharedTripIdsWithInconsistentSignature() { + return sharedTripIdsWithInconsistentSignature; + } + + private List
<Table> getTablesToMerge() {
+ List<Table>
tablesToMerge = Arrays.stream(Table.tablesInOrder) + .filter(Table::isSpecTable) + .collect(Collectors.toList()); + if (DataManager.isExtensionEnabled("mtc")) { + // Merge GTFS+ tables only if MTC extension is enabled. We should do this for both + // regional and MTC merge strategies. + tablesToMerge.addAll(Arrays.asList(GtfsPlusTable.tables)); + } + return tablesToMerge; + } + + /** + * Check if the table should be skipped in the merged output. + */ + private boolean shouldSkipTable(String tableName) { + if (mergeType.equals(REGIONAL) && tableName.equals(Table.FEED_INFO.name)) { + // It does not make sense to include the feed_info table when performing a + // regional feed merge because this file is intended to contain data specific to + // a single agency feed. + // TODO: Perhaps future work can generate a special feed_info file for the merged + // file. + LOG.warn("Skipping feed_info table for regional merge."); + return true; + } + if (tableName.equals(Table.PATTERNS.name) || tableName.equals(Table.PATTERN_STOP.name)) { + LOG.warn("Skipping editor-only table {}.", tableName); + return true; + } + return false; + } + + /** + * Determines whether {@link ProcessSingleFeedJob} should be run as a follow on task at the completion of the merge. + * The merge must have no errors and the mergedVersion must not be null (i.e., we need somewhere to put the new + * version). + */ + private boolean shouldLoadAsNewFeed() { + return mergedVersion != null && !status.error && !mergeFeedsResult.failed; + } + + /** + * Handle updating {@link MergeFeedsResult} and the overall job status when a failure condition is triggered while + * merging feeds. + */ + public void failMergeJob(String failureMessage) { + LOG.error(failureMessage); + mergeFeedsResult.failed = true; + mergeFeedsResult.errorCount++; + mergeFeedsResult.failureReasons.add(failureMessage); + // Use generic message for overall job status. + status.fail("Merging feed versions failed."); } /** * Handles writing the GTFS zip file to disk. For REGIONAL merges, this will end up in a project subdirectory on s3. * Otherwise, it will write to a new version. */ - private void storeMergedFeed() throws IOException, CheckedAWSException { + private void storeMergedFeed() { if (mergedVersion != null) { // Store the zip file for the merged feed version. 
try { mergedVersion.newGtfsFile(new FileInputStream(mergedTempFile)); } catch (IOException e) { - LOG.error("Could not store merged feed for new version", e); - throw e; + logAndReportToBugsnag(e, "Could not store merged feed for new version"); } } // Write the new latest regional merge file to s3://$BUCKET/project/$PROJECT_ID.zip @@ -321,15 +325,21 @@ private void storeMergedFeed() throws IOException, CheckedAWSException { // Store the project merged zip locally or on s3 if (DataManager.useS3) { String s3Key = String.join("/", "project", filename); - S3Utils.getDefaultS3Client().putObject(S3Utils.DEFAULT_BUCKET, s3Key, mergedTempFile); + try { + S3Utils.getDefaultS3Client().putObject(S3Utils.DEFAULT_BUCKET, s3Key, mergedTempFile); + } catch (CheckedAWSException e) { + String message = "Could not upload store merged feed for new version"; + logAndReportToBugsnag(e, message); + status.fail(message, e); + } LOG.info("Storing merged project feed at {}", S3Utils.getDefaultBucketUriForKey(s3Key)); } else { try { - FeedVersion.feedStore - .newFeed(filename, new FileInputStream(mergedTempFile), null); + FeedVersion.feedStore.newFeed(filename, new FileInputStream(mergedTempFile), null); } catch (IOException e) { - LOG.error("Could not store feed for project " + filename, e); - throw e; + String message = "Could not store feed for project " + filename; + logAndReportToBugsnag(e, message); + status.fail(message, e); } } } @@ -343,718 +353,152 @@ private void storeMergedFeed() throws IOException, CheckedAWSException { * @param out output stream to write table into * @return number of lines in merged table */ - private int constructMergedTable(Table table, List feedsToMerge, - ZipOutputStream out) throws IOException { - // CSV writer used to write to zip file. - CsvListWriter writer = new CsvListWriter(new OutputStreamWriter(out), CsvPreference.STANDARD_PREFERENCE); - String keyField = table.getKeyFieldName(); - String orderField = table.getOrderFieldName(); - if (mergeType.equals(SERVICE_PERIOD) && DataManager.isExtensionEnabled("mtc")) { - // MTC requires that the stop and route records be merged based on different key fields. - switch (table.name) { - case "stops": - keyField = "stop_code"; - break; - case "routes": - keyField = "route_short_name"; - break; - default: - // Otherwise, use the standard key field (see keyField declaration. - break; - } - } - // Set up objects for tracking the rows encountered - Map rowValuesForStopOrRouteId = new HashMap<>(); - Set rowStrings = new HashSet<>(); - // Track shape_ids found in future feed in order to check for conflicts with active feed (MTC only). - Set shapeIdsInFutureFeed = new HashSet<>(); - int mergedLineNumber = 0; - // Get the spec fields to export - List specFields = table.specFields(); - boolean stopCodeMissingFromFirstFeed = false; + private int constructMergedTable(Table table, List feedsToMerge, ZipOutputStream out) { + MergeLineContext ctx = null; try { - // Get shared fields between all feeds being merged. This is used to filter the spec fields so that only - // fields found in the collection of feeds are included in the merged table. - Set sharedFields = getSharedFields(feedsToMerge, table); - // Initialize future feed's first date to the first calendar date from the validation result. - // This is equivalent to either the earliest date of service defined for a calendar_date record or the - // earliest start_date value for a calendars.txt record. 
For MTC, however, they require that GTFS - // providers use calendars.txt entries and prefer that this value (which is used to determine cutoff - // dates for the active feed when merging with the future) be strictly assigned the earliest - // calendar#start_date (unless that table for some reason does not exist). - LocalDate futureFeedFirstDate = feedsToMerge.get(0).version.validationResult.firstCalendarDate; - LocalDate futureFirstCalendarStartDate = LocalDate.MAX; + ctx = MergeLineContext.create(this, table, out); + // Iterate over each zip file. For service period merge, the first feed is the future GTFS. for (int feedIndex = 0; feedIndex < feedsToMerge.size(); feedIndex++) { - boolean keyFieldMissing = false; - // Use for a new agency ID for use if the feed does not contain one. Initialize to - // null. If the value becomes non-null, the agency_id is missing and needs to be - // replaced with the generated value stored in this variable. - String newAgencyId = null; - mergeFeedsResult.feedCount++; - FeedToMerge feed = feedsToMerge.get(feedIndex); - FeedVersion version = feed.version; - FeedSource feedSource = version.parentFeedSource(); - // Generate ID prefix to scope GTFS identifiers to avoid conflicts. - String idScope = getCleanName(feedSource.name) + version.version; - CsvReader csvReader = table.getCsvReader(feed.zipFile, null); - // If csv reader is null, the table was not found in the zip file. There is no need - // to handle merging this table for the current zip file. - if (csvReader == null) { - LOG.warn("Table {} not found in the zip file for {}{}", table.name, - feedSource.name, version.version); - continue; - } - LOG.info("Adding {} table for {}{}", table.name, feedSource.name, version.version); - - Field[] fieldsFoundInZip = - table.getFieldsFromFieldHeaders(csvReader.getHeaders(), null); - List fieldsFoundList = Arrays.asList(fieldsFoundInZip); - // Determine the index of the key field for this version's table. - int keyFieldIndex = getFieldIndex(fieldsFoundInZip, keyField); - if (keyFieldIndex == -1) { - LOG.error("No {} field exists for {} table (feed={})", keyField, table.name, - feed.version.id); - keyFieldMissing = true; - // If there is no agency_id for agency table, create one and ensure that - // route#agency_id gets set. + ctx.startNewFeed(feedIndex); + if (ctx.skipFile) continue; + LOG.info("Adding {} table for {}{}", table.name, ctx.feedSource.name, ctx.version.version); + // Iterate over the rows of the table and write them to the merged output table. If an error was + // encountered, return -1 to fail the merge job immediately. + if (!ctx.iterateOverRows()) { + return -1; } - int lineNumber = 0; - // Iterate over rows in table, writing them to the out file. - while (csvReader.readRecord()) { - String keyValue = csvReader.get(keyFieldIndex); - if (feedIndex > 0 && mergeType.equals(SERVICE_PERIOD)) { - // Always prefer the "future" file for the feed_info table, which means - // we can skip any iterations following the first one. If merging the agency - // table, we should only skip the following feeds if performing an MTC merge - // because that logic assumes the two feeds share the same agency (or - // agencies). NOTE: feed_info file is skipped by default (outside of this - // method) for a regional merge), which is why this block is exclusively - // for an MTC merge. Also, this statement may print multiple log - // statements, but it is deliberately nested in the csv while block in - // order to detect agency_id mismatches and fail the merge if found. 
- if (table.name.equals("feed_info")) { - LOG.warn("Skipping {} file for feed {}/{} (future file preferred)", - table.name, feedIndex, feedsToMerge.size()); - continue; - } else if (table.name.equals("agency")) { - // The second feed's agency table must contain the same agency_id - // value as the first feed. - String agencyId = String.join(":", keyField, keyValue); - if (!"".equals(keyValue) && !referenceTracker.transitIds.contains(agencyId)) { - String otherAgencyId = referenceTracker.transitIds.stream() - .filter(transitId -> transitId.startsWith("agency_id")) - .findAny() - .orElse(null); - String message = String.format( - "MTC merge detected mismatching agency_id values between two " - + "feeds (%s and %s). Failing merge operation.", - agencyId, - otherAgencyId - ); - LOG.error(message); - mergeFeedsResult.failed = true; - mergeFeedsResult.failureReasons.add(message); - return -1; - } - LOG.warn("Skipping {} file for feed {}/{} (future file preferred)", - table.name, feedIndex, feedsToMerge.size()); - continue; - } else if (table.name.equals("calendar_dates")) { - if ( - futureFirstCalendarStartDate.isBefore(LocalDate.MAX) && - futureFeedFirstDate.isBefore(futureFirstCalendarStartDate) - ) { - // If the future feed's first date is before the feed's first calendar start date, - // override the future feed first date with the calendar start date for use when checking - // MTC calendar_dates and calendar records for modification/exclusion. - futureFeedFirstDate = futureFirstCalendarStartDate; - } - } - } - // Check certain initial conditions on the first line of the file. - if (lineNumber == 0) { - if (table.name.equals(Table.AGENCY.name) && (keyFieldMissing || keyValue.equals(""))) { - // agency_id is optional if only one agency is present, but that will - // cause issues for the feed merge, so we need to insert an agency_id - // for the single entry. - newAgencyId = UUID.randomUUID().toString(); - if (keyFieldMissing) { - // Only add agency_id field if it is missing in table. - List fieldsList = new ArrayList<>(Arrays.asList(fieldsFoundInZip)); - fieldsList.add(Table.AGENCY.fields[0]); - fieldsFoundInZip = fieldsList.toArray(fieldsFoundInZip); - sharedFields.add(Table.AGENCY.fields[0]); - } - fieldsFoundList = Arrays.asList(fieldsFoundInZip); - } - if (mergeType.equals(SERVICE_PERIOD) && table.name.equals("stops")) { - if (lineNumber == 0) { - // Before reading any lines in stops.txt, first determine whether all records contain - // properly filled stop_codes. The rules governing this logic are as follows: - // 1. Stops with location_type greater than 0 (i.e., anything but 0 or empty) are permitted - // to have empty stop_codes (even if there are other stops in the feed that have - // stop_code values). This is because these location_types represent special entries - // that are either stations, entrances/exits, or generic nodes (e.g., for - // pathways.txt). - // 2. For regular stops (location_type = 0 or empty), all or none of the stops must - // contain stop_codes. Otherwise, the merge feeds job will be failed. - int stopsMissingStopCodeCount = 0; - int stopsCount = 0; - int specialStopsCount = 0; - int locationTypeIndex = getFieldIndex(fieldsFoundInZip, "location_type"); - int stopCodeIndex = getFieldIndex(fieldsFoundInZip, "stop_code"); - // Get special stops reader to iterate over every stop and determine if stop_code values - // are present. 
- CsvReader stopsReader = table.getCsvReader(feed.zipFile, null); - while (stopsReader.readRecord()) { - stopsCount++; - String locationType = stopsReader.get(locationTypeIndex); - // Special stop records (i.e., a station, entrance, or anything with - // location_type > 0) do not need to specify stop_code. Other stops should. - boolean isSpecialStop = !"".equals(locationType) && !"0".equals(locationType); - String stopCode = stopsReader.get(stopCodeIndex); - boolean stopCodeIsMissing = "".equals(stopCode); - if (isSpecialStop) specialStopsCount++; - else if (stopCodeIsMissing) stopsMissingStopCodeCount++; - } - LOG.info("total stops: {}", stopsCount); - LOG.info("stops missing stop_code: {}", stopsMissingStopCodeCount); - if (stopsMissingStopCodeCount + specialStopsCount == stopsCount) { - // If all stops are missing stop_code (taking into account the special stops that do - // not require stop_code), we simply default to merging on stop_id. - LOG.warn( - "stop_code is not present in file {}/{}. Reverting to stop_id", - feedIndex + 1, feedsToMerge.size()); - // If the key value for stop_code is not present, revert to stop_id. - keyField = table.getKeyFieldName(); - keyFieldIndex = table.getKeyFieldIndex(fieldsFoundInZip); - keyValue = csvReader.get(keyFieldIndex); - // When all stops missing stop_code for the first feed, there's nothing to do (i.e., - // no failure condition has been triggered yet). Just indicate this in the flag and - // proceed with the merge. - if (feedIndex == 0) stopCodeMissingFromFirstFeed = true; - // However... if the second feed was missing stop_codes and the first feed was not, - // fail the merge job. - if (feedIndex == 1 && !stopCodeMissingFromFirstFeed) { - mergeFeedsResult.failed = true; - mergeFeedsResult.errorCount++; - mergeFeedsResult.failureReasons.add( - stopCodeFailureMessage(stopsMissingStopCodeCount, stopsCount, specialStopsCount) - ); - } - } else if (stopsMissingStopCodeCount > 0) { - // If some, but not all, stops are missing stop_code, the merge feeds job must fail. - mergeFeedsResult.failed = true; - mergeFeedsResult.errorCount++; - mergeFeedsResult.failureReasons.add( - stopCodeFailureMessage(stopsMissingStopCodeCount, stopsCount, specialStopsCount) - ); - } - } - } - } - // Filter the spec fields on the set of shared fields found in all feeds to be merged. - List sharedSpecFields = specFields.stream() - .filter(field -> containsField(sharedFields, field.name)) - .collect(Collectors.toList()); - Field[] sharedSpecFieldsArray = sharedSpecFields.toArray(new Field[0]); - boolean skipRecord = false; - String[] rowValues = new String[sharedSpecFields.size()]; - String[] values = csvReader.getValues(); - if (values.length == 1) { - LOG.warn("Found blank line. Skipping..."); - continue; - } - // Piece together the row to write, which should look practically identical to the original - // row except for the identifiers receiving a prefix to avoid ID conflicts. - for (int specFieldIndex = 0; specFieldIndex < sharedSpecFields.size(); specFieldIndex++) { - // There is nothing to do in this loop if it has already been determined that the record should - // be skipped. - if (skipRecord) continue; - Field field = sharedSpecFields.get(specFieldIndex); - // Get index of field from GTFS spec as it appears in feed - int index = fieldsFoundList.indexOf(field); - String val = csvReader.get(index); - // Default value to write is unchanged from value found in csv (i.e. val). 
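A condensed sketch of the stop_code rule applied above: regular stops (location_type empty or 0) must either all carry stop_code values or all omit them (in which case the merge falls back to stop_id), while stations, entrances, and other special records are exempt. The in-memory rows below are illustrative stand-ins for the CSV reader:

import java.util.List;

public class StopCodeRuleExample {
    // locationType and stopCode kept as raw strings, mirroring values read from stops.txt.
    record StopRow(String locationType, String stopCode) {}

    /** True if some, but not all, regular stops are missing stop_code (the failing condition). */
    static boolean violatesStopCodeRule(List<StopRow> stops) {
        int stopsCount = 0;
        int specialStopsCount = 0;
        int stopsMissingStopCode = 0;
        for (StopRow stop : stops) {
            stopsCount++;
            // location_type > 0 (station/entrance/node) records never need a stop_code.
            boolean isSpecialStop = !"".equals(stop.locationType()) && !"0".equals(stop.locationType());
            if (isSpecialStop) specialStopsCount++;
            else if ("".equals(stop.stopCode())) stopsMissingStopCode++;
        }
        // Every regular stop missing stop_code: revert to stop_id matching, not a failure.
        if (stopsMissingStopCode + specialStopsCount == stopsCount) return false;
        return stopsMissingStopCode > 0;
    }

    public static void main(String[] args) {
        List<StopRow> stops = List.of(
            new StopRow("0", "A1"),  // regular stop with stop_code
            new StopRow("", ""),     // regular stop missing stop_code -> violation
            new StopRow("1", "")     // station, exempt
        );
        System.out.println(violatesStopCodeRule(stops)); // true
    }
}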
Note: if looking to - // modify the value that is written in the merged file, you must update valueToWrite (e.g., - // updating the current feed's end_date or accounting for cases where IDs conflict). - String valueToWrite = val; - // Handle filling in agency_id if missing when merging regional feeds. - if (newAgencyId != null && field.name.equals("agency_id") && mergeType - .equals(REGIONAL)) { - if (val.equals("") && table.name.equals("agency") && lineNumber > 0) { - // If there is no agency_id value for a second (or greater) agency - // record, fail the merge feed job. - String message = String.format( - "Feed %s has multiple agency records but no agency_id values.", - feed.version.id); - mergeFeedsResult.failed = true; - mergeFeedsResult.failureReasons.add(message); - LOG.error(message); - return -1; - } - LOG.info("Updating {}#agency_id to (auto-generated) {} for ID {}", - table.name, newAgencyId, keyValue); - val = newAgencyId; - } - // Determine if field is a GTFS identifier. - boolean isKeyField = - field.isForeignReference() || keyField.equals(field.name); - if (this.mergeType.equals(REGIONAL) && isKeyField && !val.isEmpty()) { - // For regional merge, if field is a GTFS identifier (e.g., route_id, - // stop_id, etc.), add scoped prefix. - valueToWrite = String.join(":", idScope, val); - } - // Only need to check for merge conflicts if using MTC merge type because - // the regional merge type scopes all identifiers by default. Also, the - // reference tracker will get far too large if we attempt to use it to - // track references for a large number of feeds (e.g., every feed in New - // York State). - if (mergeType.equals(SERVICE_PERIOD)) { - Set idErrors; - // If analyzing the second feed (non-future feed), the service_id always gets feed scoped. - // See https://github.com/ibi-group/datatools-server/issues/244 - if (feedIndex == 1 && field.name.equals("service_id")) { - valueToWrite = String.join(":", idScope, val); - mergeFeedsResult.remappedIds.put( - getTableScopedValue(table, idScope, val), - valueToWrite - ); - idErrors = referenceTracker - .checkReferencesAndUniqueness(keyValue, lineNumber, field, valueToWrite, - table, keyField, orderField); - } else { - idErrors = referenceTracker - .checkReferencesAndUniqueness(keyValue, lineNumber, field, val, - table, keyField, orderField); - } - - // Store values for key fields that have been encountered. - // TODO Consider using Strategy Pattern https://en.wikipedia.org/wiki/Strategy_pattern - // instead of a switch statement. - switch (table.name) { - case "calendar": - // If any service_id in the active feed matches with the future - // feed, it should be modified and all associated trip records - // must also be changed with the modified service_id. - // TODO How can we check that calendar_dates entries are - // duplicates? I think we would need to consider the - // service_id:exception_type:date as the unique key and include any - // all entries as long as they are unique on this key. - if (hasDuplicateError(idErrors)) { - String key = getTableScopedValue(table, idScope, val); - // Modify service_id and ensure that referencing trips - // have service_id updated. 
- valueToWrite = String.join(":", idScope, val); - mergeFeedsResult.remappedIds.put(key, valueToWrite); - } - int startDateIndex = - getFieldIndex(fieldsFoundInZip, "start_date"); - LocalDate startDate = LocalDate - .parse(csvReader.get(startDateIndex), - GTFS_DATE_FORMATTER); - if (feedIndex == 0) { - // For the future feed, check if the calendar's start date is earlier than the - // previous earliest value and update if so. - if (futureFirstCalendarStartDate.isAfter(startDate)) { - futureFirstCalendarStartDate = startDate; - } - } else { - // If a service_id from the active calendar has both the - // start_date and end_date in the future, the service will be - // excluded from the merged file. Records in trips, - // calendar_dates, and calendar_attributes referencing this - // service_id shall also be removed/ignored. Stop_time records - // for the ignored trips shall also be removed. - if (!startDate.isBefore(futureFeedFirstDate)) { - LOG.warn( - "Skipping calendar entry {} because it operates fully within the time span of future feed.", - keyValue); - String key = getTableScopedValue(table, idScope, keyValue); - mergeFeedsResult.skippedIds.add(key); - skipRecord = true; - continue; - } - // If a service_id from the active calendar has only the - // end_date in the future, the end_date shall be set to one - // day prior to the earliest start_date in future dataset - // before appending the calendar record to the merged file. - int endDateIndex = - getFieldIndex(fieldsFoundInZip, "end_date"); - if (index == endDateIndex) { - LocalDate endDate = LocalDate - .parse(csvReader.get(endDateIndex), GTFS_DATE_FORMATTER); - if (!endDate.isBefore(futureFeedFirstDate)) { - val = valueToWrite = futureFeedFirstDate - .minus(1, ChronoUnit.DAYS) - .format(GTFS_DATE_FORMATTER); - } - } - } - // Track service ID because we want to avoid removing trips that may reference this - // service_id when the service_id is used by calendar_dates that operate in the valid - // date range, i.e., before the future feed's first date. - if (field.name.equals("service_id")) mergeFeedsResult.serviceIds.add(valueToWrite); - break; - case "calendar_dates": - // Drop any calendar_dates.txt records from the existing feed for dates that are - // not before the first date of the future feed. - int dateIndex = getFieldIndex(fieldsFoundInZip, "date"); - LocalDate date = LocalDate.parse(csvReader.get(dateIndex), GTFS_DATE_FORMATTER); - if (feedIndex > 0) { - if (!date.isBefore(futureFeedFirstDate)) { - LOG.warn( - "Skipping calendar_dates entry {} because it operates in the time span of future feed (i.e., after or on {}).", - keyValue, - futureFeedFirstDate); - String key = getTableScopedValue(table, idScope, keyValue); - mergeFeedsResult.skippedIds.add(key); - skipRecord = true; - continue; - } - } - // Track service ID because we want to avoid removing trips that may reference this - // service_id when the service_id is used by calendar.txt records that operate in - // the valid date range, i.e., before the future feed's first date. - if (field.name.equals("service_id")) mergeFeedsResult.serviceIds.add(valueToWrite); - break; - case "shapes": - // If a shape_id is found in both future and active datasets, all shape points from - // the active dataset must be feed-scoped. Otherwise, the merged dataset may contain - // shape_id:shape_pt_sequence values from both datasets (e.g., if future dataset contains - // sequences 1,2,3,10 and active contains 1,2,7,9,10; the merged set will contain - // 1,2,3,7,9,10). 
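The end-date rule applied above (an active calendar whose end_date falls on or after the future feed's first day is trimmed to the day before that date) can be sketched with plain java.time; the yyyyMMdd formatter below is an illustrative stand-in for gtfs-lib's GTFS date formatter:

import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;

public class CalendarEndDateTrimExample {
    private static final DateTimeFormatter GTFS_DATE = DateTimeFormatter.ofPattern("yyyyMMdd");

    /** Trim an active calendar's end_date so the service stops the day before the future feed begins. */
    static String trimEndDate(String activeEndDate, LocalDate futureFeedFirstDate) {
        LocalDate endDate = LocalDate.parse(activeEndDate, GTFS_DATE);
        if (!endDate.isBefore(futureFeedFirstDate)) {
            return futureFeedFirstDate.minus(1, ChronoUnit.DAYS).format(GTFS_DATE);
        }
        return activeEndDate;
    }

    public static void main(String[] args) {
        // Future feed starts 2021-06-01, so an active end_date of 2021-06-15 becomes 2021-05-31.
        System.out.println(trimEndDate("20210615", LocalDate.of(2021, 6, 1))); // 20210531
        // An end_date already before the future feed's first day is left untouched.
        System.out.println(trimEndDate("20210430", LocalDate.of(2021, 6, 1))); // 20210430
    }
}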
- if (field.name.equals("shape_id")) { - if (feedIndex == 0) { - // Track shape_id if working on future feed. - shapeIdsInFutureFeed.add(val); - } else if (shapeIdsInFutureFeed.contains(val)) { - // For the active feed, if the shape_id was already processed from the - // future feed, we need to add the feed-scope to avoid weird, hybrid shapes - // with points from both feeds. - valueToWrite = String.join(":", idScope, val); - // Update key value for subsequent ID conflict checks for this row. - keyValue = valueToWrite; - mergeFeedsResult.remappedIds.put( - getTableScopedValue(table, idScope, val), - valueToWrite - ); - // Re-check refs and uniqueness after changing shape_id value. (Note: this - // probably won't have any impact, but there's not much harm in including it.) - idErrors = referenceTracker - .checkReferencesAndUniqueness(keyValue, lineNumber, field, valueToWrite, - table, keyField, orderField); - } - } - // Skip record if normal duplicate errors are found. - if (hasDuplicateError(idErrors)) skipRecord = true; - break; - case "trips": - // trip_ids between active and future datasets must not match. If any trip_id is found - // to be matching, the merge should fail with appropriate notification to user with the - // cause of the failure. Merge result should include all conflicting trip_ids. - for (NewGTFSError error : idErrors) { - if (error.errorType.equals(NewGTFSErrorType.DUPLICATE_ID)) { - mergeFeedsResult.failureReasons - .add("Trip ID conflict caused merge failure."); - mergeFeedsResult.idConflicts.add(error.badValue); - mergeFeedsResult.errorCount++; - if (failOnDuplicateTripId) - mergeFeedsResult.failed = true; - skipRecord = true; - } - } - break; - case "stops": - // When stop_code is included, stop merging will be based on that. If stop_code is not - // included, it will be based on stop_id. All stops in future data will be carried - // forward and any stops found in active data that are not in the future data shall be - // appended. If one of the feed is missing stop_code, merge fails with a notification to - // the user with suggestion that the feed with missing stop_code must be fixed with - // stop_code. - // NOTE: route case is also used by the stops case, so the route - // case must follow this block. - case "routes": - boolean useAltKey = - keyField.equals("stop_code") || keyField.equals("route_short_name"); - // First, check uniqueness of primary key value (i.e., stop or route ID) - // in case the stop_code or route_short_name are being used. This - // must occur unconditionally because each record must be tracked - // by the reference tracker. - String primaryKeyValue = - csvReader.get(table.getKeyFieldIndex(fieldsFoundInZip)); - Set primaryKeyErrors = referenceTracker - .checkReferencesAndUniqueness(primaryKeyValue, lineNumber, - field, val, table); - // Merging will be based on route_short_name/stop_code in the current and future datasets. All - // matching route_short_names/stop_codes between the datasets shall be considered same route/stop. Any - // route_short_name/stop_code in active data not present in the future will be appended to the - // future routes/stops file. - if (useAltKey) { - if ("".equals(keyValue) && field.name.equals(table.getKeyFieldName())) { - // If alt key is empty (which is permitted), skip - // checking of alt key dupe errors/re-mapping values and - // simply use the primary key (route_id/stop_id). 
- if (hasDuplicateError(primaryKeyErrors)) { - skipRecord = true; - } - } else if (hasDuplicateError(idErrors)) { - // If we encounter a route/stop that shares its alt. - // ID with a previous route/stop, we need to - // remap its route_id/stop_id field so that - // references point to the previous - // route_id/stop_id. For example, - // route_short_name in both feeds is "ABC" but - // each route has a different route_id (123 and - // 456). This block will map references to 456 to - // 123 so that ABC/123 is the route of record. - //////////////////////////////////////////////////////// - // Get current route/stop ID. (Note: primary - // ID index is always zero because we're - // iterating over the spec fields). - String currentPrimaryKey = rowValues[0]; - // Get unique key to check for remapped ID when - // writing values to file. - String key = - getTableScopedValue(table, idScope, currentPrimaryKey); - // Extract the route/stop ID value used for the - // route/stop with already encountered matching - // short name/stop code. - String[] strings = rowValuesForStopOrRouteId.get( - String.join(":", keyField, val) - ); - String keyForMatchingAltId = strings[0]; - if (!keyForMatchingAltId.equals(currentPrimaryKey)) { - // Remap this row's route_id/stop_id to ensure - // that referencing entities (trips, stop_times) - // have their references updated. - mergeFeedsResult.remappedIds.put(key, keyForMatchingAltId); - } - skipRecord = true; - } - // Next check for regular ID conflicts (e.g., on route_id or stop_id) because any - // conflicts here will actually break the feed. This essentially handles the case - // where two routes have different short_names, but share the same route_id. We want - // both of these routes to end up in the merged feed in this case because we're - // matching on short name, so we must modify the route_id. - if (!skipRecord && !referenceTracker.transitIds - .contains(String.join(":", keyField, keyValue))) { - if (hasDuplicateError(primaryKeyErrors)) { - String key = getTableScopedValue(table, idScope, val); - // Modify route_id and ensure that referencing trips - // have route_id updated. - valueToWrite = String.join(":", idScope, val); - mergeFeedsResult.remappedIds.put(key, valueToWrite); - } - } - } else { - // Key field has defaulted to the standard primary key field - // (stop_id or route_id), which makes the check much - // simpler (just skip the duplicate record). - if (hasDuplicateError(idErrors)) skipRecord = true; - } - - if (newAgencyId != null && field.name.equals("agency_id")) { - LOG.info( - "Updating route#agency_id to (auto-generated) {} for route={}", - newAgencyId, keyValue); - val = newAgencyId; - } - break; - default: - // For any other table, skip any duplicate record. - if (hasDuplicateError(idErrors)) skipRecord = true; - break; - } - } - // If the current field is a foreign reference, check if the reference has been removed in the - // merged result. If this is the case (or other conditions are met), we will need to skip this - // record. Likewise, if the reference has been modified, ensure that the value written to the - // merged result is correctly updated. - if (field.isForeignReference()) { - String key = getTableScopedValue(field.referenceTable, idScope, val); - // Check if we're performing a service period merge, this ref field is a service_id, and it - // is not found in the list of service_ids (e.g., it was removed). 
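Because stops and routes are matched on stop_code/route_short_name when those fields are present, the same logical route can arrive from the two feeds under different route_ids; the duplicate row is skipped and later references are remapped to the id already written. A minimal sketch of that remapping idea, with plain maps standing in for the reference tracker and for the table-scoped keys used by the job:

import java.util.HashMap;
import java.util.Map;

public class AltKeyRemapExample {
    public static void main(String[] args) {
        // route_short_name -> route_id already written to the merged output (from the future feed).
        Map<String, String> routeIdByShortName = new HashMap<>();
        routeIdByShortName.put("ABC", "123");

        // Remapped identifiers, keyed roughly like "table:idScope:id" in the merge job.
        Map<String, String> remappedIds = new HashMap<>();

        // The active feed also defines short name "ABC", but under route_id 456.
        String activeIdScope = "activefeed1"; // illustrative feed scope prefix
        String activeRouteId = "456";
        String existingRouteId = routeIdByShortName.get("ABC");
        if (existingRouteId != null && !existingRouteId.equals(activeRouteId)) {
            // Skip the duplicate route row; trips referencing 456 are rewritten to point at 123.
            remappedIds.put(String.join(":", "routes", activeIdScope, activeRouteId), existingRouteId);
        }
        System.out.println(remappedIds); // {routes:activefeed1:456=123}
    }
}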
- boolean isValidServiceId = mergeFeedsResult.serviceIds.contains(valueToWrite); - boolean serviceIdShouldBeSkipped = mergeType.equals(SERVICE_PERIOD) && - field.name.equals("service_id") && - !isValidServiceId; - // If the current foreign ref points to another record that has - // been skipped or is a ref to a non-existent service_id during a service period merge, skip - // this record and add its primary key to the list of skipped IDs (so that other references - // can be properly omitted). - if (mergeFeedsResult.skippedIds.contains(key) || serviceIdShouldBeSkipped) { - // If a calendar#service_id has been skipped (it's listed in skippedIds), but there were - // valid service_ids found in calendar_dates, do not skip that record for both the - // calendar_date and any related trips. - if (field.name.equals("service_id") && isValidServiceId) { - LOG.warn("Not skipping valid service_id {} for {} {}", valueToWrite, table.name, keyValue); - } else { - String skippedKey = getTableScopedValue(table, idScope, keyValue); - if (orderField != null) { - skippedKey = String.join(":", skippedKey, - csvReader.get(getFieldIndex(fieldsFoundInZip, orderField))); - } - mergeFeedsResult.skippedIds.add(skippedKey); - skipRecord = true; - continue; - } - } - // If the field is a foreign reference, check to see whether the reference has been - // remapped due to a conflicting ID from another feed (e.g., calendar#service_id). - if (mergeFeedsResult.remappedIds.containsKey(key)) { - mergeFeedsResult.remappedReferences++; - // If the value has been remapped update the value to write. - valueToWrite = mergeFeedsResult.remappedIds.get(key); - } - } - rowValues[specFieldIndex] = valueToWrite; - } // End of iteration over each field for a row. - // Do not write rows that are designated to be skipped. - if (skipRecord && this.mergeType.equals(SERVICE_PERIOD)) { - mergeFeedsResult.recordsSkipCount++; - continue; - } - String newLine = String.join(",", rowValues); - switch (table.name) { - // Store row values for route or stop ID (or alternative ID field) in order - // to check for ID conflicts. NOTE: This is only intended to be used for - // routes and stops. Otherwise, this might (will) consume too much memory. - case "stops": - case "routes": - // FIXME: This should be revised for tables with order fields, but it should work fine for its - // primary purposes: to detect exact copy rows and to temporarily hold the data in case a reference - // needs to be looked up in order to remap an entity to that key. - // Here we need to get the key field index according to the spec - // table definition. Otherwise, if we use the keyFieldIndex variable - // defined above, we will be using the found fields index, which will - // cause major issues when trying to put and get values into the - // below map. - int index = getFieldIndex(sharedSpecFieldsArray, keyField); - String key = String.join(":", keyField, rowValues[index]); - rowValuesForStopOrRouteId.put(key, rowValues); - break; - case "transfers": - case "fare_rules": - case "directions": // GTFS+ table - if (!rowStrings.add(newLine)) { - // The line already exists in the output file, do not append it again. This prevents duplicate - // entries for certain files that do not contain primary keys (e.g., fare_rules and transfers) and - // do not otherwise have convenient ways to track uniqueness (like an order field). - // FIXME: add ordinal field/compound keys for transfers (from/to_stop_id) and fare_rules (?). 
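Tables without a convenient primary key (transfers, fare_rules, and the GTFS+ directions file) are deduplicated above simply by remembering each serialized row: Set.add returns false for a repeat, so the duplicate line is skipped. A tiny sketch:

import java.util.HashSet;
import java.util.Set;

public class KeylessRowDedupExample {
    public static void main(String[] args) {
        Set<String> rowStrings = new HashSet<>();
        String[] rows = {
            "stopA,stopB,2", // transfers.txt-style row: from_stop_id,to_stop_id,transfer_type
            "stopA,stopB,2", // identical row contributed by the other feed
            "stopA,stopC,0"
        };
        int skipped = 0;
        for (String row : rows) {
            if (!rowStrings.add(row)) {
                skipped++; // already written; do not append again
                continue;
            }
            System.out.println(row);
        }
        System.out.println("skipped duplicates: " + skipped); // 1
    }
}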
- // Perhaps it makes sense to include all unique fare rules rows, but transfers that share the - // same from/to stop IDs but different transfer times or other values should not both be - // included in the merged feed (yet this strategy would fail to filter those out). - mergeFeedsResult.recordsSkipCount++; - continue; - } - break; - default: - // Do nothing. - break; - - } - // Finally, handle writing lines to zip entry. - if (mergedLineNumber == 0) { - // Create entry for zip file. - ZipEntry tableEntry = new ZipEntry(table.name + ".txt"); - out.putNextEntry(tableEntry); - // Write headers to table. - String[] headers = sharedSpecFields.stream() - .map(field -> field.name) - .toArray(String[]::new); - writer.write(headers); - } - // Write line to table (plus new line char). - writer.write(rowValues); - lineNumber++; - mergedLineNumber++; - } // End of iteration over each row. } - writer.flush(); - out.closeEntry(); - } catch (Exception e) { - LOG.error("Error merging feed sources: {}", - feedVersions.stream().map(version -> version.parentFeedSource().name) - .collect(Collectors.toList()).toString()); - e.printStackTrace(); - throw e; + ctx.afterTableRecords(); + ctx.flushAndClose(); + } catch (IOException e) { + List versionNames = feedVersions.stream() + .map(version -> version.parentFeedSource().name) + .collect(Collectors.toList()); + String message = "Error merging feed sources: " + versionNames; + logAndReportToBugsnag(e, message); + status.fail(message, e); + } + if (ctx != null) { + // Track the number of lines in the merged table and return final number. + mergeFeedsResult.linesPerTable.put(table.name, ctx.mergedLineNumber); + return ctx.mergedLineNumber; } - // Track the number of lines in the merged table and return final number. - mergeFeedsResult.linesPerTable.put(table.name, mergedLineNumber); - return mergedLineNumber; + return 0; } - private static String stopCodeFailureMessage(int stopsMissingStopCodeCount, int stopsCount, int specialStopsCount) { - return String.format( - "If stop_code is provided for some stops (for those with location_type = " + - "empty or 0), all stops must have stop_code values. The merge process " + - "found %d of %d total stops that were incorrectly missing stop_code values. " + - "Note: \"special\" stops with location_type > 0 need not specify this value " + - "(%d special stops found in feed).", - stopsMissingStopCodeCount, - stopsCount, - specialStopsCount - ); - } + /** + * Get the merge strategy to use for MTC service period merges by checking the active and future feeds for various + * combinations of matching trip and service IDs. + */ + private void determineMergeStrategy() { + // Revised merge logic + // Step 1: TDM Merge functionality shall start with first comparing trip_ids + // between active and future GTFS feed. + if (feedMergeContext.areActiveAndFutureTripIdsDisjoint()) { + // If none of the trip_ids in active GTFS feed match with the trip_ids + // available in future GTFS feed, then proceed to Step 3; otherwise continue to the next step [Step 2]. + // Step 3: When the complete set of trip_ids between active and future GTFS feeds is different, + // all trip records from both feeds shall be added to the merged feed as per the following rule + // and the merge process will exit. + // If a service_id from an active calendar has an end date in the future, + // the end_date shall be set to one day prior to the earliest start_date in the future dataset + // before appending the calendar record to the merged file. 
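Step 1 above reduces to a set-disjointness check on trip_ids: if the active and future feeds share none, the existing DEFAULT path is taken; otherwise the shared trips must have their stop_times signatures compared. A sketch of that decision (the enum and sets are illustrative):

import java.util.Collections;
import java.util.Set;

public class MergeStrategyChoiceExample {
    enum Strategy { DEFAULT, CHECK_STOP_TIMES }

    static Strategy chooseStrategy(Set<String> activeTripIds, Set<String> futureTripIds) {
        // No shared trip_ids: append records from both feeds, trimming active calendars as needed.
        if (Collections.disjoint(activeTripIds, futureTripIds)) return Strategy.DEFAULT;
        // Shared trip_ids: stop_times signatures decide whether the merge can proceed.
        return Strategy.CHECK_STOP_TIMES;
    }

    public static void main(String[] args) {
        System.out.println(chooseStrategy(Set.of("t1", "t2"), Set.of("t3", "t4"))); // DEFAULT
        System.out.println(chooseStrategy(Set.of("t1", "t2"), Set.of("t2", "t9"))); // CHECK_STOP_TIMES
    }
}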
+ // The merge process shall end here by publishing the merged feed and informing the user
+ // that trip_ids were unique, which successfully created a merged feed.
- /** Get the set of shared fields for all feeds being merged for a specific table. */
- private Set<Field> getSharedFields(List<FeedToMerge> feedsToMerge, Table table) throws IOException {
- Set<Field> sharedFields = new HashSet<>();
- // First, iterate over each feed to collect the shared fields that need to be output in the merged table.
- for (FeedToMerge feed : feedsToMerge) {
- CsvReader csvReader = table.getCsvReader(feed.zipFile, null);
- // If csv reader is null, the table was not found in the zip file.
- if (csvReader == null) {
- continue;
+ // => Step 3 is the existing DEFAULT merge strategy.
+ mergeFeedsResult.mergeStrategy = MergeStrategy.DEFAULT;
+ } else {
+ // Step 2: If matching trip_ids are provided in active and future GTFS feed, for those matching trips,
+ // trip signatures – a combination of arrival_time, departure_time, stop_id,
+ // and stop_sequence – in stop_times.txt file should be compared.
+ // If all the matching trip_ids contain the same trip signatures, the merge process shall proceed
+ // to step 4. If a single trip signature does not match,
+ // the merge process shall stop with the following error message
+ // along with matching trip_ids with differing trip signatures.
+ // Error Message: Trips [trip_id] in new feed have differing makeup from matching trips in active feed.
+ // If a trip characteristic has changed, a new trip_id must be assigned.
+
+ // => Step 2 is the CHECK_STOP_TIMES strategy
+ // If just the service_ids are an exact match, check that the stop_times have matching signatures
+ // between the two feeds (i.e., each stop time in the ordered list is identical between the two feeds).
+ Feed futureFeed = feedMergeContext.future.feed;
+ Feed activeFeed = feedMergeContext.active.feed;
+ for (String tripId : feedMergeContext.sharedTripIds) {
+ compareStopTimesAndCollectTripAndServiceIds(tripId, futureFeed, activeFeed);
}
- // Get fields found from headers and add them to the shared fields set.
- Field[] fieldsFoundInZip = table.getFieldsFromFieldHeaders(csvReader.getHeaders(), null);
- sharedFields.addAll(Arrays.asList(fieldsFoundInZip));
+
+ // Build the set of calendars to be cloned/renamed/extended from trip ids present
+ // in both active/future feeds and that have a consistent signature.
+ // These trips will be linked to the new service_ids.
+ serviceIdsToCloneRenameAndExtend.addAll(
+ feedMergeContext.active.getServiceIds(this.sharedTripIdsWithConsistentSignature)
+ );
+
+ // Build the set of calendars to be shortened to the day before the future feed start date
+ // from trips in the active feed but not in the future feed.
+ serviceIdsFromActiveFeedToTerminateEarly.addAll(
+ feedMergeContext.active.getServiceIds(feedMergeContext.getActiveTripIdsNotInFutureFeed())
+ );
+
+
+ // Build the set of calendar ids from the active|future feed to be removed
+ // because they are no longer used after shared trips are remapped to another service id.
+ feedMergeContext.collectServiceIdsToRemove();
+
+ mergeFeedsResult.mergeStrategy = CHECK_STOP_TIMES;
}
- return sharedFields;
}
/**
- * Checks whether a collection of fields contains a field with the provided name.
+ * Compare stop times for the given tripId between the future and active feeds. The comparison will inform whether
+ * trip and/or service IDs should be modified in the output merged feed.
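+ * Matching trips are collected in sharedTripIdsWithConsistentSignature (and their future service_id is marked for
+ * cloning and extension); mismatches are collected in sharedTripIdsWithInconsistentSignature and will fail the merge.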
*/ - private boolean containsField(Collection fields, String fieldName) { - for (Field field : fields) if (field.name.equals(fieldName)) return true; - return false; - } - - /** Checks that any of a set of errors is of the type {@link NewGTFSErrorType#DUPLICATE_ID}. */ - private boolean hasDuplicateError(Set errors) { - for (NewGTFSError error : errors) { - if (error.errorType.equals(NewGTFSErrorType.DUPLICATE_ID)) return true; + private void compareStopTimesAndCollectTripAndServiceIds(String tripId, Feed futureFeed, Feed activeFeed) { + // Fetch all ordered stop_times for each shared trip_id and compare the two sets for the + // future and active feed. If the stop_times are an exact match, include one instance of the trip + // (ignoring the other identical one). If they do not match, modify the active trip_id and include. + List futureStopTimes = Lists.newArrayList(futureFeed.stopTimes.getOrdered(tripId)); + List activeStopTimes = Lists.newArrayList(activeFeed.stopTimes.getOrdered(tripId)); + String activeServiceId = activeFeed.trips.get(tripId).service_id; + String futureServiceId = futureFeed.trips.get(tripId).service_id; + if (!stopTimesMatchSimplified(futureStopTimes, activeStopTimes)) { + // If stop_times or services do not match, merge will fail and no other action will be taken. + sharedTripIdsWithInconsistentSignature.add(tripId); + } else { + // If the trip's stop_times are an exact match, we can safely include just the + // future trip and exclude the active one. Also, mark the service_id for cloning, + // the cloned service id will need to be extended to the full time range. + sharedTripIdsWithConsistentSignature.add(tripId); + serviceIdsToCloneRenameAndExtend.add(futureServiceId); + sharedConsistentTripAndCalendarIds.add(new TripAndCalendars(tripId, activeServiceId, futureServiceId)); } - return false; } - /** Get table-scoped value used for key when remapping references for a particular feed. */ - private static String getTableScopedValue(Table table, String prefix, String id) { - return String.join(":", - table.name, - prefix, - id); + public String getFeedSourceId() { + return feedSource.id; } - /** - * Helper class that collects the feed version and its zip file. Note: this class helps with sorting versions to - * merge in a list collection. - */ - private class FeedToMerge { - public FeedVersion version; - public ZipFile zipFile; + private void logAndReportToBugsnag(Exception e, String message, Object... 
args) { + LOG.error(message, args, e); + ErrorUtils.reportToBugsnag(e, "datatools", message, owner); + } - FeedToMerge(FeedVersion version) throws IOException { - this.version = version; - this.zipFile = new ZipFile(version.retrieveGtfsFile()); - } + @BsonIgnore @JsonIgnore + public FeedMergeContext getFeedMergeContext() { + return feedMergeContext; } - public String getFeedSourceId() { - return feedSource.id; + private static class TripAndCalendars { + public final String tripId; + public final String activeCalendarId; + public final String futureCalendarId; + + public TripAndCalendars(String tripId, String activeCalendarId, String futureCalendarId) { + this.tripId = tripId; + this.activeCalendarId = activeCalendarId; + this.futureCalendarId = futureCalendarId; + } } } diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/MonitorServerStatusJob.java b/src/main/java/com/conveyal/datatools/manager/jobs/MonitorServerStatusJob.java index 41e6f4f29..13a828e84 100644 --- a/src/main/java/com/conveyal/datatools/manager/jobs/MonitorServerStatusJob.java +++ b/src/main/java/com/conveyal/datatools/manager/jobs/MonitorServerStatusJob.java @@ -325,12 +325,11 @@ private boolean checkForOtpRunnerCompletion(String url) { // if the otp-runner status file contains an error message, fail the job if (otpRunnerStatus.error) { // report to bugsnag if configured - Map debuggingMessages = new HashMap<>(); - debuggingMessages.put("otp-runner message", otpRunnerStatus.message); ErrorUtils.reportToBugsnag( new RuntimeException("otp-runner reported an error"), - debuggingMessages, - this.owner + "otp-runner", + otpRunnerStatus.message, + owner ); failJob(otpRunnerStatus.message); return false; diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/MonitorableJobWithResourceLock.java b/src/main/java/com/conveyal/datatools/manager/jobs/MonitorableJobWithResourceLock.java new file mode 100644 index 000000000..7873d4982 --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/MonitorableJobWithResourceLock.java @@ -0,0 +1,86 @@ +package com.conveyal.datatools.manager.jobs; + +import com.conveyal.datatools.common.status.MonitorableJob; +import com.conveyal.datatools.manager.auth.Auth0UserProfile; +import com.conveyal.datatools.manager.models.Model; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +/** + * Contains logic to lock/release feeds and other objects to ensure + * that jobs on such resources are not executed concurrently. + */ +public abstract class MonitorableJobWithResourceLock extends MonitorableJob { + public static final Logger LOG = LoggerFactory.getLogger(MonitorableJobWithResourceLock.class); + + protected final T resource; + private final String resourceName; + private final String resourceClass; + private final String jobClass; + + /** + * A set of resources (ids) which have been locked by a instance of {@link MonitorableJobWithResourceLock} + * to prevent repeat auto-deploy, auto-publishing, etc. 
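+ * Locks are keyed on the resource id and are released in the finally block of jobLogic().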
+ */ + private static final Set lockedResources = Collections.synchronizedSet(new HashSet<>()); + + protected MonitorableJobWithResourceLock( + Auth0UserProfile owner, + String name, + JobType jobType, + T resource, + String resourceName + ) { + super(owner, name, jobType); + this.resource = resource; + this.resourceName = resourceName; + resourceClass = resource.getClass().getSimpleName(); + jobClass = this.getClass().getSimpleName(); + } + + protected abstract void innerJobLogic() throws Exception; + + @Override + public void jobLogic() { + // Determine if the resource is not locked for this job. + if ( + lockedResources.contains(resource.id) + ) { + String message = String.format( + "%s '%s' skipped for %s execution (another such job is in progress)", + resourceClass, + resourceName, + jobClass + ); + LOG.info(message); + status.fail(message); + return; + } + + try { + synchronized (lockedResources) { + if (!lockedResources.contains(resource.id)) { + lockedResources.add(resource.id); + LOG.info("{} lock added for {} id '{}'", jobClass, resourceClass, resource.id); + } else { + LOG.warn("Unable to acquire lock for {} '{}'", resourceClass, resourceName); + status.fail(String.format("%s '%s' is locked for %s.", resourceClass, resourceName, jobClass)); + return; + } + } + innerJobLogic(); + } catch (Exception e) { + status.fail( + String.format("%s failed for %s '%s'!", jobClass, resourceClass, resourceName), + e + ); + } finally { + lockedResources.remove(resource.id); + LOG.info("{} lock removed for {} id: '{}'", jobClass, resourceClass, resource.id); + } + } +} diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/PeliasUpdateJob.java b/src/main/java/com/conveyal/datatools/manager/jobs/PeliasUpdateJob.java index 61e3d85bf..964f75fed 100644 --- a/src/main/java/com/conveyal/datatools/manager/jobs/PeliasUpdateJob.java +++ b/src/main/java/com/conveyal/datatools/manager/jobs/PeliasUpdateJob.java @@ -6,6 +6,7 @@ import com.conveyal.datatools.manager.auth.Auth0UserProfile; import com.conveyal.datatools.manager.models.Deployment; import com.conveyal.datatools.manager.models.FeedVersion; +import com.conveyal.datatools.manager.models.OtpServer; import com.conveyal.datatools.manager.persistence.Persistence; import com.conveyal.datatools.manager.utils.HttpUtils; import com.conveyal.datatools.manager.utils.SimpleHttpResponse; @@ -58,13 +59,25 @@ public PeliasUpdateJob(Auth0UserProfile owner, String name, Deployment deploymen this.timer = new Timer(); this.logUploadS3URI = logUploadS3URI; } + public PeliasUpdateJob(Auth0UserProfile owner, String name, Deployment deployment) { + super(owner, name, JobType.UPDATE_PELIAS); + this.deployment = deployment; + this.timer = new Timer(); + + if (deployment.deployJobSummaries.size() <= 0) { + throw new RuntimeException("Deployment must be deployed to at least one server to update Pelias!"); + } + + // Get log upload URI from deployment (the latest build artifacts folder is where the logs get uploaded to) + this.logUploadS3URI = new AmazonS3URI(deployment.deployJobSummaries.get(deployment.deployJobSummaries.size() - 1).buildArtifactsFolder); + } /** * This method must be overridden by subclasses to perform the core steps of the job. 
*/ @Override public void jobLogic() throws Exception { - status.message = "Launching custom geocoder update request"; + status.message = "Launching Local Places Index update request"; workerId = this.makeWebhookRequest(); status.percentComplete = 1.0; @@ -76,7 +89,7 @@ public void jobLogic() throws Exception { } private void getWebhookStatus() { - URI url = getWebhookURI(deployment.peliasWebhookUrl + "/status/" + workerId); + URI url = getWebhookURI(deployment.parentProject().peliasWebhookUrl + "/status/" + workerId); // Convert raw body to JSON PeliasWebhookStatusMessage statusResponse; @@ -118,7 +131,7 @@ private void getWebhookStatus() { * @return The workerID of the run created on the Pelias server */ private String makeWebhookRequest() { - URI url = getWebhookURI(deployment.peliasWebhookUrl); + URI url = getWebhookURI(deployment.parentProject().peliasWebhookUrl); // Convert from feedVersionIds to Pelias Config objects List gtfsFeeds = Persistence.feedVersions.getFiltered(in("_id", deployment.feedVersionIds)) @@ -131,6 +144,7 @@ private String makeWebhookRequest() { peliasWebhookRequestBody.csvFiles = deployment.peliasCsvFiles; peliasWebhookRequestBody.logUploadUrl = logUploadS3URI.toString(); peliasWebhookRequestBody.deploymentId = deployment.id; + peliasWebhookRequestBody.resetDb = deployment.peliasResetDb; String query = JsonUtil.toJson(peliasWebhookRequestBody); @@ -203,6 +217,7 @@ private static class PeliasWebhookRequestBody { public List csvFiles; public String logUploadUrl; public String deploymentId; + public boolean resetDb; } /** diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/ProcessSingleFeedJob.java b/src/main/java/com/conveyal/datatools/manager/jobs/ProcessSingleFeedJob.java index d83161471..35e88e0a8 100644 --- a/src/main/java/com/conveyal/datatools/manager/jobs/ProcessSingleFeedJob.java +++ b/src/main/java/com/conveyal/datatools/manager/jobs/ProcessSingleFeedJob.java @@ -150,6 +150,16 @@ public void jobLogic() { ) { addNextJob(new AutoDeployJob(feedSource.retrieveProject(), owner)); } + + // If auto-publish job is enabled (MTC extension required), + // create an auto-publish job for feeds that are fetched automatically. 
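+ // The auto-publish job is chained via addNextJob, so it only runs once this feed version has finished processing
+ // (mirroring the auto-deploy behavior above).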
+ if ( + DataManager.isExtensionEnabled("mtc") && + feedSource.autoPublish && + feedVersion.retrievalMethod == FeedRetrievalMethod.FETCHED_AUTOMATICALLY + ) { + addNextJob(new AutoPublishJob(feedSource, owner)); + } } /** diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/AgencyMergeLineContext.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/AgencyMergeLineContext.java new file mode 100644 index 000000000..bfddc4776 --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/AgencyMergeLineContext.java @@ -0,0 +1,83 @@ +package com.conveyal.datatools.manager.jobs.feedmerge; + +import com.conveyal.datatools.manager.jobs.MergeFeedsJob; +import com.conveyal.gtfs.loader.Table; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.UUID; +import java.util.zip.ZipOutputStream; + +import static com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType.SERVICE_PERIOD; + +public class AgencyMergeLineContext extends MergeLineContext { + private static final Logger LOG = LoggerFactory.getLogger(AgencyMergeLineContext.class); + + public AgencyMergeLineContext(MergeFeedsJob job, Table table, ZipOutputStream out) throws IOException { + super(job, table, out); + } + + @Override + public void checkFirstLineConditions() { + checkForMissingAgencyId(); + } + + @Override + public boolean shouldProcessRows() { + return !checkMismatchedAgency(); + } + + private void checkForMissingAgencyId() { + if ((keyFieldMissing || keyValue.equals(""))) { + // agency_id is optional if only one agency is present, but that will + // cause issues for the feed merge, so we need to insert an agency_id + // for the single entry. + (isHandlingActiveFeed() + ? feedMergeContext.active + : feedMergeContext.future + ).setNewAgencyId(UUID.randomUUID().toString()); + + if (keyFieldMissing) { + // Only add agency_id field if it is missing in table. + addField(Table.AGENCY.fields[0]); + } + } + } + + /** + * Check for some conditions that could occur when handling a service period merge. + * + * @return true if the merge encountered failing conditions + */ + private boolean checkMismatchedAgency() { + if (isHandlingActiveFeed() && job.mergeType.equals(SERVICE_PERIOD)) { + // If merging the agency table, we should only skip the following feeds if performing an MTC merge + // because that logic assumes the two feeds share the same agency (or + // agencies). NOTE: feed_info file is skipped by default (outside of this + // method) for a regional merge), which is why this block is exclusively + // for an MTC merge. Note, this statement may print multiple log + // statements, but it is deliberately nested in the csv while block in + // order to detect agency_id mismatches and fail the merge if found. + // The second feed's agency table must contain the same agency_id + // value as the first feed. + String agencyId = String.join(":", keyField, keyValue); + if (!"".equals(keyValue) && !referenceTracker.transitIds.contains(agencyId)) { + String otherAgencyId = referenceTracker.transitIds.stream() + .filter(transitId -> transitId.startsWith(AGENCY_ID)) + .findAny() + .orElse(null); + job.failMergeJob(String.format( + "MTC merge detected mismatching agency_id values between two " + + "feeds (%s and %s). 
Failing merge operation.",
+ agencyId,
+ otherAgencyId
+ ));
+ return true;
+ }
+ LOG.warn("Skipping {} file for feed {}/{} (future file preferred)", table.name, getFeedIndex(), feedMergeContext.feedsToMerge.size());
+ skipFile = true;
+ }
+ return false;
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/CalendarAttributesMergeLineContext.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/CalendarAttributesMergeLineContext.java
new file mode 100644
index 000000000..123ddb1cc
--- /dev/null
+++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/CalendarAttributesMergeLineContext.java
@@ -0,0 +1,62 @@
+package com.conveyal.datatools.manager.jobs.feedmerge;
+
+import com.conveyal.datatools.manager.jobs.MergeFeedsJob;
+import com.conveyal.gtfs.error.NewGTFSError;
+import com.conveyal.gtfs.loader.Table;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.zip.ZipOutputStream;
+
+import static com.conveyal.datatools.manager.utils.MergeFeedUtils.hasDuplicateError;
+
+/**
+ * Holds the logic for merging entries from the GTFS+ calendar_attributes table.
+ */
+public class CalendarAttributesMergeLineContext extends MergeLineContext {
+ private static final Logger LOG = LoggerFactory.getLogger(CalendarAttributesMergeLineContext.class);
+
+ public CalendarAttributesMergeLineContext(MergeFeedsJob job, Table table, ZipOutputStream out) throws IOException {
+ super(job, table, out);
+ }
+
+ @Override
+ public boolean checkFieldsForMergeConflicts(Set<NewGTFSError> idErrors, FieldContext fieldContext) {
+ return checkCalendarIds(idErrors, fieldContext);
+ }
+
+ @Override
+ public void afterRowWrite() throws IOException {
+ addClonedServiceId();
+ }
+
+ private boolean checkCalendarIds(Set<NewGTFSError> idErrors, FieldContext fieldContext) {
+ boolean shouldSkipRecord = false;
+
+ // If any service_id in the active feed matches with the future
+ // feed, it should be modified and all associated trip records
+ // must also be changed with the modified service_id.
+ // TODO How can we check that calendar_dates entries are
+ // duplicates? I think we would need to consider the
+ // service_id:exception_type:date as the unique key and include any
+ // and all entries as long as they are unique on this key.
+ if (isHandlingActiveFeed() && hasDuplicateError(idErrors)) {
+ // Modify service_id and ensure that referencing trips
+ // have service_id updated.
+ updateAndRemapOutput(fieldContext);
+ }
+
+ // Skip record (based on remapped id if necessary) if it was skipped in the calendar table.
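+ // This relies on the calendar table having already been merged, so that mergeFeedsResult.skippedIds is populated.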
+ String keyInCalendarTable = getTableScopedValue(Table.CALENDAR, keyValue); + if (mergeFeedsResult.skippedIds.contains(keyInCalendarTable)) { + LOG.warn( + "Skipping calendar entry {} because it was skipped in the merged calendar table.", + keyValue); + shouldSkipRecord = true; + } + + return !shouldSkipRecord; + } +} \ No newline at end of file diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/CalendarDatesMergeLineContext.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/CalendarDatesMergeLineContext.java new file mode 100644 index 000000000..32809e70f --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/CalendarDatesMergeLineContext.java @@ -0,0 +1,167 @@ +package com.conveyal.datatools.manager.jobs.feedmerge; + +import com.conveyal.datatools.manager.jobs.MergeFeedsJob; +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.loader.Table; +import com.conveyal.gtfs.model.CalendarDate; +import com.google.common.collect.Lists; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.time.LocalDate; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; +import java.util.zip.ZipOutputStream; + +import static com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType.SERVICE_PERIOD; +import static com.conveyal.gtfs.loader.DateField.GTFS_DATE_FORMATTER; + +/** + * Contains logic for merging records in the GTFS calendar_dates table. + */ +public class CalendarDatesMergeLineContext extends MergeLineContext { + private static final Logger LOG = LoggerFactory.getLogger(CalendarDatesMergeLineContext.class); + + /** Holds the date used to check calendar validity */ + private LocalDate futureFeedFirstDateForCalendarValidity; + + public CalendarDatesMergeLineContext(MergeFeedsJob job, Table table, ZipOutputStream out) throws IOException { + super(job, table, out); + } + + @Override + public boolean checkFieldsForMergeConflicts(Set idErrors, FieldContext fieldContext) throws IOException { + return checkCalendarDatesIds(fieldContext); + } + + @Override + public void afterTableRecords() throws IOException { + // If the current row is for a calendar service_id that is marked for cloning/renaming, clone the + // values, change the ID, extend the start/end dates to the feed's full range, and write the + // additional line to the file. + addClonedServiceIds(); + } + + @Override + public void startNewFeed(int feedIndex) throws IOException { + super.startNewFeed(feedIndex); + futureFeedFirstDateForCalendarValidity = getFutureFeedFirstDateForCheckingCalendarValidity(); + } + + private boolean checkCalendarDatesIds(FieldContext fieldContext) throws IOException { + boolean shouldSkipRecord = false; + if (job.mergeType.equals(SERVICE_PERIOD)) { + // Drop any calendar_dates.txt records from the existing feed for dates that are + // not before the first date of the future feed, + // or for corresponding calendar entries that have been dropped. 
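+ // The cutoff date (futureFeedFirstDateForCalendarValidity) is computed once per feed in startNewFeed().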
+ LocalDate date = getCsvDate("date"); + String calendarKey = getTableScopedValue(Table.CALENDAR, keyValue); + if ( + isHandlingActiveFeed() && + ( + job.mergeFeedsResult.skippedIds.contains(calendarKey) || + !isBeforeFutureFeedStartDate(date) + ) + ) { + String key = getTableScopedValue(keyValue); + LOG.warn( + "Skipping calendar_dates entry {} because it operates in the time span of future feed (i.e., after or on {}).", + keyValue, + futureFeedFirstDateForCalendarValidity + ); + mergeFeedsResult.skippedIds.add(key); + shouldSkipRecord = true; + } + + if (isServiceIdUnused()) { + shouldSkipRecord = true; + } + } + + // Track service ID because we want to avoid removing trips that may reference this + // service_id when the service_id is used by calendar.txt records that operate in + // the valid date range, i.e., before the future feed's first date. + if (!shouldSkipRecord && fieldContext.nameEquals(SERVICE_ID)) { + mergeFeedsResult.serviceIds.add(fieldContext.getValueToWrite()); + } + + return !shouldSkipRecord; + } + + /** + * Obtains the future feed start date to use + * if the future feed's first date is before its first calendar start date, + * when checking MTC calendar_dates and calendar records for modification/exclusion. + */ + private LocalDate getFutureFeedFirstDateForCheckingCalendarValidity() { + LocalDate futureFirstCalendarStartDate = feedMergeContext.futureFirstCalendarStartDate; + LocalDate futureFeedFirstDate = feedMergeContext.future.getFeedFirstDate(); + if ( + isHandlingActiveFeed() && + job.mergeType.equals(SERVICE_PERIOD) && + futureFirstCalendarStartDate.isBefore(LocalDate.MAX) && + futureFeedFirstDate.isBefore(futureFirstCalendarStartDate) + ) { + return futureFirstCalendarStartDate; + } + return futureFeedFirstDate; + } + + private boolean isBeforeFutureFeedStartDate(LocalDate date) { + return date.isBefore(futureFeedFirstDateForCalendarValidity); + } + + /** + * Adds a cloned service id for trips with the same signature in both the active & future feeds. + * The cloned service id spans from the start date in the active feed until the end date in the future feed. + */ + public void addClonedServiceIds() throws IOException { + if (job.mergeType.equals(SERVICE_PERIOD)) { + String clonedIdScope = getClonedIdScope(); + + // Retrieve all active and future calendar dates ahead + // to avoid repeat database get-all queries, + // and exclude active entries with a date after the future feed start date. + List allCalendarDates = new ArrayList<>(); + allCalendarDates.addAll(Lists.newArrayList( + StreamSupport.stream(feedMergeContext.active.feed.calendarDates.spliterator(), false) + .filter(calDate -> isBeforeFutureFeedStartDate(calDate.date)) + .collect(Collectors.toList()) + )); + allCalendarDates.addAll(Lists.newArrayList( + feedMergeContext.future.feed.calendarDates.getAll() + )); + + for (String id : job.serviceIdsToCloneRenameAndExtend) { + String newServiceId = getIdWithScope(id, clonedIdScope); + + // Because this service has been extended to span both active and future feed, + // we need to add all calendar_dates entries for the original service id + // under the active AND future feed (and of course rename service id). + for (CalendarDate calDate : allCalendarDates) { + if (calDate.service_id.equals(id)) { + writeValuesToTable(getCalendarRowValues(calDate, newServiceId), true); + } + } + } + } + } + + /** + * Helper method that builds a string array from a CalendarDates object + * with a new service_id. 
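+ * Only the service_id, date, and exception_type columns are populated; any other columns are left unset.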
+ */ + private String[] getCalendarRowValues(CalendarDate calDate, String newServiceId) { + String[] rowValues = new String[getOriginalRowValues().length]; + rowValues[getFieldIndex(SERVICE_ID)] = newServiceId; + rowValues[getFieldIndex("date")] + = calDate.date.format(GTFS_DATE_FORMATTER); + rowValues[getFieldIndex("exception_type")] + = String.valueOf(calDate.exception_type); + return rowValues; + } +} \ No newline at end of file diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/CalendarMergeLineContext.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/CalendarMergeLineContext.java new file mode 100644 index 000000000..ab8902531 --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/CalendarMergeLineContext.java @@ -0,0 +1,118 @@ +package com.conveyal.datatools.manager.jobs.feedmerge; + +import com.conveyal.datatools.manager.jobs.MergeFeedsJob; +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.loader.Table; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.time.LocalDate; +import java.time.temporal.ChronoUnit; +import java.util.Set; +import java.util.zip.ZipOutputStream; + +import static com.conveyal.datatools.manager.utils.MergeFeedUtils.hasDuplicateError; +import static com.conveyal.gtfs.loader.DateField.GTFS_DATE_FORMATTER; + +/** + * Contains logic for merging records in the GTFS calendar table. + */ +public class CalendarMergeLineContext extends MergeLineContext { + private static final Logger LOG = LoggerFactory.getLogger(CalendarMergeLineContext.class); + + public CalendarMergeLineContext(MergeFeedsJob job, Table table, ZipOutputStream out) throws IOException { + super(job, table, out); + } + + @Override + public boolean checkFieldsForMergeConflicts(Set idErrors, FieldContext fieldContext) throws IOException { + return checkCalendarIds(idErrors, fieldContext); + } + + @Override + public void afterRowWrite() throws IOException { + addClonedServiceId(); + } + + private boolean checkCalendarIds(Set idErrors, FieldContext fieldContext) throws IOException { + boolean shouldSkipRecord = false; + String key = getTableScopedValue(keyValue); + + if (isHandlingActiveFeed()) { + LocalDate startDate = getCsvDate("start_date"); + if (!startDate.isBefore(feedMergeContext.future.getFeedFirstDate())) { + // If a service_id from the active calendar has both the + // start_date and end_date in the future, the service will be + // excluded from the merged file. Records in trips, + // calendar_dates, and calendar_attributes referencing this + // service_id shall also be removed/ignored. Stop_time records + // for the ignored trips shall also be removed. + LOG.warn( + "Skipping active calendar entry {} because it operates fully within the time span of future feed.", + keyValue); + mergeFeedsResult.skippedIds.add(key); + shouldSkipRecord = true; + } else { + // In the MTC revised feed merge logic: + // - If trip ids in active and future feed are disjoint, + // - calendar entries from the active feed will be inserted, + // but the ending date will be set to the day before the earliest **calendar start date** from the new feed. + // - If some trip ids are found in both active/future feed, + // - new calendar entries are created for those trips + // that span from active feed’s start date to the future feed’s end date. 
+ // - calendar entries for other trip ids in the active feed are inserted in the merged feed, + // but the ending date will be set to the day before the **start date of the new feed**. + LocalDate endDate = getCsvDate("end_date"); + LocalDate futureStartDate = null; + boolean activeAndFutureTripIdsAreDisjoint = job.sharedTripIdsWithConsistentSignature.isEmpty(); + if (activeAndFutureTripIdsAreDisjoint) { + futureStartDate = feedMergeContext.futureFirstCalendarStartDate; + } else if (job.serviceIdsFromActiveFeedToTerminateEarly.contains(keyValue)) { + futureStartDate = feedMergeContext.future.getFeedFirstDate(); + } + // In other cases not covered above, new calendar entry is already flagged for insertion + // from getMergeStrategy, so that trip ids may reference it. + + + if ( + fieldContext.nameEquals("end_date") && + futureStartDate != null && + !endDate.isBefore(futureStartDate) + ) { + fieldContext.resetValue(futureStartDate + .minus(1, ChronoUnit.DAYS) + .format(GTFS_DATE_FORMATTER)); + } + } + } + + if (isServiceIdUnused()) { + shouldSkipRecord = true; + } + + // If any service_id in the active feed matches with the future + // feed, it should be modified and all associated trip records + // must also be changed with the modified service_id. + // TODO How can we check that calendar_dates entries are + // duplicates? I think we would need to consider the + // service_id:exception_type:date as the unique key and include any + // all entries as long as they are unique on this key. + if (isHandlingActiveFeed() && hasDuplicateError(idErrors)) { + // Modify service_id and ensure that referencing trips + // have service_id updated. + updateAndRemapOutput(fieldContext); + } + + // Track service ID because we want to avoid removing trips that may reference this + // service_id when the service_id is used by calendar_dates that operate in the valid + // date range, i.e., before the future feed's first date. + // + // If service is going to be cloned, add to the output service ids. + if (!shouldSkipRecord && fieldContext.nameEquals(SERVICE_ID)) { + mergeFeedsResult.serviceIds.add(fieldContext.getValueToWrite()); + } + + return !shouldSkipRecord; + } +} \ No newline at end of file diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FeedContext.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FeedContext.java new file mode 100644 index 000000000..3bc91938c --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FeedContext.java @@ -0,0 +1,72 @@ +package com.conveyal.datatools.manager.jobs.feedmerge; + +import com.conveyal.datatools.manager.DataManager; +import com.conveyal.gtfs.loader.Feed; +import com.conveyal.gtfs.loader.Table; +import com.google.common.collect.Sets; + +import java.io.IOException; +import java.time.LocalDate; +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Contains information related to a feed to merge. + */ +public class FeedContext { + public final FeedToMerge feedToMerge; + public final Set tripIds; + public final Feed feed; + private final LocalDate feedFirstDate; + /** + * Holds the auto-generated agency id to be updated for each feed if none was provided. 
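+ * (Generated in AgencyMergeLineContext#checkForMissingAgencyId when agency_id is missing or blank in the source feed.)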
+ */ + private String newAgencyId; + private Set serviceIdsToRemove = new HashSet<>(); + + public FeedContext(FeedToMerge givenFeedToMerge) throws IOException { + feedToMerge = givenFeedToMerge; + feedToMerge.collectTripAndServiceIds(); + tripIds = feedToMerge.idsForTable.get(Table.TRIPS); + feed = new Feed(DataManager.GTFS_DATA_SOURCE, feedToMerge.version.namespace); + + // Initialize future and active feed's first date to the first calendar date from validation result. + // This is equivalent to either the earliest date of service defined for a calendar_date record or the + // earliest start_date value for a calendars.txt record. For MTC, however, they require that GTFS + // providers use calendars.txt entries and prefer that this value (which is used to determine cutoff + // dates for the active feed when merging with the future) be strictly assigned the earliest + // calendar#start_date (unless that table for some reason does not exist). + feedFirstDate = feedToMerge.version.validationResult.firstCalendarDate; + } + + public LocalDate getFeedFirstDate() { return feedFirstDate; } + + public String getNewAgencyId() { + return newAgencyId; + } + + public void setNewAgencyId(String agencyId) { + newAgencyId = agencyId; + } + + public Set getServiceIdsToRemove() { + return serviceIdsToRemove; + } + + public void setServiceIdsToRemoveUsingOtherFeed(Set tripIdsNotInOtherFeed) { + serviceIdsToRemove = Sets.difference( + feedToMerge.serviceIds, + getServiceIds(tripIdsNotInOtherFeed) + ); + } + + /** + * Obtains the service ids corresponding to the provided trip ids. + */ + public Set getServiceIds(Set tripIds) { + return tripIds.stream() + .map(tripId -> feed.trips.get(tripId).service_id) + .collect(Collectors.toSet()); + } +} diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FeedMergeContext.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FeedMergeContext.java new file mode 100644 index 000000000..371112026 --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FeedMergeContext.java @@ -0,0 +1,83 @@ +package com.conveyal.datatools.manager.jobs.feedmerge; + +import com.conveyal.datatools.manager.auth.Auth0UserProfile; +import com.conveyal.datatools.manager.models.FeedVersion; +import com.conveyal.datatools.manager.utils.MergeFeedUtils; +import com.conveyal.gtfs.model.Calendar; +import com.google.common.collect.Sets; + +import java.io.Closeable; +import java.io.IOException; +import java.time.LocalDate; +import java.util.List; +import java.util.Set; + +/** + * Contains merge information between an active feed and a future feed. + */ +public class FeedMergeContext implements Closeable { + public final List feedsToMerge; + public final FeedContext active; + public final FeedContext future; + public final boolean serviceIdsMatch; + public final boolean tripIdsMatch; + public final LocalDate futureFirstCalendarStartDate; + public final Set sharedTripIds; + + public FeedMergeContext(Set feedVersions, Auth0UserProfile owner) throws IOException { + feedsToMerge = MergeFeedUtils.collectAndSortFeeds(feedVersions, owner); + FeedToMerge activeFeedToMerge = feedsToMerge.get(1); + FeedToMerge futureFeedToMerge = feedsToMerge.get(0); + active = new FeedContext(activeFeedToMerge); + future = new FeedContext(futureFeedToMerge); + + // Determine whether service and trip IDs are exact matches. 
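+ // Note: serviceIdsInUse only contains service_ids actually referenced from trips.txt, so unused calendars do not
+ // affect this comparison.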
+ serviceIdsMatch = activeFeedToMerge.serviceIdsInUse.equals(futureFeedToMerge.serviceIdsInUse); + tripIdsMatch = active.tripIds.equals(future.tripIds); + sharedTripIds = Sets.intersection(active.tripIds, future.tripIds); + + // Initialize, before processing any rows, the first calendar start dates from the future feed. + LocalDate futureFirstCalStartDate = LocalDate.MAX; + for (Calendar c : future.feed.calendars.getAll()) { + if (futureFirstCalStartDate.isAfter(c.start_date)) { + futureFirstCalStartDate = c.start_date; + } + } + this.futureFirstCalendarStartDate = futureFirstCalStartDate; + } + + public void collectServiceIdsToRemove() { + active.setServiceIdsToRemoveUsingOtherFeed(getActiveTripIdsNotInFutureFeed()); + future.setServiceIdsToRemoveUsingOtherFeed(getFutureTripIdsNotInActiveFeed()); + } + + @Override + public void close() throws IOException { + for (FeedToMerge feed : feedsToMerge) { + feed.close(); + } + } + + /** + * Partially handles the Revised MTC Feed Merge Requirement + * to detect disjoint trip ids between the active/future feeds. + * @return true if no trip ids from the active feed is found in the future feed, and vice-versa. + */ + public boolean areActiveAndFutureTripIdsDisjoint() { + return sharedTripIds.isEmpty(); + } + + /** + * Obtains the trip ids found in the active feed, but not in the future feed. + */ + public Sets.SetView getActiveTripIdsNotInFutureFeed() { + return Sets.difference(active.tripIds, future.tripIds); + } + + /** + * Obtains the trip ids found in the future feed, but not in the active feed. + */ + public Sets.SetView getFutureTripIdsNotInActiveFeed() { + return Sets.difference(future.tripIds, active.tripIds); + } +} diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FeedToMerge.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FeedToMerge.java new file mode 100644 index 000000000..91ba441d4 --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FeedToMerge.java @@ -0,0 +1,59 @@ +package com.conveyal.datatools.manager.jobs.feedmerge; + +import com.conveyal.datatools.manager.models.FeedVersion; +import com.conveyal.gtfs.loader.Table; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.SetMultimap; +import com.google.common.collect.Sets; + +import java.io.Closeable; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.zip.ZipFile; + +import static com.conveyal.datatools.manager.utils.MergeFeedUtils.getIdsForTable; + +/** + * Helper class that collects the feed version and its zip file. Note: this class helps with sorting versions to + * merge in a list collection. + */ +public class FeedToMerge implements Closeable { + public FeedVersion version; + public ZipFile zipFile; + public SetMultimap idsForTable = HashMultimap.create(); + public Set serviceIds = new HashSet<>(); + public Set serviceIdsInUse; + private static final Set
tablesToCheck = Sets.newHashSet(Table.TRIPS, Table.CALENDAR, Table.CALENDAR_DATES); + + public FeedToMerge(FeedVersion version) throws IOException { + this.version = version; + this.zipFile = new ZipFile(version.retrieveGtfsFile()); + } + + /** Collects all trip/service IDs (tables noted in {@link #tablesToCheck}) for comparing feeds during merge. */ + public void collectTripAndServiceIds() throws IOException { + for (Table table : tablesToCheck) { + idsForTable.get(table).addAll(getIdsForTable(zipFile, table)); + } + serviceIds.addAll(idsForTable.get(Table.CALENDAR)); + serviceIds.addAll(idsForTable.get(Table.CALENDAR_DATES)); + + serviceIdsInUse = getServiceIdsInUse(idsForTable.get(Table.TRIPS)); + } + + /** + * Obtains the service ids corresponding to the provided trip ids. + * FIXME: Duplicate of MergeFeedsJob. + */ + private Set getServiceIdsInUse(Set tripIds) { + return tripIds.stream() + .map(tripId -> version.retrieveFeed().trips.get(tripId).service_id) + .collect(Collectors.toSet()); + } + + public void close() throws IOException { + this.zipFile.close(); + } +} \ No newline at end of file diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FieldContext.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FieldContext.java new file mode 100644 index 000000000..0c33ad92b --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FieldContext.java @@ -0,0 +1,55 @@ +package com.conveyal.datatools.manager.jobs.feedmerge; + +import com.conveyal.gtfs.loader.Field; + + +/** + * Holds data when processing a field in a CSV row during a feed merge. + */ +public class FieldContext { + private final Field field; + private String value; + private String valueToWrite; + + public FieldContext(Field field, String value) { + this.field = field; + // Default value to write is unchanged from value found in csv (i.e. val). Note: if looking to + // modify the value that is written in the merged file, you must update valueToWrite (e.g., + // updating this feed's end_date or accounting for cases where IDs conflict). + resetValue(value); + } + + public Field getField() { + return field; + } + + public String getValue() { + return value; + } + + public void setValue(String newValue) { + value = newValue; + } + + public String getValueToWrite() { + return valueToWrite; + } + + public void setValueToWrite(String newValue) { + valueToWrite = newValue; + } + + /** + * Resets both value and valueToWrite to a desired new value. + */ + public void resetValue(String newValue) { + value = valueToWrite = newValue; + } + + /** + * Convenience method to compare if this field name equals a specified one. 
+ */ + public boolean nameEquals(String fieldName) { + return field.name.equals(fieldName); + } +} diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/MergeFeedsResult.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/MergeFeedsResult.java similarity index 85% rename from src/main/java/com/conveyal/datatools/manager/jobs/MergeFeedsResult.java rename to src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/MergeFeedsResult.java index 86b8aa643..4a0c8f6bd 100644 --- a/src/main/java/com/conveyal/datatools/manager/jobs/MergeFeedsResult.java +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/MergeFeedsResult.java @@ -1,4 +1,6 @@ -package com.conveyal.datatools.manager.jobs; +package com.conveyal.datatools.manager.jobs.feedmerge; + +import com.conveyal.datatools.manager.jobs.MergeFeedsJob; import java.io.Serializable; import java.util.Date; @@ -13,13 +15,9 @@ public class MergeFeedsResult implements Serializable { private static final long serialVersionUID = 1L; - /** Number of feeds merged */ - public int feedCount; - public int errorCount; /** Type of merge operation performed */ public MergeFeedsType type; - /** Contains a set of strings for which there were error-causing duplicate values */ - public Set idConflicts = new HashSet<>(); + public MergeStrategy mergeStrategy = MergeStrategy.DEFAULT; /** Contains the set of IDs for records that were excluded in the merged feed */ public Set skippedIds = new HashSet<>(); /** @@ -35,8 +33,10 @@ public class MergeFeedsResult implements Serializable { public int recordsSkipCount; public Date startTime; public boolean failed; + public int errorCount; /** Set of reasons explaining why merge operation failed */ public Set failureReasons = new HashSet<>(); + public Set tripIdsToCheck = new HashSet<>(); public MergeFeedsResult (MergeFeedsType type) { this.type = type; diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/MergeFeedsType.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/MergeFeedsType.java similarity index 82% rename from src/main/java/com/conveyal/datatools/manager/jobs/MergeFeedsType.java rename to src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/MergeFeedsType.java index 6c9e61b1c..bbdfc6ccd 100644 --- a/src/main/java/com/conveyal/datatools/manager/jobs/MergeFeedsType.java +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/MergeFeedsType.java @@ -1,4 +1,6 @@ -package com.conveyal.datatools.manager.jobs; +package com.conveyal.datatools.manager.jobs.feedmerge; + +import com.conveyal.datatools.manager.jobs.MergeFeedsJob; /** * This enum contains the types of merge feeds that {@link MergeFeedsJob} can currently perform. 
diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/MergeLineContext.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/MergeLineContext.java new file mode 100644 index 000000000..5fb7edd0f --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/MergeLineContext.java @@ -0,0 +1,827 @@ +package com.conveyal.datatools.manager.jobs.feedmerge; + +import com.conveyal.datatools.manager.jobs.MergeFeedsJob; +import com.conveyal.datatools.manager.models.FeedSource; +import com.conveyal.datatools.manager.models.FeedVersion; +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.loader.Field; +import com.conveyal.gtfs.loader.ReferenceTracker; +import com.conveyal.gtfs.loader.Table; +import com.csvreader.CsvReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.supercsv.io.CsvListWriter; +import org.supercsv.prefs.CsvPreference; + +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.time.LocalDate; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +import static com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType.REGIONAL; +import static com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType.SERVICE_PERIOD; +import static com.conveyal.datatools.manager.utils.MergeFeedUtils.containsField; +import static com.conveyal.datatools.manager.utils.MergeFeedUtils.getAllFields; +import static com.conveyal.datatools.manager.utils.MergeFeedUtils.getMergeKeyField; +import static com.conveyal.datatools.manager.utils.MergeFeedUtils.hasDuplicateError; +import static com.conveyal.datatools.manager.utils.StringUtils.getCleanName; +import static com.conveyal.gtfs.loader.DateField.GTFS_DATE_FORMATTER; + +public class MergeLineContext { + protected static final String AGENCY_ID = "agency_id"; + protected static final String SERVICE_ID = "service_id"; + private static final Logger LOG = LoggerFactory.getLogger(MergeLineContext.class); + protected final MergeFeedsJob job; + private final ZipOutputStream out; + private final Set allFields; + private boolean handlingActiveFeed; + private boolean handlingFutureFeed; + private String idScope; + // CSV writer used to write to zip file. 
+ private final CsvListWriter writer; + private CsvReader csvReader; + private boolean skipRecord; + protected boolean keyFieldMissing; + private String[] originalRowValues; + private String[] rowValues; + private int lineNumber = 0; + protected final Table table; + protected FeedToMerge feed; + protected String keyValue; + protected final ReferenceTracker referenceTracker = new ReferenceTracker(); + protected String keyField; + private String orderField; + protected final MergeFeedsResult mergeFeedsResult; + protected final FeedMergeContext feedMergeContext; + protected int keyFieldIndex; + private Field[] fieldsFoundInZip; + private List fieldsFoundList; + // Set up objects for tracking the rows encountered + private final Map rowValuesForStopOrRouteId = new HashMap<>(); + private final Set rowStrings = new HashSet<>(); + private List sharedSpecFields; + private int feedIndex; + + public FeedVersion version; + public FeedSource feedSource; + public boolean skipFile; + public int mergedLineNumber = 0; + private boolean headersWritten = false; + + public static MergeLineContext create(MergeFeedsJob job, Table table, ZipOutputStream out) throws IOException { + switch (table.name) { + case "agency": + return new AgencyMergeLineContext(job, table, out); + case "calendar": + return new CalendarMergeLineContext(job, table, out); + case "calendar_attributes": + return new CalendarAttributesMergeLineContext(job, table, out); + case "calendar_dates": + return new CalendarDatesMergeLineContext(job, table, out); + case "routes": + return new RoutesMergeLineContext(job, table, out); + case "shapes": + return new ShapesMergeLineContext(job, table, out); + case "stops": + return new StopsMergeLineContext(job, table, out); + case "trips": + case "timepoints": + // Use same merge logic to filter out trips in both tables. + return new TripsMergeLineContext(job, table, out); + default: + return new MergeLineContext(job, table, out); + } + } + + protected MergeLineContext(MergeFeedsJob job, Table table, ZipOutputStream out) throws IOException { + this.job = job; + this.table = table; + this.feedMergeContext = job.getFeedMergeContext(); + // Get shared fields between all feeds being merged. This is used to filter the spec fields so that only + // fields found in the collection of feeds are included in the merged table. + allFields = getAllFields(feedMergeContext.feedsToMerge, table); + this.mergeFeedsResult = job.mergeFeedsResult; + this.writer = new CsvListWriter(new OutputStreamWriter(out), CsvPreference.STANDARD_PREFERENCE); + this.out = out; + } + + public void startNewFeed(int feedIndex) throws IOException { + lineNumber = 0; + handlingActiveFeed = feedIndex > 0; + handlingFutureFeed = feedIndex == 0; + this.feedIndex = feedIndex; + this.feed = feedMergeContext.feedsToMerge.get(feedIndex); + this.version = feed.version; + this.feedSource = version.parentFeedSource(); + keyField = getMergeKeyField(table, job.mergeType); + orderField = table.getOrderFieldName(); + keyFieldMissing = false; + + idScope = makeIdScope(version); + csvReader = table.getCsvReader(feed.zipFile, null); + // If csv reader is null, the table was not found in the zip file. There is no need + // to handle merging this table for this zip file. + // No need to iterate over second (active) file if strategy is to simply extend the future GTFS + // service to start earlier. 
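+ // (shouldSkipFile currently only skips the active feed_info table during service period merges.)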
+ skipFile = shouldSkipFile(); + if (csvReader == null) { + skipFile = true; + LOG.warn("Table {} not found in the zip file for {}{}", table.name, feedSource.name, version.version); + return; + } + fieldsFoundInZip = table.getFieldsFromFieldHeaders(csvReader.getHeaders(), null); + fieldsFoundList = Arrays.asList(fieldsFoundInZip); + // Determine the index of the key field for this version's table. + keyFieldIndex = getFieldIndex(keyField); + if (keyFieldIndex == -1) { + LOG.error("No {} field exists for {} table (feed={})", keyField, table.name, version.id); + keyFieldMissing = true; + // If there is no agency_id for agency table, create one and ensure that + // route#agency_id gets set. + } + + if (handlingFutureFeed) { + mergeFeedsResult.serviceIds.addAll( + job.serviceIdsToCloneRenameAndExtend.stream().map( + this::getIdWithScope + ).collect(Collectors.toSet()) + ); + } + } + + /** + * Returns a scoped identifier of the form e.g. FeedName3:some_id + * (to distinguish an id when used in multiple tables). + */ + protected String getIdWithScope(String id, String scope) { + return String.join(":", scope, id); + } + + /** + * Shorthand for above using current idScope. + */ + protected String getIdWithScope(String id) { + return getIdWithScope(id, idScope); + } + + public boolean shouldSkipFile() { + if (handlingActiveFeed && job.mergeType.equals(SERVICE_PERIOD)) { + // Always prefer the "future" file for the feed_info table, which means + // we can skip any iterations following the first one. + return table.name.equals("feed_info"); + } + return false; + } + + /** + * Overridable method that determines whether to process rows of the current feed table. + * @return true by default. + */ + public boolean shouldProcessRows() { + return true; + } + + /** + * Iterate over all rows in table and write them to the output zip. + * + * @return false, if a failing condition was encountered. true, if everything was ok. + */ + public boolean iterateOverRows() throws IOException { + // Iterate over rows in table, writing them to the out file. + while (csvReader.readRecord()) { + startNewRow(); + + if (!shouldProcessRows()) { + // e.g. If there is a mismatched agency, return immediately. + return false; + } + + // If checkMismatchedAgency flagged skipFile, loop back to the while loop. (Note: this is + // intentional because we want to check all agency ids in the file). + if (skipFile || lineIsBlank()) continue; + // Check certain initial conditions on the first line of the file. + if (lineNumber == 0) { + checkFirstLineConditions(); + } + initializeRowValues(); + // Construct row values. If a failure condition was encountered, return. + if (!constructRowValues()) { + return false; + } + + finishRowAndWriteToZip(); + } + return true; + } + + public void startNewRow() throws IOException { + keyValue = csvReader.get(keyFieldIndex); + // Get the spec fields to export + List specFields = table.specFields(); + // Filter the spec fields on the set of fields found in all feeds to be merged. 
+ sharedSpecFields = specFields.stream() + .filter(f -> containsField(allFields, f.name)) + .collect(Collectors.toList()); + } + + public boolean checkForeignReferences(FieldContext fieldContext) throws IOException { + Field field = fieldContext.getField(); + if (field.isForeignReference()) { + String key = getTableScopedValue(field.referenceTable, fieldContext.getValue()); + // Check if we're performing a service period merge, this ref field is a service_id, and it + // is not found in the list of service_ids (e.g., it was removed). + boolean isValidServiceId = mergeFeedsResult.serviceIds.contains(fieldContext.getValueToWrite()); + + // If the current foreign ref points to another record that has + // been skipped or is a ref to a non-existent service_id during a service period merge, skip + // this record and add its primary key to the list of skipped IDs (so that other references + // can be properly omitted). + if (serviceIdHasKeyOrShouldBeSkipped(fieldContext, key, isValidServiceId)) { + // If a calendar#service_id has been skipped (it's listed in skippedIds), but there were + // valid service_ids found in calendar_dates, do not skip that record for both the + // calendar_date and any related trips. + if (fieldContext.nameEquals(SERVICE_ID) && isValidServiceId) { + LOG.warn("Not skipping valid service_id {} for {} {}", fieldContext.getValueToWrite(), table.name, keyValue); + } else { + String skippedKey = getTableScopedValue(keyValue); + if (orderField != null) { + skippedKey = String.join(":", skippedKey, getCsvValue(orderField)); + } + mergeFeedsResult.skippedIds.add(skippedKey); + return false; + } + } + // If the field is a foreign reference, check to see whether the reference has been + // remapped due to a conflicting ID from another feed (e.g., calendar#service_id). + if (mergeFeedsResult.remappedIds.containsKey(key)) { + mergeFeedsResult.remappedReferences++; + // If the value has been remapped update the value to write. + fieldContext.setValueToWrite(mergeFeedsResult.remappedIds.get(key)); + } + } + return true; + } + + private boolean serviceIdHasKeyOrShouldBeSkipped(FieldContext fieldContext, String key, boolean isValidServiceId) { + boolean serviceIdShouldBeSkipped = job.mergeType.equals(SERVICE_PERIOD) && + fieldContext.nameEquals(SERVICE_ID) && + !isValidServiceId; + return mergeFeedsResult.skippedIds.contains(key) || serviceIdShouldBeSkipped; + } + + + /** + * Overridable method whose default behavior below is to skip a record if it creates a duplicate id. + * @return false, if a failing condition was encountered. true, if everything was ok. + * @throws IOException Some overrides throw IOException. + */ + public boolean checkFieldsForMergeConflicts(Set idErrors, FieldContext fieldContext) throws IOException { + return !hasDuplicateError(idErrors); + } + + private Set getIdErrors(FieldContext fieldContext) { + // If analyzing the second feed (active feed), the service_id always gets feed scoped. + // See https://github.com/ibi-group/datatools-server/issues/244 + String fieldValue = handlingActiveFeed && fieldContext.nameEquals(SERVICE_ID) + ? 
fieldContext.getValueToWrite() + : fieldContext.getValue(); + + return referenceTracker.checkReferencesAndUniqueness(keyValue, lineNumber, fieldContext.getField(), + fieldValue, table, keyField, orderField); + } + + protected boolean checkRoutesAndStopsIds(Set idErrors, FieldContext fieldContext) throws IOException { + boolean shouldSkipRecord = false; + // First, check uniqueness of primary key value (i.e., stop or route ID) + // in case the stop_code or route_short_name are being used. This + // must occur unconditionally because each record must be tracked + // by the reference tracker. + String primaryKeyValue = csvReader.get(getKeyFieldIndex()); + Set primaryKeyErrors = referenceTracker + .checkReferencesAndUniqueness(primaryKeyValue, lineNumber, fieldContext.getField(), fieldContext.getValue(), table); + // Merging will be based on route_short_name/stop_code in the active and future datasets. All + // matching route_short_names/stop_codes between the datasets shall be considered same route/stop. Any + // route_short_name/stop_code in active data not present in the future will be appended to the + // future routes/stops file. + if (useAltKey()) { + if (hasBlankPrimaryKey(fieldContext)) { + // If alt key is empty (which is permitted) and primary key is duplicate, skip + // checking of alt key dupe errors/re-mapping values and + // simply use the primary key (route_id/stop_id). + // + // Otherwise, allow the record to be written in output. + if (hasDuplicateError(primaryKeyErrors)) { + shouldSkipRecord = true; + } + } else if (hasDuplicateError(idErrors)) { + // If we encounter a route/stop that shares its alt. + // ID with a previous route/stop, we need to + // remap its route_id/stop_id field so that + // references point to the previous + // route_id/stop_id. For example, + // route_short_name in both feeds is "ABC" but + // each route has a different route_id (123 and + // 456). This block will map references to 456 to + // 123 so that ABC/123 is the route of record. + //////////////////////////////////////////////////////// + // Get current route/stop ID. (Note: primary + // ID index is always zero because we're + // iterating over the spec fields). + String currentPrimaryKey = rowValues[0]; + // Get unique key to check for remapped ID when + // writing values to file. + String key = getTableScopedValue(currentPrimaryKey); + // Extract the route/stop ID value used for the + // route/stop with already encountered matching + // short name/stop code. + String[] strings = rowValuesForStopOrRouteId.get( + String.join(":", keyField, fieldContext.getValue()) + ); + String keyForMatchingAltId = strings[0]; + if (!keyForMatchingAltId.equals(currentPrimaryKey)) { + // Remap this row's route_id/stop_id to ensure + // that referencing entities (trips, stop_times) + // have their references updated. + mergeFeedsResult.remappedIds.put(key, keyForMatchingAltId); + } + shouldSkipRecord = true; + } + // Next check for regular ID conflicts (e.g., on route_id or stop_id) because any + // conflicts here will actually break the feed. This essentially handles the case + // where two routes have different short_names, but share the same route_id. We want + // both of these routes to end up in the merged feed in this case because we're + // matching on short name, so we must modify the route_id. 
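+                // For example (illustrative ids), if both feeds contain route_id 123 but with different
+                // route_short_names, the conflicting 123 is rewritten with the feed scope (e.g., FeedName3:123)
+                // so that both routes are retained in the merged output.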
+ if ( + !shouldSkipRecord && + !referenceTracker.transitIds.contains(String.join(":", keyField, keyValue)) && + hasDuplicateError(primaryKeyErrors) + ) { + // Modify route_id and ensure that referencing trips + // have route_id updated. + updateAndRemapOutput(fieldContext); + } + } else { + // Key field has defaulted to the standard primary key field + // (stop_id or route_id), which makes the check much + // simpler (just skip the duplicate record). + if (hasDuplicateError(idErrors)) { + shouldSkipRecord = true; + } + } + + String newAgencyId = getNewAgencyIdForFeed(); + if (newAgencyId != null && fieldContext.nameEquals(AGENCY_ID)) { + LOG.info( + "Updating route#agency_id to (auto-generated) {} for route={}", + newAgencyId, keyValue); + fieldContext.setValue(newAgencyId); + } + + return !shouldSkipRecord; + } + + private boolean hasBlankPrimaryKey(FieldContext fieldContext) { + return "".equals(keyValue) && fieldContext.nameEquals(table.getKeyFieldName()); + } + + private String getNewAgencyIdForFeed() { + return (handlingActiveFeed + ? feedMergeContext.active + : feedMergeContext.future + ).getNewAgencyId(); + } + + private boolean useAltKey() { + return keyField.equals("stop_code") || keyField.equals("route_short_name"); + } + + public boolean updateAgencyIdIfNeeded(FieldContext fieldContext) { + String newAgencyId = getNewAgencyIdForFeed(); + if (newAgencyId != null && fieldContext.nameEquals(AGENCY_ID) && job.mergeType.equals(REGIONAL)) { + if (fieldContext.getValue().equals("") && table.name.equals("agency") && lineNumber > 0) { + // If there is no agency_id value for a second (or greater) agency + // record, return null which will trigger a failed merge feed job. + job.failMergeJob(String.format( + "Feed %s has multiple agency records but no agency_id values.", + feed.version.id + )); + return false; + } + LOG.info("Updating {}#agency_id to (auto-generated) {} for ID {}", table.name, newAgencyId, keyValue); + fieldContext.setValue(newAgencyId); + } + return true; + } + + private void updateServiceIdsIfNeeded(FieldContext fieldContext) { + String fieldValue = fieldContext.getValue(); + if (table.name.equals(Table.TRIPS.name) && + fieldContext.nameEquals(SERVICE_ID) && + job.serviceIdsToCloneRenameAndExtend.contains(fieldValue) && + job.mergeType.equals(SERVICE_PERIOD) + ) { + // Future trip ids not in the active feed will not get the service id remapped, + // they will use the service id as defined in the future feed instead. + if (!(handlingFutureFeed && feedMergeContext.getFutureTripIdsNotInActiveFeed().contains(keyValue))) { + String newServiceId = getIdWithScope(fieldValue); + LOG.info("Updating {}#service_id to (auto-generated) {} for ID {}", table.name, newServiceId, keyValue); + fieldContext.setValueToWrite(newServiceId); + } + } + } + + public boolean storeRowAndStopValues() { + String newLine = String.join(",", rowValues); + switch (table.name) { + // Store row values for route or stop ID (or alternative ID field) in order + // to check for ID conflicts. NOTE: This is only intended to be used for + // routes and stops. Otherwise, this might (will) consume too much memory. + case "stops": + case "routes": + // FIXME: This should be revised for tables with order fields, but it should work fine for its + // primary purposes: to detect exact copy rows and to temporarily hold the data in case a reference + // needs to be looked up in order to remap an entity to that key. + // Here we need to get the key field index according to the spec + // table definition. 
Otherwise, if we use the keyFieldIndex variable + // defined above, we will be using the found fields index, which will + // cause major issues when trying to put and get values into the + // below map. + int fieldIndex = Field.getFieldIndex(sharedSpecFields.toArray(new Field[0]), keyField); + String key = String.join(":", keyField, rowValues[fieldIndex]); + rowValuesForStopOrRouteId.put(key, rowValues); + break; + case "transfers": + case "fare_rules": + case "directions": // GTFS+ table + if (!rowStrings.add(newLine)) { + // The line already exists in the output file, do not append it again. This prevents duplicate + // entries for certain files that do not contain primary keys (e.g., fare_rules and transfers) and + // do not otherwise have convenient ways to track uniqueness (like an order field). + // FIXME: add ordinal field/compound keys for transfers (from/to_stop_id) and fare_rules (?). + // Perhaps it makes sense to include all unique fare rules rows, but transfers that share the + // same from/to stop IDs but different transfer times or other values should not both be + // included in the merged feed (yet this strategy would fail to filter those out). + mergeFeedsResult.recordsSkipCount++; + return true; + } + break; + default: + // Do nothing. + break; + } + return false; + } + + /** + * Overridable placeholder for checking the first line of a file. + */ + public void checkFirstLineConditions() throws IOException { + // Default is to do nothing. + } + + /** + * Overridable placeholder for additional processing after writing the current row. + */ + public void afterRowWrite() throws IOException { + // Default is to do nothing. + } + + /** + * Overridable placeholder for additional processing after processing the table + * (whether any rows are available or not). + */ + public void afterTableRecords() throws IOException { + // Default is to do nothing. + } + + /** + * Overridable placeholder for checking internal table references. E.g. parent_station references stop_id. It is + * illegal to have a self reference within a {@link Table} configuration. + */ + public void checkFieldsForReferences(FieldContext fieldContext) { + // Default is to do nothing. + } + + public void scopeValueIfNeeded(FieldContext fieldContext) { + boolean isKeyField = fieldContext.getField().isForeignReference() || fieldContext.nameEquals(keyField); + if (job.mergeType.equals(REGIONAL) && isKeyField && !fieldContext.getValue().isEmpty()) { + // For regional merge, if field is a GTFS identifier (e.g., route_id, + // stop_id, etc.), add scoped prefix. + fieldContext.setValueToWrite(getIdWithScope(fieldContext.getValue())); + } + } + + public void initializeRowValues() { + // Re-initialize skipRecord to false for next row. + skipRecord = false; + // Reset the row values (this must happen after the first line is checked). + rowValues = new String[sharedSpecFields.size()]; + originalRowValues = new String[sharedSpecFields.size()]; + } + + public void writeValuesToTable(String[] values, boolean incrementLineNumbers) throws IOException { + writer.write(values); + if (incrementLineNumbers) { + lineNumber++; + mergedLineNumber++; + } + } + + public void flushAndClose() throws IOException { + writer.flush(); + out.closeEntry(); + } + + private void writeHeaders() throws IOException { + // Create entry for zip file. + ZipEntry tableEntry = new ZipEntry(table.name + ".txt"); + out.putNextEntry(tableEntry); + // Write headers to table. 
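+        // For example, a merged stops.txt might begin with a header row such as
+        // stop_id,stop_code,stop_name,stop_lat,stop_lon (illustrative column set), depending on which
+        // spec fields were found in the input feeds.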
+ String[] headers = sharedSpecFields.stream() + .map(f -> f.name) + .toArray(String[]::new); + writeValuesToTable(headers, false); + + headersWritten = true; + } + + /** + * Constructs a new row value. + * @return false, if a failing condition was encountered. true, if everything was ok. + */ + public boolean constructRowValues() throws IOException { + boolean result = true; + // Piece together the row to write, which should look practically identical to the original + // row except for the identifiers receiving a prefix to avoid ID conflicts. + for (int specFieldIndex = 0; specFieldIndex < sharedSpecFields.size(); specFieldIndex++) { + Field field = sharedSpecFields.get(specFieldIndex); + // Default value to write is unchanged from value found in csv (i.e. val). Note: if looking to + // modify the value that is written in the merged file, you must update valueToWrite (e.g., + // updating this feed's end_date or accounting for cases where IDs conflict). + FieldContext fieldContext = new FieldContext( + field, + csvReader.get(fieldsFoundList.indexOf(field)) + ); + originalRowValues[specFieldIndex] = fieldContext.getValueToWrite(); + if (!skipRecord) { + // Handle filling in agency_id if missing when merging regional feeds. If false is returned, + // the job has encountered a failing condition (the method handles failing the job itself). + if (!updateAgencyIdIfNeeded(fieldContext)) { + result = false; + } + // Determine if field is a GTFS identifier (and scope if needed). + scopeValueIfNeeded(fieldContext); + // Only need to check for merge conflicts if using MTC merge type because + // the regional merge type scopes all identifiers by default. Also, the + // reference tracker will get far too large if we attempt to use it to + // track references for a large number of feeds (e.g., every feed in New + // York State). + if (job.mergeType.equals(SERVICE_PERIOD)) { + // Remap service id from active feed to distinguish them + // from entries with the same id in the future feed. + // See https://github.com/ibi-group/datatools-server/issues/244 + if (handlingActiveFeed && fieldContext.nameEquals(SERVICE_ID)) { + updateAndRemapOutput(fieldContext); + } + + updateServiceIdsIfNeeded(fieldContext); + + // Store values for key fields that have been encountered and update any key values that need modification due + // to conflicts. + if (!checkFieldsForMergeConflicts(getIdErrors(fieldContext), fieldContext)) { + skipRecord = true; + continue; + } + } else if (job.mergeType.equals(REGIONAL)) { + // If merging feed versions from different agencies, the reference id is updated to avoid conflicts. + // e.g. stop_id becomes Fake_Agency2:123 instead of 123. This method allows referencing fields to be + // updated to the newer id. + checkFieldsForReferences(fieldContext); + } + + // If the current field is a foreign reference, check if the reference has been removed in the + // merged result. If this is the case (or other conditions are met), we will need to skip this + // record. Likewise, if the reference has been modified, ensure that the value written to the + // merged result is correctly updated. + if (!checkForeignReferences(fieldContext)) { + skipRecord = true; + continue; + } + rowValues[specFieldIndex] = fieldContext.getValueToWrite(); + } + } + return result; + } + + private void finishRowAndWriteToZip() throws IOException { + boolean shouldWriteCurrentRow = true; + // Do not write rows that are designated to be skipped. 
+ if (skipRecord && job.mergeType.equals(SERVICE_PERIOD)) { + mergeFeedsResult.recordsSkipCount++; + shouldWriteCurrentRow = false; + } + // Store row and stop values. If the return value is true, the record has been skipped and we + // should skip writing the row to the merged table. + if (storeRowAndStopValues()) { + shouldWriteCurrentRow = false; + } + + // Finally, handle writing lines to zip entry. + if (mergedLineNumber == 0 && !headersWritten) { + writeHeaders(); + } + + if (shouldWriteCurrentRow) { + // Write line to table. + writeValuesToTable(rowValues, true); + } + + // Optional table-specific additional processing. + afterRowWrite(); + } + + public boolean lineIsBlank() throws IOException { + if (csvReader.getValues().length == 1) { + LOG.warn("Found blank line. Skipping..."); + return true; + } + return false; + } + + public boolean isHandlingActiveFeed() { + return handlingActiveFeed; + } + + public boolean isHandlingFutureFeed() { + return handlingFutureFeed; + } + + protected CsvReader getCsvReader() { + return csvReader; + } + + protected int getFieldIndex(String fieldName) { + return Field.getFieldIndex(fieldsFoundInZip, fieldName); + } + + /** + * Generate ID prefix to scope GTFS identifiers to avoid conflicts. + */ + private String makeIdScope(FeedVersion version) { + return getCleanName(feedSource.name) + version.version; + } + + /** Get table-scoped value used for key when remapping references for a particular feed. */ + protected String getTableScopedValue(Table table, String id) { + return String.join( + ":", + table.name, + idScope, + id + ); + } + + /** Shorthand for above using ambient table. */ + protected String getTableScopedValue(String id) { + return getTableScopedValue(table, id); + } + + /** + * Obtains the id scope to use for cloned items. + * It is set to the id scope corresponding to the future feed. + */ + protected String getClonedIdScope() { + return makeIdScope(feedMergeContext.future.feedToMerge.version); + } + + protected int getFeedIndex() { return feedIndex; } + + protected int getLineNumber() { + return lineNumber; + } + + protected String[] getOriginalRowValues() { return originalRowValues; } + + /** + * Retrieves the value for the specified CSV field. + */ + protected String getCsvValue(String fieldName) throws IOException { + int fieldIndex = getFieldIndex(fieldName); + return csvReader.get(fieldIndex); + } + + /** + * Retrieves the value for the specified CSV field as {@link LocalDate}. + */ + protected LocalDate getCsvDate(String fieldName) throws IOException { + return LocalDate.parse(getCsvValue(fieldName), GTFS_DATE_FORMATTER); + } + + /** + * Updates output for the current field and remaps the record id. + */ + protected void updateAndRemapOutput(FieldContext fieldContext, boolean updateKeyValue) { + String value = fieldContext.getValue(); + String valueToWrite = getIdWithScope(value); + fieldContext.setValueToWrite(valueToWrite); + if (updateKeyValue) { + keyValue = valueToWrite; + } + mergeFeedsResult.remappedIds.put( + getTableScopedValue(value), + valueToWrite + ); + } + + /** + * Shorthand for the above method. + */ + protected void updateAndRemapOutput(FieldContext fieldContext) { + updateAndRemapOutput(fieldContext,false); + } + + /** + * Add the specified field once record reading has started. 
+ */ + protected void addField(Field field) { + List fieldsList = new ArrayList<>(Arrays.asList(fieldsFoundInZip)); + fieldsList.add(field); + fieldsFoundInZip = fieldsList.toArray(fieldsFoundInZip); + allFields.add(field); + fieldsFoundList = Arrays.asList(fieldsFoundInZip); + } + + /** + * Helper method to get the key field position. + */ + protected int getKeyFieldIndex() { + return table.getKeyFieldIndex(fieldsFoundInZip); + } + + /** + * Helper method that determines whether a service id for the + * current calendar-related table is unused or not. + */ + protected boolean isServiceIdUnused() { + boolean isUnused = false; + FeedContext feedContext = handlingActiveFeed ? feedMergeContext.active : feedMergeContext.future; + + if (feedContext.getServiceIdsToRemove().contains(keyValue)) { + String activeOrFuture = handlingActiveFeed ? "active" : "future"; + LOG.warn( + "Skipping {} {} entry {} because it will become unused in the merged feed.", + activeOrFuture, + table.name, + keyValue + ); + + mergeFeedsResult.skippedIds.add(getTableScopedValue(keyValue)); + + isUnused = true; + } + + return isUnused; + } + + /** + * Adds a cloned service id for trips with the same signature in both the active & future feeds. + * The cloned service id spans from the start date in the active feed until the end date in the future feed. + * If dealing with the calendar table, this will update the start_date field accordingly. + */ + public void addClonedServiceId() throws IOException { + if (isHandlingFutureFeed() && job.mergeType.equals(SERVICE_PERIOD)) { + String originalServiceId = keyValue; + if (job.serviceIdsToCloneRenameAndExtend.contains(originalServiceId)) { + String[] clonedValues = getOriginalRowValues().clone(); + String newServiceId = clonedValues[keyFieldIndex] = getIdWithScope(originalServiceId); + + if (table.name.equals(Table.CALENDAR.name)) { + // Modify start date only (preserve the end date from the future calendar entry). + int startDateIndex = Table.CALENDAR.getFieldIndex("start_date"); + clonedValues[startDateIndex] = feedMergeContext.active.feed.calendars.get(originalServiceId) + .start_date.format(GTFS_DATE_FORMATTER); + } + + referenceTracker.checkReferencesAndUniqueness( + keyValue, + getLineNumber(), + table.fields[0], + newServiceId, + table, + keyField, + table.getOrderFieldName() + ); + writeValuesToTable(clonedValues, true); + } + } + } +} \ No newline at end of file diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/MergeStrategy.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/MergeStrategy.java new file mode 100644 index 000000000..c3b783152 --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/MergeStrategy.java @@ -0,0 +1,34 @@ +package com.conveyal.datatools.manager.jobs.feedmerge; + +/** + * This enum defines the different strategies for merging, which is currently dependent on whether trip_ids and/or + * service_ids between the two feeds are exactly matching. + */ +public enum MergeStrategy { + /** + * If service_ids and trip_ids between active and future feed are all unique, all IDs shall be included + * in merged feed. If a service_id from the active calendar has end_date in the future, the end_date shall be + * set to one day prior to the earliest start_date in future dataset before appending the calendar record to + * the merged file. It shall be ensured that trip_ids between active and future datasets must not match. 
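+     * For example (illustrative dates), if the future dataset's earliest start_date is 2021-06-01, an active
+     * calendar whose end_date extends past that date would have its end_date set to 2021-05-31 in the merged feed.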
+ */ + DEFAULT, + /** + * If service_ids in active and future feed exactly match but only some of the trip_ids match then the merge + * strategy shall handle the following three cases: + * - *trip_id in both feeds*: The service shall start from the data merge date and end at the future feed’s service + * end date. + * Note: The merge process shall validate records in stop_times.txt file for same trip signature (same set of + * stops with same sequence). Trips with matching stop_times will be included as is (but not duplicated of course). + * Trips that do not match on stop_times will be handled with the below approaches. + * Note: Same service IDs shall be used (but extended to account for the full range of dates from active to future). + * - *trip_id in active feed*: A new service shall be created starting from the merge date and expiring at the end + * of active service period. + * Note: a new service_id will be generated for these active trips in the merged feed (rather than using the + * service_id with extended range). + * - *trip_id in future feed*: A new service shall be created for these trips with service period defined in future + * feed + * Note: a new service_id will be generated for these future trips in the merged feed (rather than using the + * service_id with extended range). + */ + CHECK_STOP_TIMES +} \ No newline at end of file diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/RoutesMergeLineContext.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/RoutesMergeLineContext.java new file mode 100644 index 000000000..276979c79 --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/RoutesMergeLineContext.java @@ -0,0 +1,20 @@ +package com.conveyal.datatools.manager.jobs.feedmerge; + +import com.conveyal.datatools.manager.jobs.MergeFeedsJob; +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.loader.Table; + +import java.io.IOException; +import java.util.Set; +import java.util.zip.ZipOutputStream; + +public class RoutesMergeLineContext extends MergeLineContext { + public RoutesMergeLineContext(MergeFeedsJob job, Table table, ZipOutputStream out) throws IOException { + super(job, table, out); + } + + @Override + public boolean checkFieldsForMergeConflicts(Set idErrors, FieldContext fieldContext) throws IOException { + return checkRoutesAndStopsIds(idErrors, fieldContext); + } +} \ No newline at end of file diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/ShapesMergeLineContext.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/ShapesMergeLineContext.java new file mode 100644 index 000000000..07af45349 --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/ShapesMergeLineContext.java @@ -0,0 +1,64 @@ +package com.conveyal.datatools.manager.jobs.feedmerge; + +import com.conveyal.datatools.manager.jobs.MergeFeedsJob; +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.loader.Table; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; +import java.util.zip.ZipOutputStream; + +import static com.conveyal.datatools.manager.utils.MergeFeedUtils.hasDuplicateError; + +public class ShapesMergeLineContext extends MergeLineContext { + // Track shape_ids found in future feed in order to check for conflicts with active feed (MTC only). 
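+    // For example, if shape "shp_1" appears in both feeds, the active feed's shape points are written under a
+    // rescoped id (e.g., FeedName3:shp_1, illustrative scope) so that point sequences from the two feeds are not
+    // mixed under a single shape_id.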
+ private final Set shapeIdsInFutureFeed = new HashSet<>(); + + public ShapesMergeLineContext(MergeFeedsJob job, Table table, ZipOutputStream out) throws IOException { + super(job, table, out); + } + + @Override + public boolean checkFieldsForMergeConflicts(Set idErrors, FieldContext fieldContext) { + return checkShapeIds(idErrors, fieldContext); + } + + private boolean checkShapeIds(Set idErrors, FieldContext fieldContext) { + boolean shouldSkipRecord = false; + // If a shape_id is found in both future and active datasets, all shape points from + // the active dataset must be feed-scoped. Otherwise, the merged dataset may contain + // shape_id:shape_pt_sequence values from both datasets (e.g., if future dataset contains + // sequences 1,2,3,10 and active contains 1,2,7,9,10; the merged set will contain + // 1,2,3,7,9,10). + if (fieldContext.nameEquals("shape_id")) { + String val = fieldContext.getValue(); + if (isHandlingFutureFeed()) { + // Track shape_id if working on future feed. + shapeIdsInFutureFeed.add(val); + } else if (shapeIdsInFutureFeed.contains(val)) { + // For the active feed, if the shape_id was already processed from the + // future feed, we need to add the feed-scope to avoid weird, hybrid shapes + // with points from both feeds. + updateAndRemapOutput(fieldContext,true); + // Re-check refs and uniqueness after changing shape_id value. (Note: this + // probably won't have any impact, but there's not much harm in including it.) + idErrors = referenceTracker + .checkReferencesAndUniqueness( + keyValue, + getLineNumber(), + fieldContext.getField(), + fieldContext.getValueToWrite(), + table, + keyField, + table.getOrderFieldName()); + } + } + // Skip record if normal duplicate errors are found. + if (hasDuplicateError(idErrors)) { + shouldSkipRecord = true; + } + + return !shouldSkipRecord; + } +} \ No newline at end of file diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/StopsMergeLineContext.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/StopsMergeLineContext.java new file mode 100644 index 000000000..4644c8c3a --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/StopsMergeLineContext.java @@ -0,0 +1,130 @@ +package com.conveyal.datatools.manager.jobs.feedmerge; + +import com.conveyal.datatools.manager.jobs.MergeFeedsJob; +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.loader.Field; +import com.conveyal.gtfs.loader.Table; +import com.csvreader.CsvReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Set; +import java.util.zip.ZipOutputStream; + +import static com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType.SERVICE_PERIOD; +import static com.conveyal.datatools.manager.utils.MergeFeedUtils.stopCodeFailureMessage; + +public class StopsMergeLineContext extends MergeLineContext { + private static final Logger LOG = LoggerFactory.getLogger(StopsMergeLineContext.class); + + private boolean stopCodeMissingFromFutureFeed = false; + + public StopsMergeLineContext(MergeFeedsJob job, Table table, ZipOutputStream out) throws IOException { + super(job, table, out); + } + + @Override + public void checkFirstLineConditions() throws IOException { + checkThatStopCodesArePopulatedWhereRequired(); + } + + @Override + public boolean checkFieldsForMergeConflicts(Set idErrors, FieldContext fieldContext) throws IOException { + return checkRoutesAndStopsIds(idErrors, fieldContext); + } + + @Override + public void 
checkFieldsForReferences(FieldContext fieldContext) { + updateParentStationReference(fieldContext); + } + + /** + * If there is a parent station reference, update to include the scope stop_id. + */ + private void updateParentStationReference(FieldContext fieldContext) { + if (fieldContext.nameEquals("parent_station")) { + String parentStation = fieldContext.getValue(); + if (!"".equals(parentStation)) { + LOG.debug("Updating parent station to: {}", getIdWithScope(parentStation)); + updateAndRemapOutput(fieldContext); + } + } + } + + /** + * Checks that the stop_code field of the Stop entities to merge is populated where required. + * @throws IOException + */ + private void checkThatStopCodesArePopulatedWhereRequired() throws IOException { + if (shouldCheckStopCodes()) { + // Before reading any lines in stops.txt, first determine whether all records contain + // properly filled stop_codes. The rules governing this logic are as follows: + // 1. Stops with location_type greater than 0 (i.e., anything but 0 or empty) are permitted + // to have empty stop_codes (even if there are other stops in the feed that have + // stop_code values). This is because these location_types represent special entries + // that are either stations, entrances/exits, or generic nodes (e.g., for + // pathways.txt). + // 2. For regular stops (location_type = 0 or empty), all or none of the stops must + // contain stop_codes. Otherwise, the merge feeds job will be failed. + int stopsMissingStopCodeCount = 0; + int stopsCount = 0; + int specialStopsCount = 0; + int locationTypeIndex = getFieldIndex("location_type"); + int stopCodeIndex = getFieldIndex("stop_code"); + // Get special stops reader to iterate over every stop and determine if stop_code values + // are present. + CsvReader stopsReader = table.getCsvReader(feed.zipFile, null); + while (stopsReader.readRecord()) { + stopsCount++; + // Special stop records (i.e., a station, entrance, or anything with + // location_type > 0) do not need to specify stop_code. Other stops should. + String stopCode = stopsReader.get(stopCodeIndex); + boolean stopCodeIsMissing = "".equals(stopCode); + String locationType = stopsReader.get(locationTypeIndex); + if (isSpecialStop(locationType)) specialStopsCount++; + else if (stopCodeIsMissing) stopsMissingStopCodeCount++; + } + stopsReader.close(); + LOG.info("total stops: {}", stopsCount); + LOG.info("stops missing stop_code: {}", stopsMissingStopCodeCount); + if (stopsMissingStopCodeCount + specialStopsCount == stopsCount) { + // If all stops are missing stop_code (taking into account the special stops that do + // not require stop_code), we simply default to merging on stop_id. + LOG.warn( + "stop_code is not present in file {}/{}. Reverting to stop_id", + getFeedIndex() + 1, feedMergeContext.feedsToMerge.size()); + // If the key value for stop_code is not present, revert to stop_id. + keyField = table.getKeyFieldName(); + keyFieldIndex = getKeyFieldIndex(); + keyValue = getCsvReader().get(keyFieldIndex); + // When all stops missing stop_code for the first feed, there's nothing to do (i.e., + // no failure condition has been triggered yet). Just indicate this in the flag and + // proceed with the merge. + if (isHandlingFutureFeed()) { + stopCodeMissingFromFutureFeed = true; + } else if (!stopCodeMissingFromFutureFeed) { + // However... if the second feed was missing stop_codes and the first feed was not, + // fail the merge job. 
+                    job.failMergeJob(
+                        stopCodeFailureMessage(stopsMissingStopCodeCount, stopsCount, specialStopsCount)
+                    );
+                }
+            } else if (stopsMissingStopCodeCount > 0) {
+                // If some, but not all, stops are missing stop_code, the merge feeds job must fail.
+                job.failMergeJob(
+                    stopCodeFailureMessage(stopsMissingStopCodeCount, stopsCount, specialStopsCount)
+                );
+            }
+        }
+    }
+
+    private boolean shouldCheckStopCodes() {
+        return job.mergeType.equals(SERVICE_PERIOD);
+    }
+
+    /** Determine whether a stop is "special" via its location_type, i.e., a station, entrance, or other entry with location_type > 0. */
+    private boolean isSpecialStop(String locationType) {
+        return !"".equals(locationType) && !"0".equals(locationType);
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/TripsMergeLineContext.java b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/TripsMergeLineContext.java
new file mode 100644
index 000000000..859708bb7
--- /dev/null
+++ b/src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/TripsMergeLineContext.java
@@ -0,0 +1,50 @@
+package com.conveyal.datatools.manager.jobs.feedmerge;
+
+import com.conveyal.datatools.manager.jobs.MergeFeedsJob;
+import com.conveyal.gtfs.error.NewGTFSError;
+import com.conveyal.gtfs.loader.Table;
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.zip.ZipOutputStream;
+
+import static com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType.SERVICE_PERIOD;
+import static com.conveyal.datatools.manager.utils.MergeFeedUtils.hasDuplicateError;
+
+public class TripsMergeLineContext extends MergeLineContext {
+    public TripsMergeLineContext(MergeFeedsJob job, Table table, ZipOutputStream out) throws IOException {
+        super(job, table, out);
+    }
+
+    @Override
+    public boolean checkFieldsForMergeConflicts(Set<NewGTFSError> idErrors, FieldContext fieldContext) {
+        return checkTripIds(idErrors, fieldContext);
+    }
+
+    private boolean checkTripIds(Set<NewGTFSError> idErrors, FieldContext fieldContext) {
+        // For the MTC revised feed merge process, the updated logic requires inserting all trips from both the
+        // active and future feeds, except if a trip is present in both, in which case we only insert the trip
+        // entry from the future feed and skip the one in the active feed.
+        boolean shouldSkipRecord =
+            job.mergeType.equals(SERVICE_PERIOD) &&
+            isHandlingActiveFeed() &&
+            job.sharedTripIdsWithConsistentSignature.contains(keyValue);
+
+        // Remap duplicate trip ids for records that are not skipped.
+        if (!shouldSkipRecord && hasDuplicateError(idErrors)) {
+            updateAndRemapOutput(fieldContext, true);
+        }
+
+        // Remove remapped service_ids associated with the trips table from the merge summary
+        // (the remapped id is already listed under the calendar/calendar_dates tables,
+        // so there is no need to add that foreign key again).
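+        // For example, an entry keyed "trips:FeedName3:service_1" (illustrative scope and id) is removed here,
+        // while the corresponding calendar-scoped entry remains in the summary.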
+ if (fieldContext.nameEquals(SERVICE_ID)) { + mergeFeedsResult.remappedIds.remove( + getTableScopedValue(fieldContext.getValue()) + ); + } + + return !shouldSkipRecord; + } +} \ No newline at end of file diff --git a/src/main/java/com/conveyal/datatools/manager/models/Deployment.java b/src/main/java/com/conveyal/datatools/manager/models/Deployment.java index 8541d5ddc..db2ce0a7c 100644 --- a/src/main/java/com/conveyal/datatools/manager/models/Deployment.java +++ b/src/main/java/com/conveyal/datatools/manager/models/Deployment.java @@ -74,8 +74,7 @@ public class Deployment extends Model implements Serializable { private ObjectMapper otpConfigMapper = new ObjectMapper().setSerializationInclusion(Include.NON_NULL); /* Pelias fields, used to determine where/if to send data to the Pelias webhook */ - public String peliasWebhookUrl; - public boolean peliasUpdate; + public boolean peliasResetDb; public List peliasCsvFiles = new ArrayList<>(); /** diff --git a/src/main/java/com/conveyal/datatools/manager/models/FeedSource.java b/src/main/java/com/conveyal/datatools/manager/models/FeedSource.java index 612e45049..4aa8b73c9 100644 --- a/src/main/java/com/conveyal/datatools/manager/models/FeedSource.java +++ b/src/main/java/com/conveyal/datatools/manager/models/FeedSource.java @@ -18,6 +18,8 @@ import com.conveyal.datatools.manager.models.transform.FeedTransformation; import com.conveyal.datatools.manager.persistence.Persistence; import com.conveyal.datatools.manager.utils.JobUtils; +import com.conveyal.datatools.manager.utils.connections.ConnectionResponse; +import com.conveyal.datatools.manager.utils.connections.HttpURLConnectionResponse; import com.conveyal.gtfs.GTFS; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @@ -45,6 +47,7 @@ import java.util.Objects; import java.util.stream.Collectors; +import static com.conveyal.datatools.manager.models.FeedRetrievalMethod.FETCHED_AUTOMATICALLY; import static com.conveyal.datatools.manager.utils.StringUtils.getCleanName; import static com.mongodb.client.model.Filters.and; import static com.mongodb.client.model.Filters.eq; @@ -102,6 +105,12 @@ public String organizationId () { /** Is this feed deployable? */ public boolean deployable; + /** + * Determines whether this feed will be auto-published (e.g. after fetching a new version) + * if no blocking errors are found (requires MTC extension). + */ + public boolean autoPublish; + /** * How do we receive this feed? */ @@ -208,8 +217,46 @@ public FeedVersion fetch (MonitorableJob.Status status, String optionalUrlOverri // We create a new FeedVersion now, so that the fetched date is (milliseconds) before // fetch occurs. That way, in the highly unlikely event that a feed is updated while we're // fetching it, we will not miss a new feed. - FeedVersion version = new FeedVersion(this, FeedRetrievalMethod.FETCHED_AUTOMATICALLY); + FeedVersion version = new FeedVersion(this, FETCHED_AUTOMATICALLY); + + // Get latest version to check that the fetched version does not duplicate a feed already loaded. 
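+        // (If the fetched file's hash matches this latest version's hash, processFetchResponse reports that the
+        // feed has not changed; see FeedVersion#isSameAs further below.)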
+ FeedVersion latest = retrieveLatest(); + + HttpURLConnection conn = makeHttpURLConnection(status, optionalUrlOverride, getModifiedThreshold(latest)); + if (conn == null) return null; + + try { + conn.connect(); + return processFetchResponse(status, optionalUrlOverride, version, latest, new HttpURLConnectionResponse(conn)); + } catch (IOException e) { + String message = String.format("Unable to connect to %s; not fetching %s feed", conn.getURL(), this.name); // url, this.name); + LOG.error(message); + status.fail(message); + e.printStackTrace(); + return null; + } + } + /** + * Computes the modified time to set to the HttpURLConnection + * so that if a version has not been published since the last fetch, + * then download can be skipped. + * @return The computed threshold if the latest feed version exists and was auto-fetched + * and there is a record for the last fetch action, null otherwise. + */ + private Long getModifiedThreshold(FeedVersion latest) { + Long modifiedThreshold = null; + // lastFetched is set to null when the URL changes and when latest feed version is deleted + if (latest != null && latest.retrievalMethod.equals(FETCHED_AUTOMATICALLY) && this.lastFetched != null) { + modifiedThreshold = Math.min(latest.updated.getTime(), this.lastFetched.getTime()); + } + return modifiedThreshold; + } + + /** + * Builds an {@link HttpURLConnection}. + */ + private HttpURLConnection makeHttpURLConnection(MonitorableJob.Status status, String optionalUrlOverride, Long modifiedThreshold) { // build the URL from which to fetch URL url = null; try { @@ -237,18 +284,27 @@ public FeedVersion fetch (MonitorableJob.Status status, String optionalUrlOverri } conn.setDefaultUseCaches(true); - // Get latest version to check that the fetched version does not duplicate a feed already loaded. - FeedVersion latest = retrieveLatest(); - // lastFetched is set to null when the URL changes and when latest feed version is deleted - if (latest != null && this.lastFetched != null) - conn.setIfModifiedSince(Math.min(latest.updated.getTime(), this.lastFetched.getTime())); - File newGtfsFile; + if (modifiedThreshold != null) conn.setIfModifiedSince(modifiedThreshold); + + return conn; + } + /** + * Processes the given fetch response. + * @return true if a new FeedVersion was created from the response, false otherwise. + */ + public FeedVersion processFetchResponse( + MonitorableJob.Status status, + String optionalUrlOverride, + FeedVersion version, + FeedVersion latest, + ConnectionResponse response + ) { + File newGtfsFile; try { - conn.connect(); String message; - int responseCode = conn.getResponseCode(); + int responseCode = response.getResponseCode(); LOG.info("Fetch feed response code={}", responseCode); switch (responseCode) { case HttpURLConnection.HTTP_NOT_MODIFIED: @@ -263,17 +319,17 @@ public FeedVersion fetch (MonitorableJob.Status status, String optionalUrlOverri status.update(message, 75.0); // Create new file from input stream (this also handles hashing the file and other version fields // calculated from the GTFS file. - newGtfsFile = version.newGtfsFile(conn.getInputStream()); + newGtfsFile = version.newGtfsFile(response.getInputStream()); break; case HttpURLConnection.HTTP_MOVED_TEMP: case HttpURLConnection.HTTP_MOVED_PERM: case HttpURLConnection.HTTP_SEE_OTHER: // Get redirect url from "location" header field - String newUrl = conn.getHeaderField("Location"); + String redirectUrl = response.getRedirectUrl(); if (optionalUrlOverride != null) { // Only permit recursion one level deep. 
If more than one redirect is detected, fail the job and // suggest that user try again with new URL. - message = String.format("More than one redirects for fetch URL detected. Please try fetch again with latest URL: %s", newUrl); + message = String.format("More than one redirects for fetch URL detected. Please try fetch again with latest URL: %s", redirectUrl); LOG.error(message); status.fail(message); return null; @@ -281,13 +337,13 @@ public FeedVersion fetch (MonitorableJob.Status status, String optionalUrlOverri // If override URL is null, this is the zeroth fetch. Recursively call fetch, but only one time // to prevent multiple (possibly infinite?) redirects. Any more redirects than one should // probably be met with user action to update the fetch URL. - LOG.info("Recursively calling fetch feed with new URL: {}", newUrl); - return fetch(status, newUrl); + LOG.info("Recursively calling fetch feed with new URL: {}", redirectUrl); + return fetch(status, redirectUrl); } default: // Any other HTTP codes result in failure. // FIXME Are there "success" codes we're not accounting for? - message = String.format("HTTP status (%d: %s) retrieving %s feed", responseCode, conn.getResponseMessage(), this.name); + message = String.format("HTTP status (%d: %s) retrieving %s feed", responseCode, response.getResponseMessage(), this.name); LOG.error(message); status.fail(message); return null; @@ -299,7 +355,7 @@ public FeedVersion fetch (MonitorableJob.Status status, String optionalUrlOverri e.printStackTrace(); return null; } - if (latest != null && version.hash.equals(latest.hash)) { + if (version.isSameAs(latest)) { // If new version hash equals the hash for the latest version, do not error. Simply indicate that server // operators should add If-Modified-Since support to avoid wasting bandwidth. String message = String.format("Feed %s was fetched but has not changed; server operators should add If-Modified-Since support to avoid wasting bandwidth", this.name); @@ -319,7 +375,7 @@ public FeedVersion fetch (MonitorableJob.Status status, String optionalUrlOverri Persistence.feedSources.updateField(this.id, "lastFetched", version.updated); // Set file timestamp according to last modified header from connection - version.fileTimestamp = conn.getLastModified(); + version.fileTimestamp = response.getLastModified(); String message = String.format("Fetch complete for %s", this.name); LOG.info(message); status.completeSuccessfully(message); diff --git a/src/main/java/com/conveyal/datatools/manager/models/FeedVersion.java b/src/main/java/com/conveyal/datatools/manager/models/FeedVersion.java index 657d90d33..456a7d3b5 100644 --- a/src/main/java/com/conveyal/datatools/manager/models/FeedVersion.java +++ b/src/main/java/com/conveyal/datatools/manager/models/FeedVersion.java @@ -407,10 +407,34 @@ private boolean hasFeedVersionExpired() { * @return whether high severity error types have been flagged. */ private boolean hasHighSeverityErrorTypes() { - Set highSeverityErrorTypes = Stream.of(NewGTFSErrorType.values()) - .filter(type -> type.priority == Priority.HIGH) - .map(NewGTFSErrorType::toString) - .collect(Collectors.toSet()); + return hasSpecificErrorTypes(Stream.of(NewGTFSErrorType.values()) + .filter(type -> type.priority == Priority.HIGH)); + } + + /** + * Checks for issues that block feed publishing, consistent with UI. 
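+     * For example, a version containing REFERENTIAL_INTEGRITY or MISSING_COLUMN errors (or a fatal validation
+     * exception) is considered blocked from publishing, while lower-severity warnings are not.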
+ */ + public boolean hasBlockingIssuesForPublishing() { + if (this.validationResult.fatalException != null) return true; + + return hasSpecificErrorTypes(Stream.of( + NewGTFSErrorType.ILLEGAL_FIELD_VALUE, + NewGTFSErrorType.MISSING_COLUMN, + NewGTFSErrorType.REFERENTIAL_INTEGRITY, + NewGTFSErrorType.SERVICE_WITHOUT_DAYS_OF_WEEK, + NewGTFSErrorType.TABLE_MISSING_COLUMN_HEADERS, + NewGTFSErrorType.TABLE_IN_SUBDIRECTORY, + NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS + )); + } + + /** + * Determines whether this feed has specific error types. + */ + private boolean hasSpecificErrorTypes(Stream errorTypes) { + Set highSeverityErrorTypes = errorTypes + .map(NewGTFSErrorType::toString) + .collect(Collectors.toSet()); try (Connection connection = GTFS_DATA_SOURCE.getConnection()) { String sql = String.format("select distinct error_type from %s.errors", namespace); PreparedStatement preparedStatement = connection.prepareStatement(sql); @@ -427,6 +451,7 @@ private boolean hasHighSeverityErrorTypes() { // is invalid for one reason or another. return true; } + return false; } @@ -506,4 +531,14 @@ public void assignGtfsFileAttributes(File newGtfsFile, Long lastModifiedOverride public void assignGtfsFileAttributes(File newGtfsFile) { assignGtfsFileAttributes(newGtfsFile, null); } + + /** + * Determines whether this feed version matches another one specified, i.e., + * whether the otherVersion doesn't have a different hash, thus has not changed, compared to this one. + * @param otherVersion The version to compare the hash to. + * @return true if the otherVersion hash is the same, false if the hashes differ or the otherVersion is null. + */ + public boolean isSameAs(FeedVersion otherVersion) { + return otherVersion != null && this.hash.equals(otherVersion.hash); + } } diff --git a/src/main/java/com/conveyal/datatools/manager/models/Project.java b/src/main/java/com/conveyal/datatools/manager/models/Project.java index 9b7cbde17..28b3d677e 100644 --- a/src/main/java/com/conveyal/datatools/manager/models/Project.java +++ b/src/main/java/com/conveyal/datatools/manager/models/Project.java @@ -90,6 +90,12 @@ public List availableOtpServers() { */ public String regionalFeedSourceId; + /** + * Webhook URL for the Pelias webhook endpoint, used during Pelias deployment. + */ + public String peliasWebhookUrl; + + public Project() { this.buildConfig = new OtpBuildConfig(); this.routerConfig = new OtpRouterConfig(); diff --git a/src/main/java/com/conveyal/datatools/manager/utils/ErrorUtils.java b/src/main/java/com/conveyal/datatools/manager/utils/ErrorUtils.java index a3dd840ab..19eba4ee3 100644 --- a/src/main/java/com/conveyal/datatools/manager/utils/ErrorUtils.java +++ b/src/main/java/com/conveyal/datatools/manager/utils/ErrorUtils.java @@ -7,6 +7,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.HashMap; import java.util.Map; /** @@ -32,6 +33,21 @@ public static void reportToBugsnag(Throwable e, Auth0UserProfile userProfile) { reportToBugsnag(e, null, userProfile); } + /** + * Log an error, and create and send a report to bugsnag if configured. + * + * @param e The throwable object to send to Bugsnag. This MUST be provided or a report will not be generated. + * @param sourceApp The application generating the message (datatools, otp-runner, ...). + * @param message The message to log and to send to Bugsnag. + * @param userProfile An optional user profile. If provided, the email address from this profile will be set in the + * Bugsnag report. 
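+     * Example (hypothetical arguments): reportToBugsnag(e, "otp-runner", "Graph build failed", userProfile)
+     * attaches a debugging entry keyed "otp-runner message" to the Bugsnag report.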
+ */ + public static void reportToBugsnag(Throwable e, String sourceApp, String message, Auth0UserProfile userProfile) { + Map debuggingMessages = new HashMap<>(); + debuggingMessages.put(sourceApp + " message", message); + reportToBugsnag(e, debuggingMessages, userProfile); + } + /** * Create and send a report to bugsnag if configured. * @@ -77,4 +93,4 @@ public static void initialize() { bugsnag = new Bugsnag(bugsnagKey); } } -} \ No newline at end of file +} diff --git a/src/main/java/com/conveyal/datatools/manager/utils/MergeFeedUtils.java b/src/main/java/com/conveyal/datatools/manager/utils/MergeFeedUtils.java new file mode 100644 index 000000000..0205ef003 --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/utils/MergeFeedUtils.java @@ -0,0 +1,229 @@ +package com.conveyal.datatools.manager.utils; + +import com.conveyal.datatools.manager.DataManager; +import com.conveyal.datatools.manager.auth.Auth0UserProfile; +import com.conveyal.datatools.manager.jobs.feedmerge.FeedToMerge; +import com.conveyal.datatools.manager.jobs.MergeFeedsJob; +import com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType; +import com.conveyal.datatools.manager.models.FeedRetrievalMethod; +import com.conveyal.datatools.manager.models.FeedSource; +import com.conveyal.datatools.manager.models.FeedVersion; +import com.conveyal.datatools.manager.models.Project; +import com.conveyal.datatools.manager.persistence.Persistence; +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.error.NewGTFSErrorType; +import com.conveyal.gtfs.loader.Field; +import com.conveyal.gtfs.loader.Table; +import com.conveyal.gtfs.model.StopTime; +import com.csvreader.CsvReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.zip.ZipFile; + +import static com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType.REGIONAL; +import static com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType.SERVICE_PERIOD; +import static com.conveyal.datatools.manager.models.FeedRetrievalMethod.REGIONAL_MERGE; +import static com.conveyal.datatools.manager.models.FeedRetrievalMethod.SERVICE_PERIOD_MERGE; +import static com.conveyal.gtfs.loader.Field.getFieldIndex; + +public class MergeFeedUtils { + private static final Logger LOG = LoggerFactory.getLogger(MergeFeedUtils.class); + + /** + * Get the ids (e.g., trip_id, service_id) for the provided table from the zipfile. + */ + public static Set getIdsForTable(ZipFile zipFile, Table table) throws IOException { + Set ids = new HashSet<>(); + String keyField = table.getKeyFieldName(); + CsvReader csvReader = table.getCsvReader(zipFile, null); + if (csvReader == null) { + LOG.warn("Table {} not found in zip file: {}", table.name, zipFile.getName()); + return ids; + } + try { + Field[] fieldsFoundInZip = table.getFieldsFromFieldHeaders(csvReader.getHeaders(), null); + // Get the key field (id value) for each row. + int keyFieldIndex = getFieldIndex(fieldsFoundInZip, keyField); + while (csvReader.readRecord()) { + ids.add(csvReader.get(keyFieldIndex)); + } + } finally { + csvReader.close(); + } + return ids; + } + + /** + * Construct stop_code failure message for {@link com.conveyal.datatools.manager.jobs.MergeFeedsJob} in the case of + * incomplete stop_code values for all records. 
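+     * For example (illustrative counts), stopCodeFailureMessage(5, 120, 3) produces a message stating that 5 of
+     * 120 total stops were incorrectly missing stop_code values and that 3 special stops were found in the feed.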
+ */ + public static String stopCodeFailureMessage(int stopsMissingStopCodeCount, int stopsCount, int specialStopsCount) { + return String.format( + "If stop_code is provided for some stops (for those with location_type = " + + "empty or 0), all stops must have stop_code values. The merge process " + + "found %d of %d total stops that were incorrectly missing stop_code values. " + + "Note: \"special\" stops with location_type > 0 need not specify this value " + + "(%d special stops found in feed).", + stopsMissingStopCodeCount, + stopsCount, + specialStopsCount + ); + } + + /** + * Collect zipFiles for each feed version before merging tables. + * Note: feed versions are sorted by first calendar date so that future dataset is iterated over first. This is + * required for the MTC merge strategy which prefers entities from the future dataset over active feed entities. + */ + public static List collectAndSortFeeds(Set feedVersions, Auth0UserProfile owner) { + return feedVersions.stream() + .map(version -> { + try { + return new FeedToMerge(version); + } catch (Exception e) { + LOG.error("Could not create zip file for version: {}", version.version); + ErrorUtils.reportToBugsnag(e, owner); + return null; + } + }) + // Filter out any feeds that do not have zip files (see above try/catch) and feeds that were never fully + // validated (which suggests that they would break things during validation). + .filter(Objects::nonNull) + .filter( + entry -> entry.version.validationResult != null + && entry.version.validationResult.firstCalendarDate != null + ) + // MTC-specific sort mentioned in above comment. + // TODO: If another merge strategy requires a different sort order, a merge type check should be added. + .sorted( + Comparator.comparing( + entry -> entry.version.validationResult.firstCalendarDate, + Comparator.reverseOrder()) + ).collect(Collectors.toList()); + } + + /** Get all fields found in the feeds being merged for a specific table. */ + public static Set getAllFields(List feedsToMerge, Table table) throws IOException { + Set sharedFields = new HashSet<>(); + // First, iterate over each feed to collect the shared fields that need to be output in the merged table. + for (FeedToMerge feed : feedsToMerge) { + CsvReader csvReader = table.getCsvReader(feed.zipFile, null); + // If csv reader is null, the table was not found in the zip file. + if (csvReader == null) { + continue; + } + try { + // Get fields found from headers and add them to the shared fields set. + Field[] fieldsFoundInZip = table.getFieldsFromFieldHeaders(csvReader.getHeaders(), null); + sharedFields.addAll(Arrays.asList(fieldsFoundInZip)); + } finally { + csvReader.close(); + } + } + return sharedFields; + } + + /** + * Checks whether a collection of fields contains a field with the provided name. + */ + public static boolean containsField(Collection fields, String fieldName) { + for (Field field : fields) if (field.name.equals(fieldName)) return true; + return false; + } + + /** Checks that any of a set of errors is of the type {@link NewGTFSErrorType#DUPLICATE_ID}. */ + public static boolean hasDuplicateError(Set errors) { + for (NewGTFSError error : errors) { + if (error.errorType.equals(NewGTFSErrorType.DUPLICATE_ID)) return true; + } + return false; + } + + /** + * Checks whether the future and active stop_times for a particular trip_id are an exact match, + * using these criteria only: arrival_time, departure_time, stop_id, and stop_sequence + * instead of StopTime::equals (Revised MTC feed merge requirement). 
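+     * For example, two trips whose stop_times differ only in fields such as shape_dist_traveled or pickup_type
+     * are still considered a match here.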
+ */ + public static boolean stopTimesMatchSimplified(List futureStopTimes, List activeStopTimes) { + if (futureStopTimes.size() != activeStopTimes.size()) { + return false; + } + for (int i = 0; i < activeStopTimes.size(); i++) { + StopTime activeTime = activeStopTimes.get(i); + StopTime futureTime = futureStopTimes.get(i); + + if ( + activeTime.arrival_time != futureTime.arrival_time || + activeTime.departure_time != futureTime.departure_time || + activeTime.stop_sequence != futureTime.stop_sequence || + !activeTime.stop_id.equals(futureTime.stop_id) + ) { + return false; + } + } + return true; + } + + /** + * Get parent feed source depending on merge type. Assign regional feed source or simply the first parent feed + * source found in the feed version list (these should all belong to the same feed source if the merge is not + * regional). + */ + public static FeedSource getParentFeedSourceForMerge(MergeFeedsJob job, boolean storeNewVersion) { + FeedSource regionalFeedSource = null; + Project project = Persistence.projects.getById(job.projectId); + // If storing a regional merge as a new version, find the feed source designated by the project. + if (job.mergeType.equals(REGIONAL) && storeNewVersion) { + regionalFeedSource = Persistence.feedSources.getById(project.regionalFeedSourceId); + // Create new feed source if this is the first regional merge. + if (regionalFeedSource == null) { + regionalFeedSource = new FeedSource("REGIONAL MERGE", project.id, REGIONAL_MERGE); + // Store new feed source. + Persistence.feedSources.create(regionalFeedSource); + // Update regional feed source ID on project. + project.regionalFeedSourceId = regionalFeedSource.id; + Persistence.projects.replace(project.id, project); + } + } + return job.mergeType.equals(REGIONAL) + ? regionalFeedSource + : job.getFeedVersions().iterator().next().parentFeedSource(); + } + + public static FeedVersion getMergedVersion(MergeFeedsJob job, boolean storeNewVersion) { + FeedSource feedSource = getParentFeedSourceForMerge(job, storeNewVersion); + FeedRetrievalMethod retrievalMethod = job.mergeType.equals(REGIONAL) + ? REGIONAL_MERGE + : SERVICE_PERIOD_MERGE; + return storeNewVersion ? new FeedVersion(feedSource, retrievalMethod) : null; + } + + public static String getMergeKeyField(Table table, MergeFeedsType mergeType) { + String keyField = table.getKeyFieldName(); + if (mergeType.equals(SERVICE_PERIOD) && DataManager.isExtensionEnabled("mtc")) { + // MTC requires that the stop and route records be merged based on different key fields. + switch (table.name) { + case "stops": + keyField = "stop_code"; + break; + case "routes": + keyField = "route_short_name"; + break; + default: + // Otherwise, use the standard key field (see keyField declaration. + break; + } + } + return keyField; + } +} diff --git a/src/main/java/com/conveyal/datatools/manager/utils/connections/ConnectionResponse.java b/src/main/java/com/conveyal/datatools/manager/utils/connections/ConnectionResponse.java new file mode 100644 index 000000000..3ab0b42f5 --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/utils/connections/ConnectionResponse.java @@ -0,0 +1,19 @@ +package com.conveyal.datatools.manager.utils.connections; + +import java.io.IOException; +import java.io.InputStream; + +/** + * An interface for getting HTTP connection response data. 
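+ * Implemented by {@link HttpURLConnectionResponse} for live fetches; tests can supply an alternative
+ * implementation returning canned values so that FeedSource#processFetchResponse can be exercised without a
+ * network connection.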
+ */ +public interface ConnectionResponse { + int getResponseCode() throws IOException; + + String getResponseMessage() throws IOException; + + String getRedirectUrl(); + + InputStream getInputStream() throws IOException; + + Long getLastModified(); +} diff --git a/src/main/java/com/conveyal/datatools/manager/utils/connections/HttpURLConnectionResponse.java b/src/main/java/com/conveyal/datatools/manager/utils/connections/HttpURLConnectionResponse.java new file mode 100644 index 000000000..b316fac55 --- /dev/null +++ b/src/main/java/com/conveyal/datatools/manager/utils/connections/HttpURLConnectionResponse.java @@ -0,0 +1,36 @@ +package com.conveyal.datatools.manager.utils.connections; + +import java.io.IOException; +import java.io.InputStream; +import java.net.HttpURLConnection; + +/** + * Builds a {@link ConnectionResponse} instance sent to FeedSource from an {@link HttpURLConnection} instance. + */ +public class HttpURLConnectionResponse implements ConnectionResponse { + private final HttpURLConnection connection; + + public HttpURLConnectionResponse(HttpURLConnection conn) { + this.connection = conn; + } + + public int getResponseCode() throws IOException { + return connection.getResponseCode(); + } + + public InputStream getInputStream() throws IOException { + return connection.getInputStream(); + } + + public String getResponseMessage() throws IOException { + return connection.getResponseMessage(); + } + + public String getRedirectUrl() { + return connection.getHeaderField("Location"); + } + + public Long getLastModified() { + return connection.getLastModified(); + } +} diff --git a/src/test/java/com/conveyal/datatools/manager/extensions/mtc/MtcFeedResourceTest.java b/src/test/java/com/conveyal/datatools/manager/extensions/mtc/MtcFeedResourceTest.java new file mode 100644 index 000000000..16ee55831 --- /dev/null +++ b/src/test/java/com/conveyal/datatools/manager/extensions/mtc/MtcFeedResourceTest.java @@ -0,0 +1,183 @@ +package com.conveyal.datatools.manager.extensions.mtc; + +import com.conveyal.datatools.DatatoolsTest; +import com.conveyal.datatools.UnitTest; +import com.conveyal.datatools.manager.models.ExternalFeedSourceProperty; +import com.conveyal.datatools.manager.models.FeedSource; +import com.conveyal.datatools.manager.models.FeedVersion; +import com.conveyal.datatools.manager.models.Project; +import com.conveyal.datatools.manager.persistence.Persistence; +import com.fasterxml.jackson.databind.JsonNode; +import com.github.tomakehurst.wiremock.WireMockServer; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.Date; + +import static com.conveyal.datatools.TestUtils.createFeedVersion; +import static com.conveyal.datatools.TestUtils.parseJson; +import static com.conveyal.datatools.TestUtils.zipFolderFiles; +import static com.github.tomakehurst.wiremock.client.WireMock.aResponse; +import static com.github.tomakehurst.wiremock.client.WireMock.get; +import static com.github.tomakehurst.wiremock.client.WireMock.urlPathEqualTo; +import static com.github.tomakehurst.wiremock.core.WireMockConfiguration.options; +import static com.mongodb.client.model.Filters.and; +import static com.mongodb.client.model.Filters.eq; +import static io.restassured.RestAssured.given; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.nullValue; +import static 
org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +class MtcFeedResourceTest extends UnitTest { + private static Project project; + private static FeedSource feedSource; + private static WireMockServer wireMockServer; + + private static final String AGENCY_CODE = "DE"; + + /** + * Add project, server, and deployment to prepare for tests. + */ + @BeforeAll + static void setUp() throws IOException { + // start server if it isn't already running + DatatoolsTest.setUp(); + // Create a project, feed sources. + project = new Project(); + project.name = String.format("Test %s", new Date()); + Persistence.projects.create(project); + + feedSource = new FeedSource("Test feed source"); + feedSource.projectId = project.id; + Persistence.feedSources.create(feedSource); + + // This sets up a mock server that accepts requests and sends predefined responses to mock an Auth0 server. + wireMockServer = new WireMockServer( + options() + .usingFilesUnderDirectory("src/test/resources/com/conveyal/datatools/mtc-rtd-mock-responses/") + ); + wireMockServer.start(); + } + + @AfterAll + static void tearDown() { + wireMockServer.stop(); + if (project != null) { + project.delete(); + } + } + + @Test + void shouldConvertRtdNullToEmptyString() { + assertThat(MtcFeedResource.convertRtdString("null"), equalTo("")); + assertThat(MtcFeedResource.convertRtdString("Other text"), equalTo("Other text")); + } + + @Test + void canUpdateFeedExternalPropertiesToMongo() throws IOException { + final String rtdCarrierApiPath = "/api/Carrier/" + AGENCY_CODE; + + // create wiremock stub for get users endpoint + wireMockServer.stubFor( + get(urlPathEqualTo(rtdCarrierApiPath)) + .willReturn( + aResponse() + .withBodyFile("rtdGetResponse.json") + ) + ); + + // Set up some entries in the ExternalFeedSourceProperties collection. + // This one (AgencyId) should not change. + ExternalFeedSourceProperty agencyIdProp = new ExternalFeedSourceProperty( + feedSource, + "MTC", + "AgencyId", + AGENCY_CODE + ); + Persistence.externalFeedSourceProperties.create(agencyIdProp); + + // This one (AgencyPublicId) should be deleted after this test (not in RTD response). + ExternalFeedSourceProperty agencyPublicIdProp = new ExternalFeedSourceProperty( + feedSource, + "MTC", + "AgencyPublicId", + AGENCY_CODE + ); + Persistence.externalFeedSourceProperties.create(agencyPublicIdProp); + + // This one (AgencyEmail) should be updated with this test. + ExternalFeedSourceProperty agencyEmailProp = new ExternalFeedSourceProperty( + feedSource, + "MTC", + "AgencyEmail", + "old@email.example.com" + ); + Persistence.externalFeedSourceProperties.create(agencyEmailProp); + + // make RTD request and parse the json response + JsonNode rtdResponse = parseJson( + given() + .get(rtdCarrierApiPath) + .then() + .extract() + .response() + .asString() + ); + // Also extract desired values from response + String responseEmail = rtdResponse.get("AgencyEmail").asText(); + String responseAgencyName = rtdResponse.get("AgencyName").asText(); + + // Update MTC Feed properties in Mongo based response. + new MtcFeedResource().updateMongoExternalFeedProperties(feedSource, rtdResponse); + + // Existing field AgencyId should retain the same value. + ExternalFeedSourceProperty updatedAgencyIdProp = Persistence.externalFeedSourceProperties.getById(agencyIdProp.id); + assertThat(updatedAgencyIdProp.value, equalTo(agencyIdProp.value)); + + // Existing field AgencyEmail should be updated from RTD response. 
+ ExternalFeedSourceProperty updatedEmailProp = Persistence.externalFeedSourceProperties.getById(agencyEmailProp.id); + assertThat(updatedEmailProp.value, equalTo(responseEmail)); + + // New field AgencyName (not set up above) from RTD response should be added to Mongo. + ExternalFeedSourceProperty newAgencyNameProp = Persistence.externalFeedSourceProperties.getOneFiltered( + and( + eq("feedSourceId", feedSource.id), + eq("resourceType", "MTC"), + eq("name", "AgencyName") ) + ); + assertThat(newAgencyNameProp, notNullValue()); + assertThat(newAgencyNameProp.value, equalTo(responseAgencyName)); + + // Removed field AgencyPublicId from RTD should be deleted from Mongo. + ExternalFeedSourceProperty removedPublicIdProp = Persistence.externalFeedSourceProperties.getById(agencyPublicIdProp.id); + assertThat(removedPublicIdProp, nullValue()); + + Persistence.externalFeedSourceProperties.removeById(agencyIdProp.id); + Persistence.externalFeedSourceProperties.removeById(agencyPublicIdProp.id); + Persistence.externalFeedSourceProperties.removeById(agencyEmailProp.id); + } + + @Test + void shouldTolerateNullObjectInExternalPropertyAgencyId() throws IOException { + // Add an entry in the ExternalFeedSourceProperties collection + // with AgencyId value set to null. + ExternalFeedSourceProperty agencyIdProp = new ExternalFeedSourceProperty( + feedSource, + "MTC", + "AgencyId", + null + ); + Persistence.externalFeedSourceProperties.create(agencyIdProp); + + // Trigger the feed update process (it should not upload anything to S3). + FeedVersion feedVersion = createFeedVersion(feedSource, zipFolderFiles("mini-bart-new")); + MtcFeedResource mtcFeedResource = new MtcFeedResource(); + assertDoesNotThrow(() -> mtcFeedResource.feedVersionCreated(feedVersion, null)); + + Persistence.externalFeedSourceProperties.removeById(agencyIdProp.id); + } +} diff --git a/src/test/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidationTest.java b/src/test/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidationTest.java index 52d77ae65..abbebb9bf 100644 --- a/src/test/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidationTest.java +++ b/src/test/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidationTest.java @@ -23,6 +23,7 @@ public class GtfsPlusValidationTest extends UnitTest { private static final Logger LOG = LoggerFactory.getLogger(MergeFeedsJobTest.class); private static FeedVersion bartVersion1; + private static FeedVersion bartVersion1WithQuotedValues; private static Project project; /** @@ -40,13 +41,25 @@ public static void setUp() throws IOException { bart.projectId = project.id; Persistence.feedSources.create(bart); bartVersion1 = createFeedVersionFromGtfsZip(bart, "bart_new.zip"); + bartVersion1WithQuotedValues = createFeedVersionFromGtfsZip(bart, "bart_new_with_quoted_values.zip"); } @Test - public void canValidateCleanGtfsPlus() throws Exception { + void canValidateCleanGtfsPlus() throws Exception { LOG.info("Validation BART GTFS+"); GtfsPlusValidation validation = GtfsPlusValidation.validate(bartVersion1.id); // Expect issues to be zero. assertThat("Issues count for clean BART feed is zero", validation.issues.size(), equalTo(0)); } + + @Test + void canValidateGtfsPlusWithQuotedValues() throws Exception { + LOG.info("Validation BART GTFS+ with quoted values"); + GtfsPlusValidation validation = GtfsPlusValidation.validate(bartVersion1WithQuotedValues.id); + // Expect issues to be zero. 
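    // (The quoted-values copy of the feed simply wraps field values in double quotes;
    // GTFS+ validation should treat it the same as the plain feed, so the issue count stays at zero.)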
+ assertThat( + "Issues count for clean BART feed (quoted values) is zero", + validation.issues.size(), equalTo(0) + ); + } } diff --git a/src/test/java/com/conveyal/datatools/manager/jobs/AutoPublishJobTest.java b/src/test/java/com/conveyal/datatools/manager/jobs/AutoPublishJobTest.java new file mode 100644 index 000000000..3c21a1599 --- /dev/null +++ b/src/test/java/com/conveyal/datatools/manager/jobs/AutoPublishJobTest.java @@ -0,0 +1,296 @@ +package com.conveyal.datatools.manager.jobs; + +import com.amazonaws.services.s3.model.S3ObjectSummary; +import com.conveyal.datatools.DatatoolsTest; +import com.conveyal.datatools.UnitTest; +import com.conveyal.datatools.manager.auth.Auth0UserProfile; +import com.conveyal.datatools.manager.models.ExternalFeedSourceProperty; +import com.conveyal.datatools.manager.models.FeedSource; +import com.conveyal.datatools.manager.models.FeedVersion; +import com.conveyal.datatools.manager.models.Project; +import com.conveyal.datatools.manager.persistence.Persistence; +import com.google.common.collect.Lists; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; + +import static com.conveyal.datatools.TestUtils.createFeedVersion; +import static com.conveyal.datatools.TestUtils.createFeedVersionFromGtfsZip; +import static com.conveyal.datatools.TestUtils.zipFolderFiles; +import static com.conveyal.datatools.manager.extensions.mtc.MtcFeedResource.TEST_AGENCY; +import static com.conveyal.datatools.manager.models.FeedRetrievalMethod.FETCHED_AUTOMATICALLY; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for the various {@link AutoPublishJob} cases. + */ +public class AutoPublishJobTest extends UnitTest { + private static final String TEST_COMPLETED_FOLDER = "test-completed"; + private static final Auth0UserProfile user = Auth0UserProfile.createTestAdminUser(); + private static Project project; + private static FeedSource feedSource; + private static ExternalFeedSourceProperty agencyIdProp; + + /** + * Prepare and start a testing-specific web server + */ + @BeforeAll + public static void setUp() throws IOException { + // start server if it isn't already running + DatatoolsTest.setUp(); + + // Create a project, feed sources, and feed versions to merge. + project = new Project(); + project.name = String.format("Test %s", new Date()); + Persistence.projects.create(project); + + FeedSource fakeAgency = new FeedSource("Feed source", project.id, FETCHED_AUTOMATICALLY); + Persistence.feedSources.create(fakeAgency); + feedSource = fakeAgency; + + // Add an AgencyId entry to ExternalFeedSourceProperty + // (one-time, it will be reused for this feed source) + // but set the value to TEST_AGENCY to prevent actual S3 upload. 
+ agencyIdProp = new ExternalFeedSourceProperty( + feedSource, + "MTC", + "AgencyId", + TEST_AGENCY + ); + Persistence.externalFeedSourceProperties.create(agencyIdProp); + } + + @AfterAll + public static void tearDown() { + if (project != null) { + project.delete(); + } + Persistence.externalFeedSourceProperties.removeById(agencyIdProp.id); + } + + /** + * Ensures that a feed is or is not published depending on errors in the feed. + */ + @ParameterizedTest + @MethodSource("createPublishFeedCases") + void shouldProcessFeed(String resourceName, boolean isError, String errorMessage) throws IOException { + // Add the version to the feed source + FeedVersion originalFeedVersion; + if (resourceName.endsWith(".zip")) { + originalFeedVersion = createFeedVersionFromGtfsZip(feedSource, resourceName); + } else { + originalFeedVersion = createFeedVersion(feedSource, zipFolderFiles(resourceName)); + } + + // Create the job + AutoPublishJob autoPublishJob = new AutoPublishJob(feedSource, user); + + // Run the job in this thread (we're not concerned about concurrency here). + autoPublishJob.run(); + + assertEquals( + isError, + autoPublishJob.status.error, + "AutoPublish job error status was incorrectly determined." + ); + + if (isError) { + assertEquals(errorMessage, autoPublishJob.status.message); + + // In case of error, the sentToExternalPublisher flag should not be set. + FeedVersion updatedFeedVersion = Persistence.feedVersions.getById(originalFeedVersion.id); + assertNull(updatedFeedVersion.sentToExternalPublisher); + } + } + + private static Stream createPublishFeedCases() { + return Stream.of( + Arguments.of( + "fake-agency-with-only-calendar-expire-in-2099-with-failed-referential-integrity", + true, + "Could not publish this feed version because it contains blocking errors." + ), + Arguments.of( + "bart_old_lite.zip", + true, + "Could not publish this feed version because it contains GTFS+ blocking errors." + ), + Arguments.of( + "bart_new_lite.zip", + false, + null + ) + ); + } + + @ParameterizedTest + @MethodSource("createUpdateFeedInfoCases") + void shouldUpdateFeedInfoAfterPublishComplete(String agencyId, boolean isUnknownFeedId) { + // Add the version to the feed source + FeedVersion createdVersion = createFeedVersionFromGtfsZip(feedSource, "bart_new_lite.zip"); + + // Create the job + AutoPublishJob autoPublishJob = new AutoPublishJob(feedSource, user); + + // Run the job in this thread (we're not concerned about concurrency here). + autoPublishJob.run(); + + assertFalse(autoPublishJob.status.error); + + // Make sure that the publish-pending attribute has been set for the feed version in Mongo. + FeedVersion updatedFeedVersion = Persistence.feedVersions.getById(createdVersion.id); + assertNotNull(updatedFeedVersion.sentToExternalPublisher); + + // Create a test FeedUpdater instance, and simulate running the task. + TestCompletedFeedRetriever completedFeedRetriever = new TestCompletedFeedRetriever(agencyId); + FeedUpdater feedUpdater = FeedUpdater.createForTest(completedFeedRetriever); + + // The list of feeds processed externally (completed) should be empty at this point. + Map etags = feedUpdater.checkForUpdatedFeeds(); + assertTrue(etags.isEmpty()); + + // Simulate completion of feed publishing. + completedFeedRetriever.makePublished(); + + // The etags should contain the id of the agency. + // If a feed has been republished since last check, it will have a new etag/file hash, + // and the scenario below should apply. 
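    // (checkForUpdatedFeeds reports completed feeds as a map from agency id to the S3 object's etag,
    // so a hit here surfaces the "test-etag" value stubbed by TestCompletedFeedRetriever below.)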
+ Map etagsAfter = feedUpdater.checkForUpdatedFeeds(); + + FeedVersion updatedFeedVersionAfter = Persistence.feedVersions.getById(createdVersion.id); + Date updatedDate = updatedFeedVersionAfter.processedByExternalPublisher; + String namespace = updatedFeedVersionAfter.namespace; + + if (!isUnknownFeedId) { + // Regular scenario: updating a known/existing feed. + assertEquals(1, etagsAfter.size()); + assertTrue(etagsAfter.containsValue("test-etag")); + + // Make sure that the publish-complete attribute has been set for the feed version in Mongo. + assertNotNull(updatedDate); + + // At the next check for updates, the metadata for the feeds completed above + // should not be updated again. + feedUpdater.checkForUpdatedFeeds(); + FeedVersion updatedFeedVersionAfter2 = Persistence.feedVersions.getById(createdVersion.id); + assertEquals(updatedDate, updatedFeedVersionAfter2.processedByExternalPublisher); + assertEquals(namespace, updatedFeedVersionAfter2.namespace); + } else { + // Edge case: an unknown feed id was provided, + // so no update of the feed should be happening (and there should not be an exception). + assertEquals(0, etagsAfter.size()); + assertNull(updatedDate); + } + } + + private static Stream createUpdateFeedInfoCases() { + return Stream.of( + Arguments.of( + TEST_AGENCY, + false + ), + Arguments.of( + "12345", + true + ) + ); + } + + /** + * This test ensures that, upon server startup, + * feeds that meet all these criteria should not be updated/marked as published: + * - the feed has been sent to publisher (RTD), + * - the publisher has not published the feed, + * - a previous version of the feed was already published. + */ + @Test + void shouldNotUpdateFromAPreviouslyPublishedVersionOnStartup() { + final int TWO_DAYS_MILLIS = 48 * 3600000; + + // Set up a test FeedUpdater instance that fakes an external published date in the past. + TestCompletedFeedRetriever completedFeedRetriever = new TestCompletedFeedRetriever(TEST_AGENCY); + FeedUpdater feedUpdater = FeedUpdater.createForTest(completedFeedRetriever); + completedFeedRetriever.makePublished(new Date(System.currentTimeMillis() - TWO_DAYS_MILLIS)); + + // Add the version to the feed source, with + // sentToExternalPublisher set to a date after a previous publish date. + FeedVersion createdVersion = createFeedVersionFromGtfsZip(feedSource, "bart_new_lite.zip"); + createdVersion.sentToExternalPublisher = new Date(); + Persistence.feedVersions.replace(createdVersion.id, createdVersion); + + // The list of feeds processed externally (completed) should contain an entry for the agency we want. + Map etags = feedUpdater.checkForUpdatedFeeds(); + assertNotNull(etags.get(TEST_AGENCY)); + + // Make sure that the feed remains unpublished. + FeedVersion updatedFeedVersion = Persistence.feedVersions.getById(createdVersion.id); + assertNull(updatedFeedVersion.processedByExternalPublisher); + + // Now perform publishing. + AutoPublishJob autoPublishJob = new AutoPublishJob(feedSource, user); + autoPublishJob.run(); + assertFalse(autoPublishJob.status.error); + + // Simulate another publishing process + completedFeedRetriever.makePublished(new Date()); + + // The list of feeds processed externally (completed) should contain an entry for the agency we want. + Map etagsAfter = feedUpdater.checkForUpdatedFeeds(); + assertNotNull(etagsAfter.get(TEST_AGENCY)); + + // The feed should be published. 
+ FeedVersion publishedFeedVersion = Persistence.feedVersions.getById(createdVersion.id); + assertNotNull(publishedFeedVersion.processedByExternalPublisher); + + } + + /** + * Mocks the results of an {@link S3ObjectSummary} retrieval before/after the + * external MTC publishing process is complete. + */ + private static class TestCompletedFeedRetriever implements FeedUpdater.CompletedFeedRetriever { + private final String agencyId; + private boolean isPublishingComplete; + private Date publishDate; + + public TestCompletedFeedRetriever(String agencyId) { + this.agencyId = agencyId; + } + + @Override + public List retrieveCompletedFeeds() { + if (!isPublishingComplete) { + return new ArrayList<>(); + } else { + S3ObjectSummary objSummary = new S3ObjectSummary(); + objSummary.setETag("test-etag"); + objSummary.setKey(String.format("%s/%s", TEST_COMPLETED_FOLDER, agencyId)); + objSummary.setLastModified(publishDate); + return Lists.newArrayList(objSummary); + } + } + + public void makePublished() { + makePublished(new Date()); + } + + public void makePublished(Date publishDate) { + isPublishingComplete = true; + this.publishDate = publishDate; + } + } +} diff --git a/src/test/java/com/conveyal/datatools/manager/jobs/FetchLoadFeedCombinationTest.java b/src/test/java/com/conveyal/datatools/manager/jobs/FetchLoadFeedCombinationTest.java new file mode 100644 index 000000000..56c99d32c --- /dev/null +++ b/src/test/java/com/conveyal/datatools/manager/jobs/FetchLoadFeedCombinationTest.java @@ -0,0 +1,227 @@ +package com.conveyal.datatools.manager.jobs; + +import com.conveyal.datatools.DatatoolsTest; +import com.conveyal.datatools.UnitTest; +import com.conveyal.datatools.common.status.MonitorableJob; +import com.conveyal.datatools.manager.auth.Auth0UserProfile; +import com.conveyal.datatools.manager.models.FeedSource; +import com.conveyal.datatools.manager.models.FeedVersion; +import com.conveyal.datatools.manager.models.Project; +import com.conveyal.datatools.manager.persistence.Persistence; +import com.conveyal.datatools.manager.utils.connections.ConnectionResponse; +import com.github.tomakehurst.wiremock.WireMockServer; +import io.restassured.response.Response; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.net.HttpURLConnection; +import java.util.Date; +import java.util.List; + +import static com.conveyal.datatools.TestUtils.createFeedVersionFromGtfsZip; +import static com.conveyal.datatools.manager.models.FeedRetrievalMethod.FETCHED_AUTOMATICALLY; +import static com.conveyal.datatools.manager.models.FeedRetrievalMethod.MANUALLY_UPLOADED; +import static com.github.tomakehurst.wiremock.client.WireMock.aResponse; +import static com.github.tomakehurst.wiremock.client.WireMock.configureFor; +import static com.github.tomakehurst.wiremock.client.WireMock.get; +import static com.github.tomakehurst.wiremock.client.WireMock.urlPathEqualTo; +import static com.github.tomakehurst.wiremock.core.WireMockConfiguration.options; +import static com.mongodb.client.model.Filters.eq; +import static io.restassured.RestAssured.given; +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * Tests for the various combinations of {@link FetchSingleFeedJob} and {@link LoadFeedJob} cases. 
+ */ +public class FetchLoadFeedCombinationTest extends UnitTest { + private static final Auth0UserProfile user = Auth0UserProfile.createTestAdminUser(); + private static Project project; + private static final String MOCKED_HOST = "fakehost.com"; + private static final String MOCKED_FETCH_URL = "/dev/schedules/google_transit.zip"; + + private static WireMockServer wireMockServer; + private FeedSource feedSource; + + /** + * Prepare and start a testing-specific web server + */ + @BeforeAll + public static void setUp() throws IOException { + // start server if it isn't already running + DatatoolsTest.setUp(); + + // Create a project and feed sources. + project = new Project(); + project.name = String.format("Test %s", new Date()); + Persistence.projects.create(project); + + // This sets up a mock server that accepts requests and sends predefined responses to mock a GTFS file download. + configureFor(MOCKED_HOST, 80); + wireMockServer = new WireMockServer( + options() + .usingFilesUnderDirectory("src/test/resources/com/conveyal/datatools/gtfs/") + ); + wireMockServer.start(); + } + + @AfterAll + public static void tearDown() { + wireMockServer.stop(); + if (project != null) { + project.delete(); + } + } + + @BeforeEach + public void setUpEach() { + feedSource = new FeedSource("Feed source", project.id, MANUALLY_UPLOADED); + Persistence.feedSources.create(feedSource); + + // Create wiremock stub for gtfs download. + wireMockServer.stubFor( + get(urlPathEqualTo(MOCKED_FETCH_URL)) + .willReturn( + aResponse() + .withBodyFile("bart_new_lite.zip") + ) + ); + } + + /** + * Refetching should be allowed in the following scenario: + * 1. Feed is fetched as Version 1. + * 2. Another feed version is uploaded as Version 2. + * 3. Feed is refetched as Version 3. + */ + @Test + void shouldRefetchAfterFetchAndManualUpload() { + // Simulate the first job for the initial fetch. + simulateFetch(); + + // Assert Version 1 is created. + assertVersionCount(1); + + // Create the second job for manual upload. + FeedVersion uploadedFeedVersion = createFeedVersionFromGtfsZip(feedSource, "bart_old_lite.zip"); + new LoadFeedJob(uploadedFeedVersion, user, true).run(); + + // Assert Version 2 is created. + assertVersionCount(2); + + // Simulate the third job for the refetch. + simulateFetch(); + + // Assert Version 3 is created. + assertVersionCount(3); + } + + /** + * Refetching should not happen when doing two successive fetches (existing functionality). + * 1. Feed is fetched as Version 1. + * 2. Feed is refetched but no version is created, because either + * 304 NOT MODIFIED was returned, or the exact same file was downloaded again. + */ + @Test + void shouldNotLoadVersionIdenticalToPrevious() { + // Simulate the first fetch. + simulateFetch(); + + // Assert Version 1 is created. + assertVersionCount(1); + + // Simulate the second fetch with the response as "unchanged". + simulateFetch(); + + // Assert no version 2 is created. + assertVersionCount(1); + + // Some servers support a 304 (not modified) response, + // and that should also result in no new version created. + wireMockServer.stubFor( + get(urlPathEqualTo(MOCKED_FETCH_URL)) + .willReturn( + aResponse() + .withStatus(HttpURLConnection.HTTP_NOT_MODIFIED) + ) + ); + + // Simulate the re-fetch with the response as "unchanged". + simulateFetch(); + + // Assert no version 2 is created. + assertVersionCount(1); + } + + /** + * Simulates a fetch on the feed source. 
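The same ConnectionResponse seam is presumably what production fetch code feeds with the HttpURLConnection wrapper added above; a rough sketch, with the URL and error handling purely illustrative:

    HttpURLConnection conn = (HttpURLConnection) new URL("https://example.com/gtfs.zip").openConnection();
    ConnectionResponse liveResponse = new HttpURLConnectionResponse(conn);
    if (liveResponse.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) {
        // Redirects surface through the Location header via getRedirectUrl().
        String redirectUrl = liveResponse.getRedirectUrl();
    } else {
        // Otherwise the body and Last-Modified stamp are read through the same
        // interface that MockConnectionResponse below satisfies for tests.
        InputStream gtfsStream = liveResponse.getInputStream();
        Long lastModified = liveResponse.getLastModified();
    }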
+ */ + private void simulateFetch() { + MockConnectionResponse response = new MockConnectionResponse( + given() + .get(MOCKED_FETCH_URL) + .then() + .extract() + .response() + ); + FeedVersion newVersion = new FeedVersion(feedSource, FETCHED_AUTOMATICALLY); + newVersion = feedSource.processFetchResponse( + new MonitorableJob.Status(), + null, + newVersion, + feedSource.retrieveLatest(), + response + ); + if (newVersion != null) { + new ProcessSingleFeedJob(newVersion, user, true).run(); + } + } + + /** + * Assert feed version count. + */ + private void assertVersionCount(int size) { + // Fetch versions. + List versions = Persistence.feedVersions.getFiltered( + eq("feedSourceId", feedSource.id) + ); + assertEquals(size, versions.size()); + } + + /** + * Simulates a {@link ConnectionResponse} instance sent to FeedSource + * from a mock {@link Response} instance. + * TODO: Handle mock redirects. + */ + public static class MockConnectionResponse implements ConnectionResponse { + private final Response response; + + public MockConnectionResponse(Response resp) { + this.response = resp; + } + + public int getResponseCode() { + return response.statusCode(); + } + + public InputStream getInputStream() { + return response.asInputStream(); + } + + public Long getLastModified() { + return response.time(); + } + + public String getResponseMessage() { + return response.statusLine(); + } + + @Override + public String getRedirectUrl() { + return response.getHeader("Location"); + } + } +} diff --git a/src/test/java/com/conveyal/datatools/manager/jobs/MergeFeedsJobTest.java b/src/test/java/com/conveyal/datatools/manager/jobs/MergeFeedsJobTest.java index 602421393..efcadf01f 100644 --- a/src/test/java/com/conveyal/datatools/manager/jobs/MergeFeedsJobTest.java +++ b/src/test/java/com/conveyal/datatools/manager/jobs/MergeFeedsJobTest.java @@ -3,10 +3,14 @@ import com.conveyal.datatools.DatatoolsTest; import com.conveyal.datatools.UnitTest; import com.conveyal.datatools.manager.auth.Auth0UserProfile; +import com.conveyal.datatools.manager.gtfsplus.GtfsPlusValidation; +import com.conveyal.datatools.manager.jobs.feedmerge.MergeFeedsType; +import com.conveyal.datatools.manager.jobs.feedmerge.MergeStrategy; import com.conveyal.datatools.manager.models.FeedSource; import com.conveyal.datatools.manager.models.FeedVersion; import com.conveyal.datatools.manager.models.Project; import com.conveyal.datatools.manager.persistence.Persistence; +import com.conveyal.datatools.manager.utils.SqlAssert; import com.conveyal.gtfs.error.NewGTFSErrorType; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; @@ -21,7 +25,6 @@ import java.util.Set; import static com.conveyal.datatools.TestUtils.assertThatFeedHasNoErrorsOfType; -import static com.conveyal.datatools.TestUtils.assertThatSqlCountQueryYieldsExpectedCount; import static com.conveyal.datatools.TestUtils.createFeedVersion; import static com.conveyal.datatools.TestUtils.createFeedVersionFromGtfsZip; import static com.conveyal.datatools.TestUtils.zipFolderFiles; @@ -36,22 +39,45 @@ */ public class MergeFeedsJobTest extends UnitTest { private static final Logger LOG = LoggerFactory.getLogger(MergeFeedsJobTest.class); - private static Auth0UserProfile user = Auth0UserProfile.createTestAdminUser(); + private static final Auth0UserProfile user = Auth0UserProfile.createTestAdminUser(); private static FeedVersion bartVersion1; - private static FeedVersion bartVersion2; - private static FeedVersion calTrainVersion; + private static FeedVersion 
bartVersion2SameTrips; private static FeedVersion bartVersionOldLite; private static FeedVersion bartVersionNewLite; private static FeedVersion calTrainVersionLite; private static Project project; - private static FeedVersion napaVersion; private static FeedVersion napaVersionLite; private static FeedVersion bothCalendarFilesVersion; private static FeedVersion bothCalendarFilesVersion2; private static FeedVersion bothCalendarFilesVersion3; private static FeedVersion onlyCalendarVersion; private static FeedVersion onlyCalendarDatesVersion; + /** The base feed for testing the MTC merge strategies. */ + private static FeedVersion fakeTransitBase; + /** The base feed but with calendar start/end dates that have been transposed to the future. */ + private static FeedVersion fakeTransitFuture; + /** The base feed with start/end dates that have been transposed to the future AND unique trip and service IDs. */ + private static FeedVersion fakeTransitFutureUnique; + /** The base feed but with differing service_ids. */ + private static FeedVersion fakeTransitModService; + /** + * The base feed (transposed to the future dates), with some trip_ids from the base feed with different signatures + * and some added trips. + */ + private static FeedVersion fakeTransitNewSignatureTrips; + /** + * The base feed (transposed to the future dates), with some trip_ids from the base feed with the same signature, + * and some added trips, and a trip from the base feed removed. + */ + private static FeedVersion fakeTransitSameSignatureTrips; + /** + * The base feed (transposed to the future dates), with some trip_ids from the base feed with the same signature, + * and a trip from the base feed removed. + */ + private static FeedVersion fakeTransitSameSignatureTrips2; private static FeedSource bart; + private static FeedVersion noAgencyVersion1; + private static FeedVersion noAgencyVersion2; /** * Prepare and start a testing-specific web server @@ -63,27 +89,25 @@ public static void setUp() throws IOException { // Create a project, feed sources, and feed versions to merge. project = new Project(); - project.name = String.format("Test %s", new Date().toString()); + project.name = String.format("Test %s", new Date()); Persistence.projects.create(project); // Bart bart = new FeedSource("BART", project.id, MANUALLY_UPLOADED); Persistence.feedSources.create(bart); bartVersion1 = createFeedVersionFromGtfsZip(bart, "bart_old.zip"); - bartVersion2 = createFeedVersionFromGtfsZip(bart, "bart_new.zip"); + bartVersion2SameTrips = createFeedVersionFromGtfsZip(bart, "bart_new.zip"); bartVersionOldLite = createFeedVersionFromGtfsZip(bart, "bart_old_lite.zip"); bartVersionNewLite = createFeedVersionFromGtfsZip(bart, "bart_new_lite.zip"); // Caltrain FeedSource caltrain = new FeedSource("Caltrain", project.id, MANUALLY_UPLOADED); Persistence.feedSources.create(caltrain); - calTrainVersion = createFeedVersionFromGtfsZip(caltrain, "caltrain_gtfs.zip"); calTrainVersionLite = createFeedVersionFromGtfsZip(caltrain, "caltrain_gtfs_lite.zip"); // Napa FeedSource napa = new FeedSource("Napa", project.id, MANUALLY_UPLOADED); Persistence.feedSources.create(napa); - napaVersion = createFeedVersionFromGtfsZip(napa, "napa-no-agency-id.zip"); napaVersionLite = createFeedVersionFromGtfsZip(napa, "napa-no-agency-id-lite.zip"); // Fake agencies (for testing calendar service_id merges with MTC strategy). 
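The refactored merge tests below all follow the same pattern, condensed here as a sketch (the versions and expected counts are illustrative only):

    // Collect the feed versions to merge.
    Set<FeedVersion> versions = new HashSet<>();
    versions.add(fakeTransitBase);
    versions.add(fakeTransitFuture);

    // Run the merge synchronously; concurrency is not a concern in these tests.
    MergeFeedsJob mergeFeedsJob = new MergeFeedsJob(user, versions, "merged_output", MergeFeedsType.SERVICE_PERIOD);
    mergeFeedsJob.run();

    // Check the outcome and the chosen strategy, then query the merged tables
    // through the SqlAssert helper instead of hand-written count queries.
    assertFalse(mergeFeedsJob.mergeFeedsResult.failed);
    assertEquals(MergeStrategy.CHECK_STOP_TIMES, mergeFeedsJob.mergeFeedsResult.mergeStrategy);
    SqlAssert sqlAssert = new SqlAssert(mergeFeedsJob.mergedVersion);
    sqlAssert.assertNoRefIntegrityErrors();
    sqlAssert.calendar.assertCount(2);
    sqlAssert.trips.assertCount(1, "service_id='common_id'");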
@@ -109,8 +133,28 @@ public static void setUp() throws IOException { fakeAgency, zipFolderFiles("fake-agency-with-calendar-and-calendar-dates-3") ); + + // Other fake feeds for testing MTC MergeStrategy types. + FeedSource fakeTransit = new FeedSource("Fake Transit", project.id, MANUALLY_UPLOADED); + Persistence.feedSources.create(fakeTransit); + fakeTransitBase = createFeedVersion(fakeTransit, zipFolderFiles("merge-data-base")); + fakeTransitFuture = createFeedVersion(fakeTransit, zipFolderFiles("merge-data-future")); + fakeTransitFutureUnique = createFeedVersion(fakeTransit, zipFolderFiles("merge-data-future-unique-ids")); + fakeTransitModService = createFeedVersion(fakeTransit, zipFolderFiles("merge-data-mod-services")); + fakeTransitNewSignatureTrips = createFeedVersion(fakeTransit, zipFolderFiles("merge-data-mod-trips")); + fakeTransitSameSignatureTrips = createFeedVersion(fakeTransit, zipFolderFiles("merge-data-added-trips")); + fakeTransitSameSignatureTrips2 = createFeedVersion(fakeTransit, zipFolderFiles("merge-data-added-trips-2")); + + // Feeds with no agency id + FeedSource noAgencyIds = new FeedSource("no-agency-ids", project.id, MANUALLY_UPLOADED); + Persistence.feedSources.create(noAgencyIds); + noAgencyVersion1 = createFeedVersion(noAgencyIds, zipFolderFiles("no-agency-id-1")); + noAgencyVersion2 = createFeedVersion(noAgencyIds, zipFolderFiles("no-agency-id-2")); } + /** + * Delete project on tear down (feed sources/versions will also be deleted). + */ @AfterAll public static void tearDown() { if (project != null) { @@ -122,7 +166,7 @@ public static void tearDown() { * Ensures that a regional feed merge will produce a feed that includes all entities from each feed. */ @Test - public void canMergeRegional() throws SQLException { + void canMergeRegional() throws SQLException { // Set up list of feed versions to merge. Set versions = new HashSet<>(); versions.add(bartVersionOldLite); @@ -181,122 +225,320 @@ public void canMergeRegional() throws SQLException { * calendar_dates and another with only the calendar. */ @Test - public void canMergeRegionalWithOnlyCalendarFeed () throws SQLException { + void canMergeRegionalWithOnlyCalendarFeed () throws SQLException { Set versions = new HashSet<>(); versions.add(onlyCalendarDatesVersion); versions.add(onlyCalendarVersion); FeedVersion mergedVersion = regionallyMergeVersions(versions); + SqlAssert sqlAssert = new SqlAssert(mergedVersion); + sqlAssert.assertNoRefIntegrityErrors(); - // assert service_ids have been feed scoped properly - String mergedNamespace = mergedVersion.namespace; + // - calendar table should have 2 records. 
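    // (Both records come from onlyCalendarVersion, its common_id and only_calendar_id services,
    // since onlyCalendarDatesVersion contributes calendar_dates only; each is feed-scoped as asserted below.)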
+ sqlAssert.calendar.assertCount(2); - // - calendar table - // expect a total of 2 records in calendar table - assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.calendar", mergedNamespace), - 2 - ); // onlyCalendarVersion's common_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar WHERE service_id='Fake_Agency2:common_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendar.assertCount(1, "service_id='Fake_Agency2:common_id'"); + // onlyCalendarVersion's only_calendar_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar WHERE service_id='Fake_Agency2:only_calendar_id'", - mergedNamespace - ), - 1 - ); - - // - calendar_dates table - // expect only 1 record in calendar_dates table - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar_dates", - mergedNamespace - ), - 2 - ); + sqlAssert.calendar.assertCount(1, "service_id='Fake_Agency2:only_calendar_id'"); + + // - calendar_dates table should have 2 records. + sqlAssert.calendarDates.assertCount(2); + // onlyCalendarDatesVersion's common_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar_dates WHERE service_id='Fake_Agency3:common_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendarDates.assertCount(1, "service_id='Fake_Agency3:common_id'"); + // onlyCalendarDatesVersion's only_calendar_dates_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar_dates WHERE service_id='Fake_Agency3:only_calendar_dates_id'", - mergedNamespace - ), - 1 - ); - - // - trips table - // expect 2 + 1 = 3 records in trips table - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.trips", - mergedNamespace - ), - 3 - ); + sqlAssert.calendarDates.assertCount(1, "service_id='Fake_Agency3:only_calendar_dates_id'"); + + // - trips table should have 2 + 1 = 3 records. + sqlAssert.trips.assertCount(3); + // onlyCalendarDatesVersion's common_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.trips WHERE service_id='Fake_Agency3:common_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.trips.assertCount(1, "service_id='Fake_Agency3:common_id'"); + // 2 trips with onlyCalendarVersion's common_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.trips WHERE service_id='Fake_Agency2:common_id'", - mergedNamespace - ), - 2 + sqlAssert.trips.assertCount(2, "service_id='Fake_Agency2:common_id'"); + + // 2 parent stations should reference the updated stop_id for Fake_Agency2 + sqlAssert.stops.assertCount(2, "parent_station='Fake_Agency2:123'"); + + // 2 parent stations should reference the updated stop_id for Fake_Agency3 + sqlAssert.stops.assertCount(2, "parent_station='Fake_Agency3:123'"); + } + + /** + * Ensures that an MTC merge of feeds that has exactly matching trips but mismatched services fails. 
+ */ + @Test + void mergeMTCShouldFailOnDuplicateTripsButMismatchedServices() { + Set versions = new HashSet<>(); + versions.add(fakeTransitBase); + versions.add(fakeTransitModService); + MergeFeedsJob mergeFeedsJob = new MergeFeedsJob(user, versions, "merged_output", MergeFeedsType.SERVICE_PERIOD); + // Run the job in this thread (we're not concerned about concurrency here). + mergeFeedsJob.run(); + // Result should fail. + assertFalse( + mergeFeedsJob.mergeFeedsResult.failed, + "If feeds have exactly matching trips but mismatched services, new service ids should be created that span both feeds." ); } /** - * Ensures that an MTC merge of feeds with duplicate trip IDs will fail. + * Ensures that an MTC merge of feeds with exact matches of service_ids and trip_ids will utilize the + * {@link MergeStrategy#CHECK_STOP_TIMES} strategy correctly. */ @Test - public void mergeMTCShouldFailOnDuplicateTrip() { + void mergeMTCShouldHandleExtendFutureStrategy() throws SQLException { Set versions = new HashSet<>(); - versions.add(bartVersion1); - versions.add(bartVersion2); + versions.add(fakeTransitBase); + versions.add(fakeTransitFuture); MergeFeedsJob mergeFeedsJob = new MergeFeedsJob(user, versions, "merged_output", MergeFeedsType.SERVICE_PERIOD); // Run the job in this thread (we're not concerned about concurrency here). mergeFeedsJob.run(); // Result should fail. + assertFalse( + mergeFeedsJob.mergeFeedsResult.failed, + "Merge feeds job should succeed with CHECK_STOP_TIMES strategy." + ); + assertEquals( + MergeStrategy.CHECK_STOP_TIMES, + mergeFeedsJob.mergeFeedsResult.mergeStrategy + ); + SqlAssert sqlAssert = new SqlAssert(mergeFeedsJob.mergedVersion); + sqlAssert.assertNoUnusedServiceIds(); + sqlAssert.assertNoRefIntegrityErrors(); + + // calendar table should have 2 records (all calendar ids are used and extended) + sqlAssert.calendar.assertCount(2); + + // expect that the record in calendar table has the correct start_date. + sqlAssert.calendar.assertCount(1, "start_date='20170918' and monday=1"); + } + + /** + * Ensures that an MTC merge of feeds with exact matches of service_ids and trip_ids, + * trip ids having the same signature (same stop times) will utilize the + * {@link MergeStrategy#CHECK_STOP_TIMES} strategy correctly. + */ + @Test + void mergeMTCShouldHandleMatchingTripIdsWithSameSignature() throws SQLException { + Set versions = new HashSet<>(); + versions.add(fakeTransitBase); + versions.add(fakeTransitSameSignatureTrips); + MergeFeedsJob mergeFeedsJob = new MergeFeedsJob(user, versions, "merged_output", MergeFeedsType.SERVICE_PERIOD); + // Run the job in this thread (we're not concerned about concurrency here). + mergeFeedsJob.run(); + // Check that correct strategy was used. + assertEquals( + MergeStrategy.CHECK_STOP_TIMES, + mergeFeedsJob.mergeFeedsResult.mergeStrategy + ); + // Result should succeed. + assertFalse( + mergeFeedsJob.mergeFeedsResult.failed, + "Merge feeds job should succeed with CHECK_STOP_TIMES strategy." 
+ ); + SqlAssert sqlAssert = new SqlAssert(mergeFeedsJob.mergedVersion); + sqlAssert.assertNoUnusedServiceIds(); + sqlAssert.assertNoRefIntegrityErrors(); + + // - calendar table + // expect a total of 4 records in calendar table: + // - common_id from the active feed (but start date is changed to one day before first start_date in future feed), + // - common_id from the future feed (because of one future trip not in the active feed), + // - common_id cloned and extended for the matching trip id present in both active and future feeds + // (from MergeFeedsJob#serviceIdsToCloneAndRename), + // - only_calendar_id used in the future feed. + sqlAssert.calendar.assertCount(4); + + // Expect 4 trips in merged output: + // 1 trip from active feed that are not in the future feed, + // (the active trip for only_calendar_id is not included because that service id + // starts after the future feed start date) + // 1 trip in both the active and future feeds, with the same signature (same stop times), + // 2 trips from the future feed not in the active feed. + sqlAssert.trips.assertCount(4); + + // expect that 2 calendars (1 common_id extended from future and 1 Fake_Transit1:common_id from active) have + // start_date pinned to start date of active feed. + sqlAssert.calendar.assertCount(2, "start_date='20170918'"); + + // One of the calendars above should have been extended + // until the end date of that entry in the future feed. + sqlAssert.calendar.assertCount(1, "start_date='20170918' and end_date='20170925'"); + + // The other one should have end_date set to a day before the start of the future feed start date + // (in the test data, that first date comes from the other calendar entry). + sqlAssert.calendar.assertCount(1, "start_date = '20170918' and end_date='20170919'"); + } + + /** + * Ensures that an MTC merge of feeds with exact matches of service_ids and trip_ids, + * trip ids having the same signature (same stop times) will utilize the + * {@link MergeStrategy#CHECK_STOP_TIMES} strategy correctly and drop unused future service ids. + */ + @Test + void mergeMTCShouldHandleMatchingTripIdsAndDropUnusedFutureCalendar() throws Exception { + Set versions = new HashSet<>(); + versions.add(fakeTransitBase); + versions.add(fakeTransitSameSignatureTrips2); + MergeFeedsJob mergeFeedsJob = new MergeFeedsJob(user, versions, "merged_output", MergeFeedsType.SERVICE_PERIOD); + // Run the job in this thread (we're not concerned about concurrency here). + mergeFeedsJob.run(); + // Check that correct strategy was used. + assertEquals( + MergeStrategy.CHECK_STOP_TIMES, + mergeFeedsJob.mergeFeedsResult.mergeStrategy + ); + // Result should succeed. + assertFalse( + mergeFeedsJob.mergeFeedsResult.failed, + "Merge feeds job should succeed with CHECK_STOP_TIMES strategy." + ); + + SqlAssert sqlAssert = new SqlAssert(mergeFeedsJob.mergedVersion); + sqlAssert.assertNoUnusedServiceIds(); + sqlAssert.assertNoRefIntegrityErrors(); + + // - calendar table + // expect a total of 3 records in calendar table: + // - common_id from the active feed (but start date is changed to one day before first start_date in future feed), + // - common_id cloned and extended for the matching trip id present in both active and future feeds + // (from MergeFeedsJob#serviceIdsToCloneAndRename), + // - only_calendar_id used in the future feed. 
+ sqlAssert.calendar.assertCount(3); + + // Expect 3 trips in merged output: + // 1 trip from active feed that are not in the future feed, + // (the active trip for only_calendar_dates is discarded because that service id + // starts after the future feed start date) + // 1 trip in both the active and future feeds, with the same signature (same stop times), + // 1 trip from the future feed not in the active feed. + sqlAssert.trips.assertCount(3); + + // 5 calendar_dates entries should be in the merged feed: + // (reported by MTC). + sqlAssert.calendarDates.assertCount(5); + // - only_calendar_id: + // 1 from future feed (that service id is not scoped), + sqlAssert.calendarDates.assertCount(1, "service_id='only_calendar_id'"); + // 0 from active feed + // (in the active feed, that service id starts after the future feed start date) + sqlAssert.calendarDates.assertCount(0, "service_id='Fake_Transit1:dropped_calendar_id'"); + // - common_id: + // 2 from active feed for the calendar item that was extended due to shared trip, + sqlAssert.calendarDates.assertCount(2, "service_id='Fake_Transit7:common_id'"); + // 2 from active feed for the active trip not in the future feed. + sqlAssert.calendarDates.assertCount(2, "service_id='Fake_Transit1:common_id'"); + + // The GTFS+ calendar_attributes table should contain the same number of entries as the calendar table + // (reported by MTC). + assertEquals( + 3, + mergeFeedsJob.mergeFeedsResult.linesPerTable.get("calendar_attributes").intValue(), + "Merged calendar_dates table count should equal expected value." + ); + + // The GTFS+ timepoints table should not contain any trip ids not in the trips table + // (reported by MTC). + GtfsPlusValidation validation = GtfsPlusValidation.validate(mergeFeedsJob.mergedVersion.id); + assertEquals( + 0L, + validation.issues.stream().filter( + issue -> issue.tableId.equals("timepoints") && issue.fieldName.equals("trip_id") + ).count(), + "There should not be trip_id issues in the GTFS+ timepoints table." + ); + + // There should be mention of any remapped trip ids in the job summary + // because no remapped trip ids should have been written to the trips/timepoints tables + // (reported by MTC). + assertEquals( + 0L, + mergeFeedsJob.mergeFeedsResult.remappedIds.keySet().stream().filter( + key -> key.startsWith("trips:") + ).count(), + "Job summary should not mention remapped uninserted trip ids." + ); + } + + /** + * Ensures that an MTC merge of feeds with trip_ids matching in the active and future feed, + * but with different signatures (e.g. different stop times) fails. + */ + @Test + void mergeMTCShouldHandleMatchingTripIdsWithDifferentSignatures() { + Set versions = new HashSet<>(); + versions.add(fakeTransitBase); + versions.add(fakeTransitNewSignatureTrips); + MergeFeedsJob mergeFeedsJob = new MergeFeedsJob(user, versions, "merged_output", MergeFeedsType.SERVICE_PERIOD); + // Run the job in this thread (we're not concerned about concurrency here). + mergeFeedsJob.run(); + // Check that correct strategy was used. + assertEquals( + MergeStrategy.CHECK_STOP_TIMES, + mergeFeedsJob.mergeFeedsResult.mergeStrategy + ); + // Result should fail. assertTrue( mergeFeedsJob.mergeFeedsResult.failed, - "Merge feeds job should fail due to duplicate trip IDs." + "Merge feeds job with trip ids of different signatures should fail." + ); + } + + /** + * Ensures that an MTC merge of feeds with disjoint (non-matching) trip_ids will utilize the + * {@link MergeStrategy#DEFAULT} strategy correctly. 
+ */ + @Test + void mergeMTCShouldHandleDisjointTripIds() throws SQLException { + Set versions = new HashSet<>(); + versions.add(fakeTransitBase); + versions.add(fakeTransitFutureUnique); + MergeFeedsJob mergeFeedsJob = new MergeFeedsJob(user, versions, "merged_output", MergeFeedsType.SERVICE_PERIOD); + // Run the job in this thread (we're not concerned about concurrency here). + mergeFeedsJob.run(); + // Check that correct strategy was used. + assertEquals( + MergeStrategy.DEFAULT, + mergeFeedsJob.mergeFeedsResult.mergeStrategy + ); + // Result should succeed. + assertFalse( + mergeFeedsJob.mergeFeedsResult.failed, + "Merge feeds job should utilize DEFAULT strategy." ); + + SqlAssert sqlAssert = new SqlAssert(mergeFeedsJob.mergedVersion); + sqlAssert.assertNoRefIntegrityErrors(); + + // calendar table should have 4 records + // - 2 records from future feed, including only_calendar_dates which absorbs its active counterpart, + // - 1 record from active feed that is used + // - 1 unused record from the active feed that is NOT discarded (default strategy). + sqlAssert.calendar.assertCount(4); + + // The calendar entry for the active feed ending 20170920 should end one day before the first calendar start date + // of the future feed. + sqlAssert.calendar.assertCount(1, "end_date='20170919' AND service_id in ('Fake_Transit1:common_id')"); + + // trips table should have 4 records + // (all records from original files except the active trip for only_calendar_trips, + // which is skipped because it operates in the future feed). + sqlAssert.trips.assertCount(4); } /** - * Tests that the MTC merge strategy will successfully merge BART feeds. Note: this test turns off - * {@link MergeFeedsJob#failOnDuplicateTripId} in order to force the merge to succeed even though there are duplicate - * trips contained within. + * Tests that the MTC merge strategy will successfully merge BART feeds. */ @Test - public void canMergeBARTFeeds() throws SQLException { + void canMergeBARTFeeds() throws SQLException { Set versions = new HashSet<>(); versions.add(bartVersionOldLite); versions.add(bartVersionNewLite); MergeFeedsJob mergeFeedsJob = new MergeFeedsJob(user, versions, "merged_output", MergeFeedsType.SERVICE_PERIOD); - // This time, turn off the failOnDuplicateTripId flag. - mergeFeedsJob.failOnDuplicateTripId = false; // Result should succeed this time. mergeFeedsJob.run(); assertFeedMergeSucceeded(mergeFeedsJob); @@ -328,8 +570,8 @@ public void canMergeBARTFeeds() throws SQLException { mergeFeedsJob.mergedVersion.feedLoadResult.shapes.rowCount, "Merged feed shapes count should equal expected value." ); - // Expect that two calendar dates are excluded from the past feed (because they occur after the first date of - // the future feed) . + // Expect that two calendar dates are excluded from the active feed (because they occur after the first date of + // the future feed). int expectedCalendarDatesCount = bartVersionOldLite.feedLoadResult.calendarDates.rowCount + bartVersionNewLite.feedLoadResult.calendarDates.rowCount - 2; assertEquals( // During merge, if identical shape_id is found in both feeds, active feed shape_id should be feed-scoped. @@ -345,13 +587,32 @@ public void canMergeBARTFeeds() throws SQLException { ); } + /** + * Tests that BART feeds with trips of same id but different signatures + * between active and future feeds cannot be merged per MTC revised merge logic. 
+ */ + @Test + void shouldNotMergeBARTFeedsSameTrips() { + Set versions = new HashSet<>(); + versions.add(bartVersion1); + versions.add(bartVersion2SameTrips); + MergeFeedsJob mergeFeedsJob = new MergeFeedsJob(user, versions, "merged_output", MergeFeedsType.SERVICE_PERIOD); + // Result should succeed this time. + mergeFeedsJob.run(); + // Result should fail. + assertTrue( + mergeFeedsJob.mergeFeedsResult.failed, + "Merge feeds job with trips of different signatures should fail." + ); + } + /** * Tests whether a MTC feed merge of two feed versions correctly feed scopes the service_id's of the feed that is * chronologically before the other one. This tests two feeds where one of them has both calendar files, and the * other has only the calendar file. */ @Test - public void canMergeFeedsWithMTCForServiceIds1 () throws SQLException { + void canMergeFeedsWithMTCForServiceIds1 () throws SQLException { Set versions = new HashSet<>(); versions.add(bothCalendarFilesVersion); versions.add(onlyCalendarVersion); @@ -359,102 +620,50 @@ public void canMergeFeedsWithMTCForServiceIds1 () throws SQLException { // Run the job in this thread (we're not concerned about concurrency here). mergeFeedsJob.run(); assertFeedMergeSucceeded(mergeFeedsJob); - // assert service_ids have been feed scoped properly - String mergedNamespace = mergeFeedsJob.mergedVersion.namespace; + SqlAssert sqlAssert = new SqlAssert(mergeFeedsJob.mergedVersion); + sqlAssert.assertNoRefIntegrityErrors(); + + // - calendar table should have 4 records. + sqlAssert.calendar.assertCount(4); - // - calendar table - // expect a total of 4 records in calendar table - assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.calendar", mergedNamespace), - 4 - ); // bothCalendarFilesVersion's common_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar WHERE service_id='Fake_Agency1:common_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendar.assertCount(1, "service_id='Fake_Agency1:common_id'"); + // bothCalendarFilesVersion's both_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar WHERE service_id='Fake_Agency1:both_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendar.assertCount(1, "service_id='Fake_Agency1:both_id'"); + // onlyCalendarVersion's common id should not be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar WHERE service_id='common_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendar.assertCount(1, "service_id='common_id'"); + // onlyCalendarVersion's only_calendar_id service_id should not be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar WHERE service_id='only_calendar_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendar.assertCount(1, "service_id='only_calendar_id'"); + + // - calendar_dates table should have only 2 records. 
+ sqlAssert.calendarDates.assertCount(2); - // - calendar_dates table - // expect only 2 records in calendar_dates table - assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.calendar_dates", mergedNamespace), - 2 - ); // bothCalendarFilesVersion's common_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar_dates WHERE service_id='Fake_Agency1:common_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendarDates.assertCount(1, "service_id='Fake_Agency1:common_id'"); + // bothCalendarFilesVersion's both_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar_dates WHERE service_id='Fake_Agency1:both_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendarDates.assertCount(1, "service_id='Fake_Agency1:both_id'"); + + // - trips should have 2 + 1 = 3 records. + sqlAssert.trips.assertCount(3); - // - trips table - // expect 2 + 1 = 3 records in trips table - assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.trips", mergedNamespace), - 3 - ); // bothCalendarFilesVersion's common_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.trips WHERE service_id='Fake_Agency1:common_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.trips.assertCount(1, "service_id='Fake_Agency1:common_id'"); + // 2 trips with onlyCalendarVersion's common_id service_id should not be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.trips WHERE service_id='common_id'", - mergedNamespace - ), - 2 - ); + sqlAssert.trips.assertCount(2, "service_id='common_id'"); } /** - * Tests whether a MTC feed merge of two feed versions correctly feed scopes the service_id's of the feed that is + * Tests whether an MTC feed merge of two feed versions correctly feed scopes the service_id's of the feed that is * chronologically before the other one. This tests two feeds where one of them has only the calendar_dates files, * and the other has only the calendar file. */ @Test - public void canMergeFeedsWithMTCForServiceIds2 () throws SQLException { + void canMergeFeedsWithMTCForServiceIds2 () throws SQLException { Set versions = new HashSet<>(); versions.add(onlyCalendarDatesVersion); versions.add(onlyCalendarVersion); @@ -462,77 +671,36 @@ public void canMergeFeedsWithMTCForServiceIds2 () throws SQLException { // Run the job in this thread (we're not concerned about concurrency here). mergeFeedsJob.run(); assertFeedMergeSucceeded(mergeFeedsJob); - // assert service_ids have been feed scoped properly - String mergedNamespace = mergeFeedsJob.mergedVersion.namespace; + SqlAssert sqlAssert = new SqlAssert(mergeFeedsJob.mergedVersion); + sqlAssert.assertNoRefIntegrityErrors(); + + // - calendar table should have 2 records. 
+ sqlAssert.calendar.assertCount(2); - // - calendar table - // expect a total of 4 records in calendar table - assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.calendar", mergedNamespace), - 2 - ); // onlyCalendarVersion's common id should not be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar WHERE service_id='common_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendar.assertCount(1, "service_id='common_id'"); + // onlyCalendarVersion's only_calendar_id service_id should not be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar WHERE service_id='only_calendar_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendar.assertCount(1, "service_id='only_calendar_id'"); + + // - calendar_dates table should have only 2 records. + sqlAssert.calendarDates.assertCount(2); - // - calendar_dates table - // expect only 2 records in calendar_dates table - assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.calendar_dates", mergedNamespace), - 2 - ); // onlyCalendarDatesVersion's common_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar_dates WHERE service_id='Fake_Agency3:common_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendarDates.assertCount(1, "service_id='Fake_Agency3:common_id'"); + // onlyCalendarDatesVersion's only_calendar_dates_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar_dates WHERE service_id='Fake_Agency3:only_calendar_dates_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendarDates.assertCount(1, "service_id='Fake_Agency3:only_calendar_dates_id'"); + + // - trips table should have 2 + 1 = 3 records. + sqlAssert.trips.assertCount(3); - // - trips table - // expect 2 + 1 = 3 records in trips table - assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.trips", mergedNamespace), - 3 - ); // bothCalendarFilesVersion's common_id service_id should be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.trips WHERE service_id='Fake_Agency3:common_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.trips.assertCount(1, "service_id='Fake_Agency3:common_id'"); + // 2 trips with onlyCalendarVersion's common_id service_id should not be scoped - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.trips WHERE service_id='common_id'", - mergedNamespace - ), - 2 - ); + sqlAssert.trips.assertCount(2, "service_id='common_id'"); + // This fails, but if remappedReferences isn't actually needed maybe the current implementation is good-to-go // assertThat(mergeFeedsJob.mergeFeedsResult.remappedReferences, equalTo(1)); } @@ -543,7 +711,7 @@ public void canMergeFeedsWithMTCForServiceIds2 () throws SQLException { * that service_id. */ @Test - public void canMergeFeedsWithMTCForServiceIds3 () throws SQLException { + void canMergeFeedsWithMTCForServiceIds3 () throws SQLException { Set versions = new HashSet<>(); versions.add(bothCalendarFilesVersion); versions.add(bothCalendarFilesVersion2); @@ -551,62 +719,35 @@ public void canMergeFeedsWithMTCForServiceIds3 () throws SQLException { // Run the job in this thread (we're not concerned about concurrency here). 
mergeFeedsJob.run(); assertFeedMergeSucceeded(mergeFeedsJob); - // assert service_ids have been feed scoped properly - String mergedNamespace = mergeFeedsJob.mergedVersion.namespace; - // - calendar table - // expect a total of 3 records in calendar table - assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.calendar", mergedNamespace), - 3 - ); - // - calendar_dates table - // expect 3 records in calendar_dates table - assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.calendar_dates", mergedNamespace), - 3 - ); + SqlAssert sqlAssert = new SqlAssert(mergeFeedsJob.mergedVersion); + sqlAssert.assertNoRefIntegrityErrors(); + + // - calendar table should have 3 records. + sqlAssert.calendar.assertCount(3); + + // calendar_dates should have 1 record. + // - one for common_id from the future feed. + // Note that the common_id from the active feed is not included because it operates + // within the future feed timespan. + sqlAssert.calendarDates.assertCount(1, "service_id='common_id' and date='20170916'"); + + // trips table should have 2 records. + // - this includes all trips from both feeds except the trip associated + // with cal_to_remove, whose calendar operates within the future feed. + sqlAssert.trips.assertCount(2); - // - trips table - // expect 3 records in trips table - assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.trips", mergedNamespace), - 3 - ); // common_id service_id should be scoped for earlier feed version. - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.trips WHERE service_id='Fake_Agency4:common_id'", - mergedNamespace - ), - 1 - ); - // cal_to_remove service_id should be scoped for earlier feed version. - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.trips WHERE service_id='Fake_Agency4:cal_to_remove'", - mergedNamespace - ), - 1 - ); + sqlAssert.trips.assertCount(1, "service_id='Fake_Agency4:common_id'"); + + // trips for cal_to_remove service_id should be removed. + sqlAssert.trips.assertCount(0, "service_id='Fake_Agency4:cal_to_remove'"); + // Amended calendar record from earlier feed version should also have a modified end date (one day before the // earliest start_date from the future feed). - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar WHERE service_id='Fake_Agency4:common_id' AND end_date='20170914'", - mergedNamespace - ), - 1 - ); - // Modified cal_to_remove should still exist in calendar_dates. It is modified even though it does not exist in - // the future feed due to the MTC requirement to update all service_ids in the past feed. - // See https://github.com/ibi-group/datatools-server/issues/244 - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar_dates WHERE service_id='Fake_Agency4:cal_to_remove'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendar.assertCount(1, "service_id='Fake_Agency4:common_id' AND end_date='20170914'"); + + // cal_to_remove should be removed from calendar_dates. + sqlAssert.calendarDates.assertCount(0, "service_id='Fake_Agency4:cal_to_remove'"); } /** @@ -615,7 +756,7 @@ public void canMergeFeedsWithMTCForServiceIds3 () throws SQLException { * that service_id.
 */ @Test - public void canMergeFeedsWithMTCForServiceIds4 () throws SQLException { + void canMergeFeedsWithMTCForServiceIds4 () throws SQLException { Set versions = new HashSet<>(); versions.add(bothCalendarFilesVersion); versions.add(bothCalendarFilesVersion3); @@ -623,44 +764,26 @@ public void canMergeFeedsWithMTCForServiceIds4 () throws SQLException { // Run the job in this thread (we're not concerned about concurrency here). mergeFeedsJob.run(); assertFeedMergeSucceeded(mergeFeedsJob); - // assert service_ids have been feed scoped properly - String mergedNamespace = mergeFeedsJob.mergedVersion.namespace; - // - calendar table - // expect a total of 3 records in calendar table - assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.calendar", mergedNamespace), - 3 - ); - // - calendar_dates table - // expect 2 records in calendar_dates table (all records from future feed removed) - assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.calendar_dates", mergedNamespace), - 3 - ); + SqlAssert sqlAssert = new SqlAssert(mergeFeedsJob.mergedVersion); + // FIXME: "version3" contains ref integrity errors... was that intentional? + // sqlAssert.assertNoRefIntegrityErrors(); + + // - calendar table should have 3 records. + sqlAssert.calendar.assertCount(3); + + // calendar_dates table should have 3 records: + // all records from the future feed and keep_one from the active feed. + sqlAssert.calendarDates.assertCount(3); + + // - trips table should have 3 records. + sqlAssert.trips.assertCount(3); - // - trips table - // expect 3 records in trips table - assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.trips", mergedNamespace), - 3 - ); // common_id service_id should be scoped for earlier feed version. - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.trips WHERE service_id='Fake_Agency5:common_id'", - mergedNamespace - ), - 1 - ); + sqlAssert.trips.assertCount(1, "service_id='Fake_Agency5:common_id'"); + // Amended calendar record from earlier feed version should also have a modified end date (one day before the // earliest start_date from the future feed). - assertThatSqlCountQueryYieldsExpectedCount( - String.format( - "SELECT count(*) FROM %s.calendar WHERE service_id='Fake_Agency5:common_id' AND end_date='20170914'", - mergedNamespace - ), - 1 - ); + sqlAssert.calendar.assertCount(1, "service_id='Fake_Agency5:common_id' AND end_date='20170914'"); } /** @@ -668,7 +791,7 @@ public void canMergeFeedsWithMTCForServiceIds4 () throws SQLException { * entrances, generic nodes, etc.) handles missing stop codes correctly. */ @Test - public void canMergeBARTFeedsWithSpecialStops() throws SQLException, IOException { + void canMergeBARTFeedsWithSpecialStops() throws SQLException, IOException { // Mini BART old/new feeds are pared down versions of the zips (bart_new.zip and bart_old.zip). They each have // only one trip and its corresponding stop_times. They do contain a full set of routes and stops. The stops are // from a recent (as of August 2021) GTFS file that includes a bunch of new stop records that act as entrances). @@ -682,15 +805,41 @@ public void canMergeBARTFeedsWithSpecialStops() throws SQLException, IOException // Job should succeed. assertFeedMergeSucceeded(mergeFeedsJob); // Verify that the stop count is equal to the number of stops found in each of the input stops.txt files.
- assertThatSqlCountQueryYieldsExpectedCount( - String.format("SELECT count(*) FROM %s.stops", mergeFeedsJob.mergedVersion.namespace), - 182 - ); + SqlAssert sqlAssert = new SqlAssert(mergeFeedsJob.mergedVersion); + sqlAssert.stops.assertCount(182); + } + + /** + * Tests whether feeds without agency ids can be merged. + * The merged feed should have autogenerated agency ids. + */ + @Test + void canMergeFeedsWithoutAgencyIds () throws SQLException { + Set versions = new HashSet<>(); + versions.add(noAgencyVersion1); + versions.add(noAgencyVersion2); + FeedVersion mergedVersion = regionallyMergeVersions(versions); + SqlAssert sqlAssert = new SqlAssert(mergedVersion); + final String agencyIdIsBlankOrNull = "agency_id='' or agency_id is null"; + + // - agency should have 2 records. + sqlAssert.agency.assertCount(2); + + // there shouldn't be records with blank agency_id + sqlAssert.agency.assertCount(0, agencyIdIsBlankOrNull); + + // - routes should have 2 records + sqlAssert.routes.assertCount(2); + + // there shouldn't be route records with blank agency_id + sqlAssert.routes.assertCount(0, agencyIdIsBlankOrNull); + + // - trips should have 4 records + sqlAssert.trips.assertCount(4); } /** * Verifies that a completed merge feeds job did not fail. - * @param mergeFeedsJob */ private void assertFeedMergeSucceeded(MergeFeedsJob mergeFeedsJob) { if (mergeFeedsJob.mergedVersion.namespace == null || mergeFeedsJob.mergeFeedsResult.failed) { diff --git a/src/test/java/com/conveyal/datatools/manager/models/FeedVersionTest.java b/src/test/java/com/conveyal/datatools/manager/models/FeedVersionTest.java index 01405a204..11889e000 100644 --- a/src/test/java/com/conveyal/datatools/manager/models/FeedVersionTest.java +++ b/src/test/java/com/conveyal/datatools/manager/models/FeedVersionTest.java @@ -3,42 +3,107 @@ import com.conveyal.datatools.DatatoolsTest; import com.conveyal.datatools.UnitTest; import com.conveyal.datatools.manager.persistence.Persistence; +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.error.NewGTFSErrorType; +import com.conveyal.gtfs.error.SQLErrorStorage; +import com.conveyal.gtfs.util.InvalidNamespaceException; +import com.conveyal.gtfs.validator.ValidationResult; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.EnumSource; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; +import java.sql.Connection; +import java.sql.SQLException; import java.util.Date; +import java.util.stream.Stream; +import static com.conveyal.datatools.TestUtils.createFeedVersionFromGtfsZip; +import static com.conveyal.datatools.manager.DataManager.GTFS_DATA_SOURCE; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.not; public class FeedVersionTest extends UnitTest { + private static Project project; + private static FeedSource feedSource; /** Initialize application for tests to run. 
 */ @BeforeAll public static void setUp() throws Exception { // start server if it isn't already running DatatoolsTest.setUp(); + + // set up project + project = new Project(); + project.name = String.format("Test project %s", new Date()); + Persistence.projects.create(project); + + feedSource = new FeedSource("Test feed source"); + feedSource.projectId = project.id; + Persistence.feedSources.create(feedSource); } + @AfterAll + public static void tearDown() { + if (project != null) { + project.delete(); + } + } /** * Make sure FeedVersionIDs are always unique, even if created at the same second. * See https://github.com/ibi-group/datatools-server/issues/251 */ @Test - public void canCreateUniqueFeedVersionIDs() { + void canCreateUniqueFeedVersionIDs() { // Create a project, feed sources, and feed versions to merge. - Project testProject = new Project(); - testProject.name = String.format("Test project %s", new Date().toString()); - Persistence.projects.create(testProject); - FeedSource testFeedsoure = new FeedSource("Test feed source"); - testFeedsoure.projectId = testProject.id; - Persistence.feedSources.create(testFeedsoure); - // create two feedVersions immediately after each other which should end up having unique IDs - FeedVersion feedVersion1 = new FeedVersion(testFeedsoure); - FeedVersion feedVersion2 = new FeedVersion(testFeedsoure); + FeedVersion feedVersion1 = new FeedVersion(feedSource); + FeedVersion feedVersion2 = new FeedVersion(feedSource); assertThat(feedVersion1.id, not(equalTo(feedVersion2.id))); } + + /** + * Detect feeds with fatal exceptions (a blocking issue for publishing). + */ + @Test + void canDetectBlockingFatalExceptionsForPublishing() { + FeedVersion feedVersion1 = new FeedVersion(feedSource); + feedVersion1.validationResult = new ValidationResult(); + feedVersion1.validationResult.fatalException = "A fatal exception occurred"; + + assertThat(feedVersion1.hasBlockingIssuesForPublishing(), equalTo(true)); + } + + /** + * Detect feeds with blocking error types that prevent publishing, per + * https://github.com/ibi-group/datatools-ui/blob/dev/lib/manager/util/version.js#L79.
+ */ + @ParameterizedTest + @EnumSource(value = NewGTFSErrorType.class, names = { + "ILLEGAL_FIELD_VALUE", + "MISSING_COLUMN", + "REFERENTIAL_INTEGRITY", + "SERVICE_WITHOUT_DAYS_OF_WEEK", + "TABLE_MISSING_COLUMN_HEADERS", + "TABLE_IN_SUBDIRECTORY", + "WRONG_NUMBER_OF_FIELDS" + }) + void canDetectBlockingErrorTypesForPublishing(NewGTFSErrorType errorType) throws InvalidNamespaceException, SQLException { + FeedVersion feedVersion1 = createFeedVersionFromGtfsZip(feedSource, "bart_old_lite.zip"); + + // Add blocking error types to feed version + try (Connection connection = GTFS_DATA_SOURCE.getConnection()) { + SQLErrorStorage errorStorage = new SQLErrorStorage(connection, feedVersion1.namespace + ".", false); + errorStorage.storeError(NewGTFSError.forFeed(errorType, null)); + errorStorage.commitAndClose(); + } + + assertThat(feedVersion1.hasBlockingIssuesForPublishing(), equalTo(true)); + } } diff --git a/src/test/java/com/conveyal/datatools/manager/utils/SqlAssert.java b/src/test/java/com/conveyal/datatools/manager/utils/SqlAssert.java new file mode 100644 index 000000000..dd91d2ba9 --- /dev/null +++ b/src/test/java/com/conveyal/datatools/manager/utils/SqlAssert.java @@ -0,0 +1,74 @@ +package com.conveyal.datatools.manager.utils; + +import com.conveyal.datatools.manager.models.FeedVersion; +import com.conveyal.gtfs.loader.Table; +import com.google.common.base.Strings; + +import java.sql.SQLException; + +import static com.conveyal.datatools.TestUtils.assertThatSqlCountQueryYieldsExpectedCount; + +/** + * This class contains helper methods to assert against various PSQL tables + * of a given namespace. + */ +public class SqlAssert { + private final FeedVersion version; + public final SqlTableAssert agency = new SqlTableAssert(Table.AGENCY); + public final SqlTableAssert calendar = new SqlTableAssert(Table.CALENDAR); + public final SqlTableAssert calendarDates = new SqlTableAssert(Table.CALENDAR_DATES); + public final SqlTableAssert errors = new SqlTableAssert("errors"); + public final SqlTableAssert routes = new SqlTableAssert(Table.ROUTES); + public final SqlTableAssert trips = new SqlTableAssert(Table.TRIPS); + public final SqlTableAssert stops = new SqlTableAssert(Table.STOPS); + + public SqlAssert(FeedVersion version) { + this.version = version; + } + + /** + * Checks there are no unused service ids. + */ + public void assertNoUnusedServiceIds() throws SQLException { + errors.assertCount(0, "error_type='SERVICE_UNUSED'"); + } + + /** + * Checks there are no referential integrity issues. + */ + public void assertNoRefIntegrityErrors() throws SQLException { + errors.assertCount(0, "error_type = 'REFERENTIAL_INTEGRITY'"); + } + + /** + * Helper class to assert against a particular PSQL table. + */ + public class SqlTableAssert { + private final String tableName; + private SqlTableAssert(Table table) { + this.tableName = table.name; + } + + private SqlTableAssert(String tableName) { + this.tableName = tableName; + } + + /** + * Helper method to assert a row count on a simple WHERE clause. + */ + public void assertCount(int count, String condition) throws SQLException { + assertThatSqlCountQueryYieldsExpectedCount( + String.format("SELECT count(*) FROM %s.%s %s", version.namespace, tableName, + Strings.isNullOrEmpty(condition) ? "" : "WHERE " + condition), + count + ); + } + + /** + * Helper method to assert a row count on the entire table. 
+ */ + public void assertCount(int count) throws SQLException { + assertCount(count, null); + } + } +} diff --git a/src/test/resources/com/conveyal/datatools/gtfs/__files/bart_new_lite.zip b/src/test/resources/com/conveyal/datatools/gtfs/__files/bart_new_lite.zip new file mode 100644 index 000000000..493105c7b Binary files /dev/null and b/src/test/resources/com/conveyal/datatools/gtfs/__files/bart_new_lite.zip differ diff --git a/src/test/resources/com/conveyal/datatools/gtfs/bart_new_with_quoted_values.zip b/src/test/resources/com/conveyal/datatools/gtfs/bart_new_with_quoted_values.zip new file mode 100644 index 000000000..233231a7a Binary files /dev/null and b/src/test/resources/com/conveyal/datatools/gtfs/bart_new_with_quoted_values.zip differ diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/agency.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/agency.txt new file mode 100755 index 000000000..a916ce91b --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/agency.txt @@ -0,0 +1,2 @@ +agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url +1,Fake Transit,,,,,America/Los_Angeles,, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/calendar.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/calendar.txt new file mode 100755 index 000000000..8d5260fe6 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/calendar.txt @@ -0,0 +1,3 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +common_id,1,1,1,1,1,1,1,20170923,20170925 +only_calendar_id,1,1,1,1,1,1,1,20170920,20170927 diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/calendar_attributes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/calendar_attributes.txt new file mode 100755 index 000000000..64803a6be --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/calendar_attributes.txt @@ -0,0 +1,3 @@ +service_id,service_description +common_id,Description for common_id (added trips) +only_calendar_id,Description for only_calendar_id (added trips) diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/calendar_dates.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/calendar_dates.txt new file mode 100755 index 000000000..f33301897 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/calendar_dates.txt @@ -0,0 +1,2 @@ +service_id,date,exception_type +only_calendar_id,20190218,1 diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/feed_info.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/feed_info.txt new file mode 100644 index 000000000..ceac60810 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/feed_info.txt @@ -0,0 +1,2 @@ +feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_version +fake_transit,Conveyal,http://www.conveyal.com,en,1.0 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/routes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/routes.txt new file mode 100755 index 000000000..b13480efa --- /dev/null +++ 
b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/routes.txt @@ -0,0 +1,3 @@ +agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url +1,1,1,Route 1,,3,,7CE6E7,FFFFFF, +1,2,2,Route 2,,3,,7CE6E7,FFFFFF, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/stop_attributes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/stop_attributes.txt new file mode 100644 index 000000000..b77c473e0 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/stop_attributes.txt @@ -0,0 +1,3 @@ +stop_id,accessibility_id,cardinal_direction,relative_position,stop_city +4u6g,0,SE,FS,Scotts Valley +johv,0,SE,FS,Scotts Valley diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/stop_times.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/stop_times.txt new file mode 100755 index 000000000..e6dce4f97 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/stop_times.txt @@ -0,0 +1,5 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint +trip3,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000, +trip3,07:01:00,07:01:00,johv,2,,0,0,341.4491961, +only-calendar-trip2,07:00:00,07:00:00,johv,1,,0,0,0.0000000, +only-calendar-trip2,07:01:00,07:01:00,4u6g,2,,0,0,341.4491961, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/stops.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/stops.txt new file mode 100755 index 000000000..0db5a6d40 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/stops.txt @@ -0,0 +1,6 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding +4u6g,4u6g,Butler Ln,,37.0612132,-122.0074332,,,0,,, +johv,johv,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,, +123,,Parent Station,,37.0666,-122.0777,,,1,,, +1234,1234,Child Stop,,37.06662,-122.07772,,,0,123,, +1234567,1234567,Unused stop,,37.06668,-122.07781,,,0,123,, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/timepoints.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/timepoints.txt new file mode 100644 index 000000000..41a7813e4 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/timepoints.txt @@ -0,0 +1,5 @@ +trip_id,stop_id +trip3,4u6g +trip3,johv +only-calendar-trip2,johv +only-calendar-trip2,4u6g \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/trips.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/trips.txt new file mode 100755 index 000000000..216821978 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips-2/trips.txt @@ -0,0 +1,3 @@ +route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id +2,only-calendar-trip2,,,0,,,0,0,common_id +2,trip3,,,0,,,0,0,only_calendar_id \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/agency.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/agency.txt new 
file mode 100755 index 000000000..a916ce91b --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/agency.txt @@ -0,0 +1,2 @@ +agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url +1,Fake Transit,,,,,America/Los_Angeles,, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/calendar.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/calendar.txt new file mode 100755 index 000000000..8d5260fe6 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/calendar.txt @@ -0,0 +1,3 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +common_id,1,1,1,1,1,1,1,20170923,20170925 +only_calendar_id,1,1,1,1,1,1,1,20170920,20170927 diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/feed_info.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/feed_info.txt new file mode 100644 index 000000000..ceac60810 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/feed_info.txt @@ -0,0 +1,2 @@ +feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_version +fake_transit,Conveyal,http://www.conveyal.com,en,1.0 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/routes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/routes.txt new file mode 100755 index 000000000..b13480efa --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/routes.txt @@ -0,0 +1,3 @@ +agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url +1,1,1,Route 1,,3,,7CE6E7,FFFFFF, +1,2,2,Route 2,,3,,7CE6E7,FFFFFF, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/stop_attributes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/stop_attributes.txt new file mode 100644 index 000000000..b77c473e0 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/stop_attributes.txt @@ -0,0 +1,3 @@ +stop_id,accessibility_id,cardinal_direction,relative_position,stop_city +4u6g,0,SE,FS,Scotts Valley +johv,0,SE,FS,Scotts Valley diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/stop_times.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/stop_times.txt new file mode 100755 index 000000000..04d65a948 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/stop_times.txt @@ -0,0 +1,9 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint +trip3,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000, +trip3,07:01:00,07:01:00,johv,2,,0,0,341.4491961, +only-calendar-trip1,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000, +only-calendar-trip1,07:01:00,07:01:00,johv,2,,0,0,341.4491961, +only-calendar-trip2,07:00:00,07:00:00,johv,1,,0,0,0.0000000, +only-calendar-trip2,07:01:00,07:01:00,4u6g,2,,0,0,341.4491961, +only-calendar-trip999,07:00:00,07:00:00,johv,1,,0,0,0.0000000, +only-calendar-trip999,07:01:00,07:01:00,4u6g,2,,0,0,341.4491961, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/stops.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/stops.txt new file mode 
100755 index 000000000..0db5a6d40 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/stops.txt @@ -0,0 +1,6 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding +4u6g,4u6g,Butler Ln,,37.0612132,-122.0074332,,,0,,, +johv,johv,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,, +123,,Parent Station,,37.0666,-122.0777,,,1,,, +1234,1234,Child Stop,,37.06662,-122.07772,,,0,123,, +1234567,1234567,Unused stop,,37.06668,-122.07781,,,0,123,, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/trips.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/trips.txt new file mode 100755 index 000000000..d745f3502 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-added-trips/trips.txt @@ -0,0 +1,4 @@ +route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id +1,only-calendar-trip999,,,0,,,0,0,common_id +2,only-calendar-trip2,,,0,,,0,0,common_id +2,trip3,,,0,,,0,0,only_calendar_id \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/agency.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/agency.txt new file mode 100755 index 000000000..a916ce91b --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/agency.txt @@ -0,0 +1,2 @@ +agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url +1,Fake Transit,,,,,America/Los_Angeles,, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/calendar.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/calendar.txt new file mode 100755 index 000000000..b8b64fe7a --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/calendar.txt @@ -0,0 +1,4 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +common_id,1,1,1,1,1,1,1,20170918,20170920 +only_calendar_id,1,1,1,1,1,1,1,20170921,20170922 +dropped_calendar_id,1,1,1,1,1,1,1,20170918,20170919 diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/calendar_attributes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/calendar_attributes.txt new file mode 100755 index 000000000..e1ab10f8e --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/calendar_attributes.txt @@ -0,0 +1,3 @@ +service_id,service_description +common_id,Description for common_id +only_calendar_id,Description for only_calendar_id diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/calendar_dates.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/calendar_dates.txt new file mode 100644 index 000000000..e23e5f104 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/calendar_dates.txt @@ -0,0 +1,6 @@ +service_id,date,exception_type +dropped_calendar_id,20170919,1 +only_calendar_id,20170921,1 +only_calendar_id,20170922,1 +common_id,20170917,1 +common_id,20170919,2 diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/feed_info.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/feed_info.txt new file mode 100644 index 000000000..ceac60810 --- /dev/null +++ 
b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/feed_info.txt @@ -0,0 +1,2 @@ +feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_version +fake_transit,Conveyal,http://www.conveyal.com,en,1.0 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/routes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/routes.txt new file mode 100755 index 000000000..b13480efa --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/routes.txt @@ -0,0 +1,3 @@ +agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url +1,1,1,Route 1,,3,,7CE6E7,FFFFFF, +1,2,2,Route 2,,3,,7CE6E7,FFFFFF, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/stop_attributes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/stop_attributes.txt new file mode 100644 index 000000000..b77c473e0 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/stop_attributes.txt @@ -0,0 +1,3 @@ +stop_id,accessibility_id,cardinal_direction,relative_position,stop_city +4u6g,0,SE,FS,Scotts Valley +johv,0,SE,FS,Scotts Valley diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/stop_times.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/stop_times.txt new file mode 100755 index 000000000..977736d8c --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/stop_times.txt @@ -0,0 +1,7 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint +only-calendar-trip1,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000, +only-calendar-trip1,07:01:00,07:01:00,johv,2,,0,0,341.4491961, +only-calendar-trip2,07:00:00,07:00:00,johv,1,,0,0,0.0000000, +only-calendar-trip2,07:01:00,07:01:00,4u6g,2,,0,0,341.4491961, +only-calendar-trip3,07:10:00,07:10:00,johv,1,,0,0,0.0000000, +only-calendar-trip3,07:11:00,07:11:00,4u6g,2,,0,0,341.4491961, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/stops.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/stops.txt new file mode 100755 index 000000000..0db5a6d40 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/stops.txt @@ -0,0 +1,6 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding +4u6g,4u6g,Butler Ln,,37.0612132,-122.0074332,,,0,,, +johv,johv,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,, +123,,Parent Station,,37.0666,-122.0777,,,1,,, +1234,1234,Child Stop,,37.06662,-122.07772,,,0,123,, +1234567,1234567,Unused stop,,37.06668,-122.07781,,,0,123,, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/timepoints.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/timepoints.txt new file mode 100644 index 000000000..a7ba2e023 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/timepoints.txt @@ -0,0 +1,5 @@ +trip_id,stop_id +only-calendar-trip1,4u6g +only-calendar-trip1,johv +only-calendar-trip2,johv +only-calendar-trip2,4u6g \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/trips.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/trips.txt new file mode 100755 index 
000000000..d8c0d7031 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-base/trips.txt @@ -0,0 +1,4 @@ +route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id +1,only-calendar-trip1,,,0,,,0,0,common_id +2,only-calendar-trip2,,,0,,,0,0,common_id +2,only-calendar-trip3,,,0,,,0,0,only_calendar_id diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/agency.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/agency.txt new file mode 100755 index 000000000..a916ce91b --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/agency.txt @@ -0,0 +1,2 @@ +agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url +1,Fake Transit,,,,,America/Los_Angeles,, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/calendar.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/calendar.txt new file mode 100755 index 000000000..a5a410036 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/calendar.txt @@ -0,0 +1,3 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +future_id,1,1,1,1,1,1,1,20170920,20170925 +future_id_other,1,1,1,1,1,1,1,20170924,20170927 diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/feed_info.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/feed_info.txt new file mode 100644 index 000000000..ceac60810 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/feed_info.txt @@ -0,0 +1,2 @@ +feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_version +fake_transit,Conveyal,http://www.conveyal.com,en,1.0 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/routes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/routes.txt new file mode 100755 index 000000000..b13480efa --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/routes.txt @@ -0,0 +1,3 @@ +agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url +1,1,1,Route 1,,3,,7CE6E7,FFFFFF, +1,2,2,Route 2,,3,,7CE6E7,FFFFFF, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/stop_attributes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/stop_attributes.txt new file mode 100644 index 000000000..b77c473e0 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/stop_attributes.txt @@ -0,0 +1,3 @@ +stop_id,accessibility_id,cardinal_direction,relative_position,stop_city +4u6g,0,SE,FS,Scotts Valley +johv,0,SE,FS,Scotts Valley diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/stop_times.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/stop_times.txt new file mode 100755 index 000000000..cc847bc94 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/stop_times.txt @@ -0,0 +1,5 @@ 
+trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint +future-trip1,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000, +future-trip1,07:01:00,07:01:00,johv,2,,0,0,341.4491961, +future-trip2,07:00:00,07:00:00,johv,1,,0,0,0.0000000, +future-trip2,07:01:00,07:01:00,4u6g,2,,0,0,341.4491961, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/stops.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/stops.txt new file mode 100755 index 000000000..0db5a6d40 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/stops.txt @@ -0,0 +1,6 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding +4u6g,4u6g,Butler Ln,,37.0612132,-122.0074332,,,0,,, +johv,johv,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,, +123,,Parent Station,,37.0666,-122.0777,,,1,,, +1234,1234,Child Stop,,37.06662,-122.07772,,,0,123,, +1234567,1234567,Unused stop,,37.06668,-122.07781,,,0,123,, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/trips.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/trips.txt new file mode 100755 index 000000000..221d7a051 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future-unique-ids/trips.txt @@ -0,0 +1,3 @@ +route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id +1,future-trip1,,,0,,,0,0,future_id +2,future-trip2,,,0,,,0,0,future_id \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/agency.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/agency.txt new file mode 100755 index 000000000..a916ce91b --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/agency.txt @@ -0,0 +1,2 @@ +agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url +1,Fake Transit,,,,,America/Los_Angeles,, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/calendar.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/calendar.txt new file mode 100755 index 000000000..8d5260fe6 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/calendar.txt @@ -0,0 +1,3 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +common_id,1,1,1,1,1,1,1,20170923,20170925 +only_calendar_id,1,1,1,1,1,1,1,20170920,20170927 diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/feed_info.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/feed_info.txt new file mode 100644 index 000000000..ceac60810 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/feed_info.txt @@ -0,0 +1,2 @@ +feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_version +fake_transit,Conveyal,http://www.conveyal.com,en,1.0 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/routes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/routes.txt new file mode 100755 index 000000000..b13480efa --- /dev/null +++ 
b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/routes.txt @@ -0,0 +1,3 @@ +agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url +1,1,1,Route 1,,3,,7CE6E7,FFFFFF, +1,2,2,Route 2,,3,,7CE6E7,FFFFFF, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/stop_attributes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/stop_attributes.txt new file mode 100644 index 000000000..b77c473e0 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/stop_attributes.txt @@ -0,0 +1,3 @@ +stop_id,accessibility_id,cardinal_direction,relative_position,stop_city +4u6g,0,SE,FS,Scotts Valley +johv,0,SE,FS,Scotts Valley diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/stop_times.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/stop_times.txt new file mode 100755 index 000000000..977736d8c --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/stop_times.txt @@ -0,0 +1,7 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint +only-calendar-trip1,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000, +only-calendar-trip1,07:01:00,07:01:00,johv,2,,0,0,341.4491961, +only-calendar-trip2,07:00:00,07:00:00,johv,1,,0,0,0.0000000, +only-calendar-trip2,07:01:00,07:01:00,4u6g,2,,0,0,341.4491961, +only-calendar-trip3,07:10:00,07:10:00,johv,1,,0,0,0.0000000, +only-calendar-trip3,07:11:00,07:11:00,4u6g,2,,0,0,341.4491961, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/stops.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/stops.txt new file mode 100755 index 000000000..0db5a6d40 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/stops.txt @@ -0,0 +1,6 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding +4u6g,4u6g,Butler Ln,,37.0612132,-122.0074332,,,0,,, +johv,johv,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,, +123,,Parent Station,,37.0666,-122.0777,,,1,,, +1234,1234,Child Stop,,37.06662,-122.07772,,,0,123,, +1234567,1234567,Unused stop,,37.06668,-122.07781,,,0,123,, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/trips.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/trips.txt new file mode 100755 index 000000000..d8c0d7031 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-future/trips.txt @@ -0,0 +1,4 @@ +route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id +1,only-calendar-trip1,,,0,,,0,0,common_id +2,only-calendar-trip2,,,0,,,0,0,common_id +2,only-calendar-trip3,,,0,,,0,0,only_calendar_id diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/agency.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/agency.txt new file mode 100755 index 000000000..a916ce91b --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/agency.txt @@ -0,0 +1,2 @@ +agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url +1,Fake Transit,,,,,America/Los_Angeles,, diff --git 
a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/calendar.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/calendar.txt new file mode 100755 index 000000000..201a95e65 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/calendar.txt @@ -0,0 +1,4 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +common_id,1,1,1,1,1,1,1,20170918,20170920 +only_calendar_id,1,1,1,1,1,1,1,20170921,20170922 +new_cal_id,1,1,1,1,1,0,0,20170921,20170922 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/feed_info.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/feed_info.txt new file mode 100644 index 000000000..ceac60810 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/feed_info.txt @@ -0,0 +1,2 @@ +feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_version +fake_transit,Conveyal,http://www.conveyal.com,en,1.0 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/routes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/routes.txt new file mode 100755 index 000000000..b13480efa --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/routes.txt @@ -0,0 +1,3 @@ +agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url +1,1,1,Route 1,,3,,7CE6E7,FFFFFF, +1,2,2,Route 2,,3,,7CE6E7,FFFFFF, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/stop_attributes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/stop_attributes.txt new file mode 100644 index 000000000..b77c473e0 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/stop_attributes.txt @@ -0,0 +1,3 @@ +stop_id,accessibility_id,cardinal_direction,relative_position,stop_city +4u6g,0,SE,FS,Scotts Valley +johv,0,SE,FS,Scotts Valley diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/stop_times.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/stop_times.txt new file mode 100755 index 000000000..646706c5c --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/stop_times.txt @@ -0,0 +1,5 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint +only-calendar-trip1,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000, +only-calendar-trip1,07:01:00,07:01:00,johv,2,,0,0,341.4491961, +only-calendar-trip2,07:00:00,07:00:00,johv,1,,0,0,0.0000000, +only-calendar-trip2,07:01:00,07:01:00,4u6g,2,,0,0,341.4491961, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/stops.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/stops.txt new file mode 100755 index 000000000..0db5a6d40 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/stops.txt @@ -0,0 +1,6 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding +4u6g,4u6g,Butler Ln,,37.0612132,-122.0074332,,,0,,, +johv,johv,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,, +123,,Parent 
Station,,37.0666,-122.0777,,,1,,, +1234,1234,Child Stop,,37.06662,-122.07772,,,0,123,, +1234567,1234567,Unused stop,,37.06668,-122.07781,,,0,123,, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/trips.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/trips.txt new file mode 100755 index 000000000..387b076cd --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-services/trips.txt @@ -0,0 +1,3 @@ +route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id +1,only-calendar-trip1,,,0,,,0,0,common_id +2,only-calendar-trip2,,,0,,,0,0,common_id \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/agency.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/agency.txt new file mode 100755 index 000000000..a916ce91b --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/agency.txt @@ -0,0 +1,2 @@ +agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url +1,Fake Transit,,,,,America/Los_Angeles,, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/calendar.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/calendar.txt new file mode 100755 index 000000000..e97e3d013 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/calendar.txt @@ -0,0 +1,3 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +common_id,1,1,1,1,1,1,1,20170923,20170925 +only_calendar_id,1,1,1,1,1,1,1,20170924,20170927 diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/feed_info.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/feed_info.txt new file mode 100644 index 000000000..ceac60810 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/feed_info.txt @@ -0,0 +1,2 @@ +feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_version +fake_transit,Conveyal,http://www.conveyal.com,en,1.0 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/routes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/routes.txt new file mode 100755 index 000000000..b13480efa --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/routes.txt @@ -0,0 +1,3 @@ +agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url +1,1,1,Route 1,,3,,7CE6E7,FFFFFF, +1,2,2,Route 2,,3,,7CE6E7,FFFFFF, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/stop_attributes.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/stop_attributes.txt new file mode 100644 index 000000000..b77c473e0 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/stop_attributes.txt @@ -0,0 +1,3 @@ +stop_id,accessibility_id,cardinal_direction,relative_position,stop_city +4u6g,0,SE,FS,Scotts Valley +johv,0,SE,FS,Scotts Valley diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/stop_times.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/stop_times.txt new file mode 100755 index 000000000..29b88cefb --- /dev/null 
+++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/stop_times.txt @@ -0,0 +1,9 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint +trip3,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000, +trip3,07:01:00,07:01:00,johv,2,,0,0,341.4491961, +only-calendar-trip1,08:00:00,08:00:00,4u6g,1,,0,0,0.0000000, +only-calendar-trip1,08:01:00,08:01:00,johv,2,,0,0,341.4491961, +only-calendar-trip2,07:00:00,07:00:00,johv,1,,0,0,0.0000000, +only-calendar-trip2,07:01:00,07:01:00,4u6g,2,,0,0,341.4491961, +only-calendar-trip999,07:00:00,07:00:00,johv,1,,0,0,0.0000000, +only-calendar-trip999,07:01:00,07:01:00,4u6g,2,,0,0,341.4491961, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/stops.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/stops.txt new file mode 100755 index 000000000..0db5a6d40 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/stops.txt @@ -0,0 +1,6 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding +4u6g,4u6g,Butler Ln,,37.0612132,-122.0074332,,,0,,, +johv,johv,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,, +123,,Parent Station,,37.0666,-122.0777,,,1,,, +1234,1234,Child Stop,,37.06662,-122.07772,,,0,123,, +1234567,1234567,Unused stop,,37.06668,-122.07781,,,0,123,, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/trips.txt b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/trips.txt new file mode 100755 index 000000000..095ff16d5 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/merge-data-mod-trips/trips.txt @@ -0,0 +1,5 @@ +route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id +1,only-calendar-trip999,,,0,,,0,0,common_id +1,only-calendar-trip1,,,0,,,0,0,common_id +2,only-calendar-trip2,,,0,,,0,0,common_id +2,trip3,,,0,,,0,0,only_calendar_id \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/mini-bart-new/trips.txt b/src/test/resources/com/conveyal/datatools/gtfs/mini-bart-new/trips.txt index aa89b063e..056042142 100755 --- a/src/test/resources/com/conveyal/datatools/gtfs/mini-bart-new/trips.txt +++ b/src/test/resources/com/conveyal/datatools/gtfs/mini-bart-new/trips.txt @@ -1,2 +1,2 @@ route_id,service_id,trip_id,trip_headsign,direction_id,block_id,wheelchair_accessible,bikes_allowed -1,WKDY,3610458WKDY,San Francisco International Airport,0,1,1 +1,WKDY,3610458WKDY,San Francisco International Airport,0,1,1,1 diff --git a/src/test/resources/com/conveyal/datatools/gtfs/mini-bart-old/trips.txt b/src/test/resources/com/conveyal/datatools/gtfs/mini-bart-old/trips.txt index 1e7e82d6c..c3d8d527c 100755 --- a/src/test/resources/com/conveyal/datatools/gtfs/mini-bart-old/trips.txt +++ b/src/test/resources/com/conveyal/datatools/gtfs/mini-bart-old/trips.txt @@ -1,2 +1,2 @@ route_id,service_id,trip_id,trip_headsign,direction_id,block_id,wheelchair_accessible,bikes_allowed -01,WKDY,3610403WKDY,San Francisco International Airport,0,1,1 +01,WKDY,3610403WKDY,San Francisco International Airport,0,1,1,1 diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/agency.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/agency.txt new file mode 100755 index 
000000000..bef4d6072 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/agency.txt @@ -0,0 +1,2 @@ +agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url +,Agency1,http://agency1.example.com/,en,888-555-1111,agency1@example.com,America/New_York,, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/calendar.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/calendar.txt new file mode 100755 index 000000000..7d765be25 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/calendar.txt @@ -0,0 +1,4 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +bb285e71-c906-4b67-ab68-0212fc864728,0,0,0,0,0,0,1,20180823,20200823 +bd6e404d-6e02-45c1-826c-a569ca947fce,1,1,1,1,1,0,0,20180823,20200823 +ef5a027b-353d-4071-b10b-f232e1c6b8cf,0,0,0,0,0,1,0,20180823,20200823 diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/calendar_dates.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/calendar_dates.txt new file mode 100755 index 000000000..74c1ef632 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/calendar_dates.txt @@ -0,0 +1 @@ +service_id,date,exception_type diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/fare_attributes.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/fare_attributes.txt new file mode 100755 index 000000000..8a1793839 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/fare_attributes.txt @@ -0,0 +1 @@ +fare_id,price,currency_type,payment_method,transfers,transfer_duration diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/fare_rules.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/fare_rules.txt new file mode 100755 index 000000000..c7f6b54a3 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/fare_rules.txt @@ -0,0 +1 @@ +fare_id,route_id,origin_id,destination_id,contains_id diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/feed_info.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/feed_info.txt new file mode 100644 index 000000000..6ab097246 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/feed_info.txt @@ -0,0 +1,2 @@ +feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_version +feed1,IBI,http://www.ibigroup.com,en,1.0 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/frequencies.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/frequencies.txt new file mode 100755 index 000000000..9b46cdeef --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/frequencies.txt @@ -0,0 +1 @@ +trip_id,start_time,end_time,headway_secs,exact_times diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/routes.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/routes.txt new file mode 100644 index 000000000..d5fe2002e --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/routes.txt @@ -0,0 +1,2 @@ +agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url +,1,1,Agency1Route1,,3,,6A478F,FFFFFF, \ No newline at end of file diff --git 
a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/shapes.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/shapes.txt new file mode 100644 index 000000000..df1a9f70e --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/shapes.txt @@ -0,0 +1,3 @@ +shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled +03c47dc1-bf37-4668-8c58-9847a496f92d,38.3223235,-122.3105574,1,0.0000000 +03c47dc1-bf37-4668-8c58-9847a496f92d,38.3221392,-122.3104330,2,23.1739361 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/stop_times.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/stop_times.txt new file mode 100644 index 000000000..fcd8ff8ee --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/stop_times.txt @@ -0,0 +1,3 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint +t104-sl2-p18-r45,14:00:00,14:00:00,100,1,,0,0,13.3808067,1 +t104-sl2-p18-r45,14:03:14,14:03:14,102,2,,0,0,1122.9721799,0 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/stops.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/stops.txt new file mode 100644 index 000000000..104792368 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/stops.txt @@ -0,0 +1,3 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding +100,89062,Terrace Dr at Liberty Dr,,38.2933330,-122.2694440,,http://ridethevine.rideralerts.com/InfoPoint/89062,0,,, +102,89140,Trancas St at Jefferson St,,38.3225000,-122.3011110,,http://ridethevine.rideralerts.com/InfoPoint/89140,0,,, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/transfers.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/transfers.txt new file mode 100755 index 000000000..357103c47 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/transfers.txt @@ -0,0 +1 @@ +from_stop_id,to_stop_id,transfer_type,min_transfer_time diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/trips.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/trips.txt new file mode 100644 index 000000000..2502fbb6a --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-1/trips.txt @@ -0,0 +1,3 @@ +route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id +1,t104-sl2-p18-r45,08_1,,0,802,03c47dc1-bf37-4668-8c58-9847a496f92d,0,0,bd6e404d-6e02-45c1-826c-a569ca947fce +1,t104-sl3-p17-r1B,08_1,,0,2008,03c47dc1-bf37-4668-8c58-9847a496f92d,0,0,ef5a027b-353d-4071-b10b-f232e1c6b8cf \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/agency.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/agency.txt new file mode 100755 index 000000000..0cb24cbd5 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/agency.txt @@ -0,0 +1,2 @@ +agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url +,Agency2,http://agency2.example.com/,en,888-555-2222,agency2@example.com,America/New_York,, diff --git 
a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/calendar.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/calendar.txt new file mode 100755 index 000000000..d6546a48e --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/calendar.txt @@ -0,0 +1,4 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +2-bb285e71-c906-4b67-ab68-0212fc864728,0,0,0,0,0,0,1,20180823,20190823 +2-bd6e404d-6e02-45c1-826c-a569ca947fce,1,1,1,1,1,0,0,20180823,20190823 +2-ef5a027b-353d-4071-b10b-f232e1c6b8cf,0,0,0,0,0,1,0,20180823,20190823 diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/calendar_dates.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/calendar_dates.txt new file mode 100755 index 000000000..74c1ef632 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/calendar_dates.txt @@ -0,0 +1 @@ +service_id,date,exception_type diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/fare_attributes.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/fare_attributes.txt new file mode 100755 index 000000000..8a1793839 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/fare_attributes.txt @@ -0,0 +1 @@ +fare_id,price,currency_type,payment_method,transfers,transfer_duration diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/fare_rules.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/fare_rules.txt new file mode 100755 index 000000000..c7f6b54a3 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/fare_rules.txt @@ -0,0 +1 @@ +fare_id,route_id,origin_id,destination_id,contains_id diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/feed_info.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/feed_info.txt new file mode 100644 index 000000000..6ab097246 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/feed_info.txt @@ -0,0 +1,2 @@ +feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_version +feed1,IBI,http://www.ibigroup.com,en,1.0 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/frequencies.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/frequencies.txt new file mode 100755 index 000000000..9b46cdeef --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/frequencies.txt @@ -0,0 +1 @@ +trip_id,start_time,end_time,headway_secs,exact_times diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/routes.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/routes.txt new file mode 100644 index 000000000..fdd60f057 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/routes.txt @@ -0,0 +1,2 @@ +agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url +,2,2,Agency2Route2,,3,,6A478F,FFFFFF, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/shapes.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/shapes.txt new file mode 100644 index 000000000..78f8e4c59 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/shapes.txt @@ -0,0 +1,3 @@ +shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled 
+2-03c47dc1-bf37-4668-8c58-9847a496f92d,38.3223235,-122.3105574,1,0.0000000 +2-03c47dc1-bf37-4668-8c58-9847a496f92d,38.3221392,-122.3104330,2,23.1739361 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/stop_times.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/stop_times.txt new file mode 100644 index 000000000..1aa8dd0bf --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/stop_times.txt @@ -0,0 +1,3 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint +2-t104-sl2-p18-r45,14:00:00,14:00:00,100,1,,0,0,13.3808067,1 +2-t104-sl2-p18-r45,14:03:14,14:03:14,102,2,,0,0,1122.9721799,0 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/stops.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/stops.txt new file mode 100644 index 000000000..104792368 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/stops.txt @@ -0,0 +1,3 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding +100,89062,Terrace Dr at Liberty Dr,,38.2933330,-122.2694440,,http://ridethevine.rideralerts.com/InfoPoint/89062,0,,, +102,89140,Trancas St at Jefferson St,,38.3225000,-122.3011110,,http://ridethevine.rideralerts.com/InfoPoint/89140,0,,, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/transfers.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/transfers.txt new file mode 100755 index 000000000..357103c47 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/transfers.txt @@ -0,0 +1 @@ +from_stop_id,to_stop_id,transfer_type,min_transfer_time diff --git a/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/trips.txt b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/trips.txt new file mode 100644 index 000000000..7245d28c6 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/no-agency-id-2/trips.txt @@ -0,0 +1,3 @@ +route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id +2,2-t104-sl2-p18-r45,08_1,,0,20802,2-03c47dc1-bf37-4668-8c58-9847a496f92d,0,0,2-bd6e404d-6e02-45c1-826c-a569ca947fce +2,2-t104-sl3-p17-r1B,08_1,,0,22008,2-03c47dc1-bf37-4668-8c58-9847a496f92d,0,0,2-ef5a027b-353d-4071-b10b-f232e1c6b8cf \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/mtc-rtd-mock-responses/__files/package-info.java b/src/test/resources/com/conveyal/datatools/mtc-rtd-mock-responses/__files/package-info.java new file mode 100644 index 000000000..df13e5fc2 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/mtc-rtd-mock-responses/__files/package-info.java @@ -0,0 +1,7 @@ +/** + * All of the files in this directory (aside from this one) are mock responses used by a WireMock server. The WireMock + * server is used to emulate a 3rd party API (in this case the MTC RTD API) and it makes things less cluttered to store the JSON + * files in here instead of in the test class code.
+ */ + +package com.conveyal.datatools.mtc-rtd-mock-responses.__files; diff --git a/src/test/resources/com/conveyal/datatools/mtc-rtd-mock-responses/__files/rtdGetResponse.json b/src/test/resources/com/conveyal/datatools/mtc-rtd-mock-responses/__files/rtdGetResponse.json new file mode 100644 index 000000000..018834197 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/mtc-rtd-mock-responses/__files/rtdGetResponse.json @@ -0,0 +1 @@ +{"AgencyId":"DE","AgencyName":"Dumbarton Express Consortium","AgencyPhone":null,"RttAgencyName":"Dumbarton Express","RttEnabled":"Y","AgencyShortName":"Dumbarton","AddressLat":null,"AddressLon":null,"DefaultRouteType":null,"CarrierStatus":null,"AgencyAddress":"AC Transit (administrator of the Dumbarton Express)","AgencyEmail":"new@email.example.com","AgencyUrl":"https://dumbartonexpress.com","AgencyFareUrl":"","EditedBy":"binh.dam@ibigroup.com","EditedDate":"2021-10-29T16:06:07.914796"} \ No newline at end of file
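For context on the mtc-rtd-mock-responses/__files directory added above, here is a minimal sketch of how a test might serve rtdGetResponse.json from a WireMock server. It assumes the file-source root is pointed at the mtc-rtd-mock-responses directory (WireMock resolves withBodyFile paths relative to its __files subfolder); the class name RtdWireMockExample and the stub path /api/Carrier/DE are illustrative placeholders, not the endpoint registered by the actual test code.

import static com.github.tomakehurst.wiremock.client.WireMock.aResponse;
import static com.github.tomakehurst.wiremock.client.WireMock.get;
import static com.github.tomakehurst.wiremock.client.WireMock.urlPathEqualTo;

import com.github.tomakehurst.wiremock.WireMockServer;
import com.github.tomakehurst.wiremock.core.WireMockConfiguration;

public class RtdWireMockExample {
    public static void main(String[] args) {
        // The file-source root must contain a "__files" folder; withBodyFile(...)
        // below resolves rtdGetResponse.json relative to that folder.
        WireMockServer server = new WireMockServer(
            WireMockConfiguration.options()
                .dynamicPort()
                .usingFilesUnderDirectory("src/test/resources/com/conveyal/datatools/mtc-rtd-mock-responses")
        );
        server.start();

        // Hypothetical endpoint path: substitute whatever URL the code under test
        // actually calls on the RTD API.
        server.stubFor(get(urlPathEqualTo("/api/Carrier/DE"))
            .willReturn(aResponse()
                .withHeader("Content-Type", "application/json")
                .withBodyFile("rtdGetResponse.json")));

        // Point the code under test at http://localhost:<port>, run assertions, then stop.
        System.out.println("Mock RTD API listening on port " + server.port());
        server.stop();
    }
}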