feat: Adding output fields for dataset items #51

Merged (5 commits) on Nov 26, 2024
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,9 @@
## 3.2.0 / 2024-11-23

* Improve output fields for all actions where a dataset is included
* Updated packages


## 3.1.1 / 2023-11-14

* Fix falsy values in Actor run action
2 changes: 2 additions & 0 deletions index.js
@@ -8,6 +8,7 @@ const actorRunFinishedTrigger = require('./src/triggers/actor_run_finished');
const actorsTrigger = require('./src/triggers/actors');
const actorsWithStoreTrigger = require('./src/triggers/actors_with_store');
const getActorAdditionalFieldsTest = require('./src/triggers/actor_additional_fields');
const getActorDatasetOutputFieldsTest = require('./src/triggers/actor_dataset_additional_output_fields');
const taskRunCreate = require('./src/creates/task_run');
const actorRunCreate = require('./src/creates/actor_run');
const scrapeSingleUrlCreate = require('./src/creates/scrape_single_url');
@@ -46,6 +47,7 @@ const App = {
[actorRunFinishedTrigger.key]: actorRunFinishedTrigger,
[actorsTrigger.key]: actorsTrigger,
[getActorAdditionalFieldsTest.key]: getActorAdditionalFieldsTest,
[getActorDatasetOutputFieldsTest.key]: getActorDatasetOutputFieldsTest,
[actorsWithStoreTrigger.key]: actorsWithStoreTrigger,
},

4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "apify-zapier-integration",
"version": "3.1.1",
"version": "3.2.0",
"description": "Apify integration for Zapier platform",
"homepage": "https://apify.com/",
"author": "Jakub Drobník <[email protected]>",
4 changes: 2 additions & 2 deletions src/apify_helpers.js
@@ -130,7 +130,7 @@
};

// Process to subscribe to Apify webhook
const subscribeWebkook = async (z, bundle, condition) => {
const subscribeWebhook = async (z, bundle, condition) => {
[Review comment from the PR author: Just fixing typo]

const webhookOpts = {
eventTypes: WEBHOOK_EVENT_TYPE_GROUPS.ACTOR_RUN_TERMINAL,
condition,
@@ -370,7 +370,7 @@
}
default: {
// This should not happen.
console.log(`Unknown input schema type: ${definition.type}`, definition);

[GitHub Actions (run-test) annotation on line 373 of src/apify_helpers.js: warning, unexpected console statement]
// eslint-disable-next-line no-continue
continue;
}
@@ -534,7 +534,7 @@

module.exports = {
enrichActorRun,
subscribeWebkook,
subscribeWebhook,
unsubscribeWebhook,
getActorRun,
getOrCreateKeyValueStore,
12 changes: 8 additions & 4 deletions src/consts.js
@@ -156,10 +156,12 @@ const ACTOR_RUN_OUTPUT_FIELDS = [
{ key: 'defaultDatasetId', label: 'Default dataset ID', type: 'string' },
{ key: 'defaultRequestQueueId', label: 'Default request queue ID', type: 'string' },
{ key: 'OUTPUT', label: 'Output' },
{ key: 'datasetItems', label: 'Dataset items' },
{ key: 'datasetItemsFileUrls', label: 'Dataset items file URLs', type: 'string' },
{ key: 'detailsPageUrl', label: 'Details page URL', type: 'string' },
{ key: 'containerUrl', label: 'Container URL', type: 'string' },
{ key: 'datasetItemsFileUrls__xml', label: 'Dataset items XML file URL', type: 'string' },
{ key: 'datasetItemsFileUrls__csv', label: 'Dataset items CSV file URL', type: 'string' },
{ key: 'datasetItemsFileUrls__json', label: 'Dataset items JSON file URL', type: 'string' },
{ key: 'datasetItemsFileUrls__xlsx', label: 'Dataset items Excel file URL', type: 'string' },
];

const SCRAPE_SINGLE_URL_RUN_OUTPUT_FIELDS = [
@@ -220,8 +222,10 @@ const DATASET_OUTPUT_FIELDS = [
{ key: 'cleanItemCount', label: 'Clean item count', type: 'integer' },
{ key: 'actId', label: 'Actor ID', type: 'string' },
{ key: 'actRunId', label: 'Actor run ID', type: 'string' },
{ key: 'items', label: 'Items' },
{ key: 'itemsFileUrls', label: 'Items file URLs', type: 'string' },
{ key: 'itemsFileUrls__xml', label: 'Items XML file URL', type: 'string' },
{ key: 'itemsFileUrls__csv', label: 'Items CSV file URL', type: 'string' },
{ key: 'itemsFileUrls__json', label: 'Items JSON file URL', type: 'string' },
{ key: 'itemsFileUrls__xlsx', label: 'Items Excel file URL', type: 'string' },
];
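
Note on the per-format keys added above: Zapier output field keys use a double-underscore convention for nested properties, so a key like itemsFileUrls__csv labels itemsFileUrls.csv in the returned record. A minimal sketch of the dataset shape these field definitions describe (values are illustrative, not taken from this PR):

// Illustrative only: the shape of the object the new format-specific keys map onto.
const sampleDataset = {
    id: 'someDatasetId',
    itemCount: 1,
    items: [
        { url: 'https://example.com', title: 'Example' },
    ],
    // Each key such as itemsFileUrls__csv points at one of these nested URLs.
    itemsFileUrls: {
        xml: 'https://api.apify.com/v2/datasets/someDatasetId/items?format=xml',
        csv: 'https://api.apify.com/v2/datasets/someDatasetId/items?format=csv',
        json: 'https://api.apify.com/v2/datasets/someDatasetId/items?format=json',
        xlsx: 'https://api.apify.com/v2/datasets/someDatasetId/items?format=xlsx',
    },
};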

6 changes: 5 additions & 1 deletion src/creates/actor_run.js
@@ -7,6 +7,7 @@ const {
prefixInputFieldKey,
} = require('../apify_helpers');
const { wrapRequestWithRetries } = require('../request_helpers');
const { getActorDatasetOutputFields } = require('../output_fields');

const runActor = async (z, bundle) => {
const { actorId, runSync, inputBody, inputContentType, build, timeoutSecs, memoryMbytes } = bundle.inputData;
@@ -123,6 +124,9 @@ module.exports = {
perform: runActor,

sample: ACTOR_RUN_SAMPLE,
outputFields: ACTOR_RUN_OUTPUT_FIELDS,
outputFields: [
...ACTOR_RUN_OUTPUT_FIELDS,
getActorDatasetOutputFields,
],
},
};
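
For context: in zapier-platform, entries in the outputFields array may be either static field definitions or functions; Zapier calls each function with (z, bundle) when it needs the fields and concatenates the results with the static definitions. A minimal sketch of the combined pattern used above (field names here are illustrative):

// Sketch of the static + dynamic outputFields pattern: the function entry
// returns extra field definitions computed at runtime from live dataset items.
const outputFields = [
    { key: 'id', label: 'Run ID', type: 'string' }, // static definition
    async (z, bundle) => [ // dynamic definition, evaluated by Zapier on demand
        { key: 'datasetItems[]title', label: 'datasetItems[]title', type: 'string' },
    ],
];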
6 changes: 5 additions & 1 deletion src/creates/task_run.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
const { APIFY_API_ENDPOINTS, TASK_RUN_SAMPLE, TASK_RUN_OUTPUT_FIELDS } = require('../consts');
const { enrichActorRun } = require('../apify_helpers');
const { wrapRequestWithRetries } = require('../request_helpers');
const { getTaskDatasetOutputFields } = require('../output_fields');

const RAW_INPUT_LABEL = 'Input JSON overrides';

@@ -85,6 +86,9 @@ module.exports = {
perform: runTask,

sample: TASK_RUN_SAMPLE,
outputFields: TASK_RUN_OUTPUT_FIELDS,
outputFields: [
...TASK_RUN_OUTPUT_FIELDS,
getTaskDatasetOutputFields,
],
},
};
81 changes: 81 additions & 0 deletions src/output_fields.js
@@ -0,0 +1,81 @@
const _ = require('lodash');
const { ACTOR_JOB_STATUSES } = require('@apify/consts');
const { getDatasetItems } = require('./apify_helpers');
const { wrapRequestWithRetries } = require('./request_helpers');
const { APIFY_API_ENDPOINTS } = require('./consts');
const { convertPlainObjectToFieldSchema } = require('./zapier_helpers');

/**
* Download items from dataset and create FieldSchema out of them.
* @param {string} datasetId
* @returns {Promise<*[]>}
*/
const getDatasetItemsOutputFields = async (z, datasetId, actorId, keyPrefix = 'datasetItems[]') => {
let datasetItems;
try {
datasetItems = await getDatasetItems(z, datasetId, {
limit: 10,
}, actorId);
} catch (err) {
z.console.error('Error while fetching dataset items', err);
// Return default output fields, if there is no successful run yet or any other error.
return [];
}

const { items } = datasetItems;
// If there are no items, return default output fields.
if (items.length === 0) return [];
// NOTE: We are using the first 10 items to generate output fields to cover most of the cases.
const mergedItem = _.merge({}, ...items);
return convertPlainObjectToFieldSchema(mergedItem, keyPrefix);
};

const getActorDatasetOutputFields = async (z, bundle) => {
const { actorId } = bundle.inputData;
let lastSuccessDatasetItems;
try {
lastSuccessDatasetItems = await wrapRequestWithRetries(z.request, {
url: `${APIFY_API_ENDPOINTS.actors}/${actorId}/runs/last`,
params: {
status: ACTOR_JOB_STATUSES.SUCCEEDED,
},
});
} catch (err) {
// 404 status = there is no successful run yet.
if (err.status !== 404) {
z.console.error('Error while fetching dataset items', err);
}
// Return default output fields, if there is no successful run yet or any other error.
return [];
}
const { data: run } = lastSuccessDatasetItems;
return getDatasetItemsOutputFields(z, run.defaultDatasetId, actorId);
};

const getTaskDatasetOutputFields = async (z, bundle) => {
const { taskId } = bundle.inputData;
let lastSuccessDatasetItems;
try {
lastSuccessDatasetItems = await wrapRequestWithRetries(z.request, {
url: `${APIFY_API_ENDPOINTS.tasks}/${taskId}/runs/last`,
params: {
status: ACTOR_JOB_STATUSES.SUCCEEDED,
},
});
} catch (err) {
// 404 status = there is no successful run yet.
if (err.status !== 404) {
z.console.error('Error while fetching dataset items', err);
}
// Return default output fields, if there is no successful run yet or any other error.
return [];
}
const { data: run } = lastSuccessDatasetItems;
return getDatasetItemsOutputFields(z, run.defaultDatasetId, run.actId);
};

module.exports = {
getDatasetItemsOutputFields,
getActorDatasetOutputFields,
getTaskDatasetOutputFields,
};
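
The helper convertPlainObjectToFieldSchema is imported from ./zapier_helpers and is not part of this diff. A rough sketch of what such a helper could look like, offered as an assumption for illustration rather than the actual implementation: it walks the merged item and emits Zapier field definitions under the given key prefix.

const _ = require('lodash');

// Hypothetical sketch only: recursively turn a plain object into Zapier field
// definitions, e.g. { url: '...' } with prefix 'datasetItems[]' becomes
// { key: 'datasetItems[]url', label: 'datasetItems[]url', type: 'string' }.
const convertPlainObjectToFieldSchemaSketch = (obj, keyPrefix = '') => {
    const fields = [];
    Object.entries(obj).forEach(([key, value]) => {
        const fieldKey = `${keyPrefix}${key}`;
        if (_.isPlainObject(value)) {
            // Nested objects continue with the double-underscore separator.
            fields.push(...convertPlainObjectToFieldSchemaSketch(value, `${fieldKey}__`));
        } else if (Array.isArray(value)) {
            fields.push({ key: `${fieldKey}[]`, label: `${fieldKey}[]` });
        } else {
            const type = _.isNumber(value) ? 'number' : (_.isBoolean(value) ? 'boolean' : 'string');
            fields.push({ key: fieldKey, label: fieldKey, type });
        }
    });
    return fields;
};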
6 changes: 5 additions & 1 deletion src/searches/actor_last_run.js
@@ -2,6 +2,7 @@ const { ACTOR_JOB_STATUSES } = require('@apify/consts');
const { ACTOR_RUN_SAMPLE, ACTOR_RUN_OUTPUT_FIELDS, APIFY_API_ENDPOINTS } = require('../consts');
const { enrichActorRun } = require('../apify_helpers');
const { wrapRequestWithRetries } = require('../request_helpers');
const { getActorDatasetOutputFields } = require('../output_fields');

const getLastActorRun = async (z, bundle) => {
const { actorId, status } = bundle.inputData;
@@ -51,6 +52,9 @@ module.exports = {
perform: getLastActorRun,

sample: ACTOR_RUN_SAMPLE,
outputFields: ACTOR_RUN_OUTPUT_FIELDS,
outputFields: [
...ACTOR_RUN_OUTPUT_FIELDS,
getActorDatasetOutputFields,
],
},
};
43 changes: 27 additions & 16 deletions src/searches/fetch_items.js
@@ -3,32 +3,33 @@ const { APIFY_API_ENDPOINTS, DATASET_PUBLISH_FIELDS,
DATASET_OUTPUT_FIELDS, DATASET_SAMPLE } = require('../consts');
const { wrapRequestWithRetries } = require('../request_helpers');
const { getDatasetItems } = require('../apify_helpers');
const { getDatasetItemsOutputFields } = require('../output_fields');

const getItems = async (z, bundle) => {
const { datasetIdOrName, limit, offset } = bundle.inputData;
let dataset;
const findDatasetByNameOrId = async (z, datasetIdOrName) => {
// First, try to get the dataset by ID.
try {
const datasetResponse = await wrapRequestWithRetries(z.request, {
url: `${APIFY_API_ENDPOINTS.datasets}/${datasetIdOrName}`,
method: 'GET',
});
dataset = datasetResponse.data;
return datasetResponse.data;
} catch (err) {
if (!err.message.includes('not found')) throw err;
}

// Otherwise, create a dataset with that name, in case no dataset with the given ID exists.
if (!dataset) {
const storeResponse = await wrapRequestWithRetries(z.request, {
url: `${APIFY_API_ENDPOINTS.datasets}`,
method: 'POST',
params: {
name: datasetIdOrName,
},
});
dataset = storeResponse.data;
}
const storeResponse = await wrapRequestWithRetries(z.request, {
url: `${APIFY_API_ENDPOINTS.datasets}`,
method: 'POST',
params: {
name: datasetIdOrName,
},
});
return storeResponse.data;
};

const getItems = async (z, bundle) => {
const { datasetIdOrName, limit, offset } = bundle.inputData;
const dataset = await findDatasetByNameOrId(z, datasetIdOrName);

// NOTE: The testing user's data has _id instead of id, and we run integration tests under this user.
dataset.id = dataset.id || dataset._id;
@@ -44,6 +45,13 @@
}];
};

const getAdditionalDatasetItemsOutputFields = async (z, bundle) => {
const { datasetIdOrName } = bundle.inputData;
const dataset = await findDatasetByNameOrId(z, datasetIdOrName);

return getDatasetItemsOutputFields(z, dataset.id, dataset.actId, 'items[]');
};

module.exports = {
key: 'fetchDatasetItems',
noun: 'Dataset Items',
@@ -80,6 +88,9 @@

perform: getItems,
sample: DATASET_SAMPLE,
outputFields: DATASET_OUTPUT_FIELDS,
outputFields: [
...DATASET_OUTPUT_FIELDS,
getAdditionalDatasetItemsOutputFields,
],
},
};
6 changes: 5 additions & 1 deletion src/searches/task_last_run.js
@@ -2,6 +2,7 @@ const { ACTOR_JOB_STATUSES } = require('@apify/consts');
const { TASK_RUN_SAMPLE, TASK_RUN_OUTPUT_FIELDS, APIFY_API_ENDPOINTS } = require('../consts');
const { enrichActorRun } = require('../apify_helpers');
const { wrapRequestWithRetries } = require('../request_helpers');
const { getTaskDatasetOutputFields } = require('../output_fields');

const getLastTaskRun = async (z, bundle) => {
const { taskId, status } = bundle.inputData;
@@ -53,6 +54,9 @@ module.exports = {
perform: getLastTaskRun,

sample: TASK_RUN_SAMPLE,
outputFields: TASK_RUN_OUTPUT_FIELDS,
outputFields: [
...TASK_RUN_OUTPUT_FIELDS,
getTaskDatasetOutputFields,
],
},
};
20 changes: 20 additions & 0 deletions src/triggers/actor_dataset_additional_output_fields.js
@@ -0,0 +1,20 @@
const { getActorDatasetOutputFields } = require('../output_fields');

/**
* This hidden trigger is used to test the getActorDatasetOutputFields function.
* It is the only way to test a function like that in the Zapier context,
* see https://github.com/zapier/zapier-platform-cli/issues/418
*/
module.exports = {
key: 'getActorDatasetOutputFieldsTest',
noun: 'Dataset Output Additional Fields',
display: {
label: 'Dataset Output Additional Fields',
description: 'This is a hidden trigger used to test getActorDatasetOutputFields function.',
hidden: true,
},
operation: {
// since this is a "hidden" trigger, there aren't any inputFields needed
perform: getActorDatasetOutputFields,
},
};
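
Since the trigger above exists only so the dynamic output-fields function can be exercised, a test could invoke its perform through zapier-platform-core's appTester roughly as follows. This is a sketch; the module path, token handling, and actor ID are placeholders, not taken from this PR.

const zapier = require('zapier-platform-core');
const App = require('../../index');

const appTester = zapier.createAppTester(App);

// Hypothetical test sketch: call the hidden trigger's perform the way Zapier
// would, passing an actorId so getActorDatasetOutputFields can look up the
// Actor's last successful run and derive output fields from its dataset.
const bundle = {
    authData: { token: process.env.TEST_USER_TOKEN },
    inputData: { actorId: 'someActorId' },
};

appTester(App.triggers.getActorDatasetOutputFieldsTest.operation.perform, bundle)
    .then((fields) => console.log(fields))
    .catch((err) => console.error(err));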
14 changes: 9 additions & 5 deletions src/triggers/actor_run_finished.js
@@ -1,7 +1,8 @@
const { ACTOR_JOB_STATUSES } = require('@apify/consts');
const { APIFY_API_ENDPOINTS, TASK_RUN_SAMPLE, TASK_RUN_OUTPUT_FIELDS } = require('../consts');
const { enrichActorRun, subscribeWebkook, unsubscribeWebhook, getActorRun } = require('../apify_helpers');
const { APIFY_API_ENDPOINTS, ACTOR_RUN_SAMPLE, ACTOR_RUN_OUTPUT_FIELDS } = require('../consts');
const { enrichActorRun, subscribeWebhook, unsubscribeWebhook, getActorRun } = require('../apify_helpers');
const { wrapRequestWithRetries } = require('../request_helpers');
const { getActorDatasetOutputFields } = require('../output_fields');

const getFallbackActorRuns = async (z, bundle) => {
const response = await wrapRequestWithRetries(z.request, {
@@ -41,14 +42,17 @@
},
],
type: 'hook',
performSubscribe: (z, bundle) => subscribeWebkook(z, bundle, { actorId: bundle.inputData.actorId }),
performSubscribe: (z, bundle) => subscribeWebhook(z, bundle, { actorId: bundle.inputData.actorId }),
performUnsubscribe: unsubscribeWebhook,
// Perform is called after each hit to the webhook API
perform: getActorRun,
// PerformList is used to get testing data for users in Zapier app
performList: getFallbackActorRuns,
// In cases where Zapier needs to show an example record to the user, but we are unable to get a live example
sample: TASK_RUN_SAMPLE,
outputFields: TASK_RUN_OUTPUT_FIELDS,
sample: ACTOR_RUN_SAMPLE,
outputFields: [
...ACTOR_RUN_OUTPUT_FIELDS,
getActorDatasetOutputFields,
],
},
};