Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Adding output fields for dataset items #51

Merged
merged 5 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const actorRunFinishedTrigger = require('./src/triggers/actor_run_finished');
const actorsTrigger = require('./src/triggers/actors');
const actorsWithStoreTrigger = require('./src/triggers/actors_with_store');
const getActorAdditionalFieldsTest = require('./src/triggers/actor_additional_fields');
const getDatasetOutputFieldsTest = require('./src/triggers/dataset_additional_output_fields');
const taskRunCreate = require('./src/creates/task_run');
const actorRunCreate = require('./src/creates/actor_run');
const scrapeSingleUrlCreate = require('./src/creates/scrape_single_url');
Expand Down Expand Up @@ -46,6 +47,7 @@ const App = {
[actorRunFinishedTrigger.key]: actorRunFinishedTrigger,
[actorsTrigger.key]: actorsTrigger,
[getActorAdditionalFieldsTest.key]: getActorAdditionalFieldsTest,
[getDatasetOutputFieldsTest.key]: getDatasetOutputFieldsTest,
[actorsWithStoreTrigger.key]: actorsWithStoreTrigger,
},

Expand Down
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "apify-zapier-integration",
"version": "3.1.1",
"version": "3.1.2",
"description": "Apify integration for Zapier platform",
"homepage": "https://apify.com/",
"author": "Jakub Drobník <[email protected]>",
Expand Down
11 changes: 8 additions & 3 deletions src/consts.js
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,12 @@ const ACTOR_RUN_OUTPUT_FIELDS = [
{ key: 'defaultDatasetId', label: 'Default dataset ID', type: 'string' },
{ key: 'defaultRequestQueueId', label: 'Default request queue ID', type: 'string' },
{ key: 'OUTPUT', label: 'Output' },
{ key: 'datasetItems', label: 'Dataset items' },
{ key: 'datasetItemsFileUrls', label: 'Dataset items file URLs', type: 'string' },
{ key: 'detailsPageUrl', label: 'Details page URL', type: 'string' },
{ key: 'containerUrl', label: 'Container URL', type: 'string' },
{ key: 'datasetItemsFileUrls__xml', label: 'Dataset items XML file URL', type: 'string' },
{ key: 'datasetItemsFileUrls__csv', label: 'Dataset items CSV file URL', type: 'string' },
{ key: 'datasetItemsFileUrls__json', label: 'Dataset items JSON file URL', type: 'string' },
{ key: 'datasetItemsFileUrls__xlsx', label: 'Dataset items Excel file URL', type: 'string' },
];

const SCRAPE_SINGLE_URL_RUN_OUTPUT_FIELDS = [
Expand Down Expand Up @@ -220,8 +222,11 @@ const DATASET_OUTPUT_FIELDS = [
{ key: 'cleanItemCount', label: 'Clean item count', type: 'integer' },
{ key: 'actId', label: 'Actor ID', type: 'string' },
{ key: 'actRunId', label: 'Actor run ID', type: 'string' },
{ key: 'items', label: 'Items' },
{ key: 'itemsFileUrls', label: 'Items file URLs', type: 'string' },
{ key: 'itemsFileUrls_xml', label: 'Items XML file URL', type: 'string' },
{ key: 'itemsFileUrls_csv', label: 'Items CSV file URL', type: 'string' },
{ key: 'itemsFileUrls_json', label: 'Items JSON file URL', type: 'string' },
{ key: 'itemsFileUrls_xlsx', label: 'Items Excel file URL', type: 'string' },
];

/**
Expand Down
6 changes: 5 additions & 1 deletion src/creates/actor_run.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const {
prefixInputFieldKey,
} = require('../apify_helpers');
const { wrapRequestWithRetries } = require('../request_helpers');
const { getDatasetOutputFields } = require('../output_fields');

const runActor = async (z, bundle) => {
const { actorId, runSync, inputBody, inputContentType, build, timeoutSecs, memoryMbytes } = bundle.inputData;
Expand Down Expand Up @@ -123,6 +124,9 @@ module.exports = {
perform: runActor,

sample: ACTOR_RUN_SAMPLE,
outputFields: ACTOR_RUN_OUTPUT_FIELDS,
outputFields: [
...ACTOR_RUN_OUTPUT_FIELDS,
getDatasetOutputFields,
],
},
};
58 changes: 58 additions & 0 deletions src/output_fields.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
const _ = require('lodash');
const { ACTOR_JOB_STATUSES } = require('@apify/consts');
const { getDatasetItems } = require('./apify_helpers');
const { wrapRequestWithRetries } = require('./request_helpers');
const { APIFY_API_ENDPOINTS } = require('./consts');
const { convertPlainObjectToFieldSchema } = require('./zapier_helpers');

/**
 * Derives output field definitions from a sample of dataset items.
 *
 * Up to 10 items are requested and merged into one object, so keys that are
 * present only in some of the sampled items still end up in the result.
 *
 * @param {object} z - Zapier `z` object; passed on to `getDatasetItems` and used for error logging.
 * @param {string} datasetId - ID of the dataset to sample.
 * @param {string} actorId - Passed on to `getDatasetItems` as its last argument.
 * @param {string} [keyPrefix='datasetItems[]'] - Prefix placed in front of every generated field key.
 * @returns {Promise<*[]>} Generated field definitions; an empty array when fetching
 *   fails or the dataset has no items.
 */
const getDatasetItemsOutputFields = async (z, datasetId, actorId, keyPrefix = 'datasetItems[]') => {
    const sampleOptions = { limit: 10 };

    let response;
    try {
        response = await getDatasetItems(z, datasetId, sampleOptions, actorId);
    } catch (error) {
        z.console.error('Error while fetching dataset items', error);
        // Best effort: any fetch failure simply means no extra fields are offered.
        return [];
    }

    const { items: sampleItems } = response;
    // The sampled items are merged so the schema covers the union of their keys.
    return sampleItems.length === 0
        ? []
        : convertPlainObjectToFieldSchema(_.merge({}, ...sampleItems), keyPrefix);
};

/**
 * Dynamic output fields for Actor runs, derived from the default dataset of the
 * Actor's last succeeded run.
 *
 * This is a best-effort helper: whenever the fields cannot be determined
 * (no Actor selected, no succeeded run yet, request failure, run without a
 * default dataset) it resolves to an empty array instead of throwing.
 *
 * @param {object} z - Zapier `z` object (`z.request` and `z.console` are used).
 * @param {object} bundle - Zapier bundle; `bundle.inputData.actorId` selects the Actor.
 * @returns {Promise<*[]>} Field definitions generated from the dataset items, or `[]`.
 */
const getDatasetOutputFields = async (z, bundle) => {
    const { actorId } = bundle.inputData;
    // Without an Actor ID there is no run to look up, so skip the request entirely.
    if (!actorId) return [];

    let lastRunResponse;
    try {
        lastRunResponse = await wrapRequestWithRetries(z.request, {
            url: `${APIFY_API_ENDPOINTS.actors}/${actorId}/runs/last`,
            params: {
                status: ACTOR_JOB_STATUSES.SUCCEEDED,
            },
        });
    } catch (err) {
        // 404 status = There is no successful run yet, which is expected and not worth logging.
        if (err.status !== 404) {
            z.console.error('Error while fetching last successful Actor run', err);
        }
        // No successful run yet or any other error: offer no additional fields.
        return [];
    }

    const run = lastRunResponse && lastRunResponse.data;
    // Guard against an unexpected response shape or a run without a default dataset.
    if (!run || !run.defaultDatasetId) return [];

    return getDatasetItemsOutputFields(z, run.defaultDatasetId, actorId);
};

module.exports = {
getDatasetItemsOutputFields,
getDatasetOutputFields,
};
6 changes: 5 additions & 1 deletion src/searches/actor_last_run.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ const { ACTOR_JOB_STATUSES } = require('@apify/consts');
const { ACTOR_RUN_SAMPLE, ACTOR_RUN_OUTPUT_FIELDS, APIFY_API_ENDPOINTS } = require('../consts');
const { enrichActorRun } = require('../apify_helpers');
const { wrapRequestWithRetries } = require('../request_helpers');
const { getDatasetOutputFields } = require('../output_fields');

const getLastActorRun = async (z, bundle) => {
const { actorId, status } = bundle.inputData;
Expand Down Expand Up @@ -51,6 +52,9 @@ module.exports = {
perform: getLastActorRun,

sample: ACTOR_RUN_SAMPLE,
outputFields: ACTOR_RUN_OUTPUT_FIELDS,
outputFields: [
...ACTOR_RUN_OUTPUT_FIELDS,
getDatasetOutputFields,
],
},
};
43 changes: 27 additions & 16 deletions src/searches/fetch_items.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,33 @@ const { APIFY_API_ENDPOINTS, DATASET_PUBLISH_FIELDS,
DATASET_OUTPUT_FIELDS, DATASET_SAMPLE } = require('../consts');
const { wrapRequestWithRetries } = require('../request_helpers');
const { getDatasetItems } = require('../apify_helpers');
const { getDatasetItemsOutputFields } = require('../output_fields');

const getItems = async (z, bundle) => {
const { datasetIdOrName, limit, offset } = bundle.inputData;
let dataset;
const findDatasetByNameOrId = async (z, datasetIdOrName) => {
// The first try to get dataset by ID.
try {
const datasetResponse = await wrapRequestWithRetries(z.request, {
url: `${APIFY_API_ENDPOINTS.datasets}/${datasetIdOrName}`,
method: 'GET',
});
dataset = datasetResponse.data;
return datasetResponse.data;
} catch (err) {
if (!err.message.includes('not found')) throw err;
}

// The second creates dataset with name, in case datasetId not found.
if (!dataset) {
const storeResponse = await wrapRequestWithRetries(z.request, {
url: `${APIFY_API_ENDPOINTS.datasets}`,
method: 'POST',
params: {
name: datasetIdOrName,
},
});
dataset = storeResponse.data;
}
const storeResponse = await wrapRequestWithRetries(z.request, {
url: `${APIFY_API_ENDPOINTS.datasets}`,
method: 'POST',
params: {
name: datasetIdOrName,
},
});
return storeResponse.data;
};

const getItems = async (z, bundle) => {
const { datasetIdOrName, limit, offset } = bundle.inputData;
const dataset = await findDatasetByNameOrId(z, datasetIdOrName);

// NOTE: Because testing user had _id instead of id in data and we run integration tests under this user.
dataset.id = dataset.id || dataset._id;
Expand All @@ -44,6 +45,13 @@ const getItems = async (z, bundle) => {
}];
};

/**
 * Dynamic output fields for the dataset items search, generated from the items
 * of the dataset selected in `bundle.inputData.datasetIdOrName`.
 *
 * Best effort: resolves to an empty array when no dataset is selected yet or
 * when the dataset cannot be loaded, so that a failure here does not prevent
 * the static output fields from being used.
 *
 * @param {object} z - Zapier `z` object.
 * @param {object} bundle - Zapier bundle with `inputData.datasetIdOrName`.
 * @returns {Promise<*[]>} Field definitions keyed with the `items[]` prefix, or `[]`.
 */
const getAdditionalDatasetItemsOutputFields = async (z, bundle) => {
    const { datasetIdOrName } = bundle.inputData;
    // Nothing selected yet: do not hit the API with an empty identifier.
    if (!datasetIdOrName) return [];

    let dataset;
    try {
        dataset = await findDatasetByNameOrId(z, datasetIdOrName);
    } catch (err) {
        z.console.error('Error while loading dataset for output fields', err);
        return [];
    }
    if (!dataset) return [];

    // Same fallback as in getItems: some datasets expose `_id` instead of `id`.
    const datasetId = dataset.id || dataset._id;
    if (!datasetId) return [];

    return getDatasetItemsOutputFields(z, datasetId, dataset.actId, 'items[]');
};

module.exports = {
key: 'fetchDatasetItems',
noun: 'Dataset Items',
Expand Down Expand Up @@ -80,6 +88,9 @@ module.exports = {

perform: getItems,
sample: DATASET_SAMPLE,
outputFields: DATASET_OUTPUT_FIELDS,
outputFields: [
...DATASET_OUTPUT_FIELDS,
getAdditionalDatasetItemsOutputFields,
],
},
};
20 changes: 20 additions & 0 deletions src/triggers/dataset_additional_output_fields.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
const { getDatasetOutputFields } = require('../output_fields');

/**
* Hidden trigger that exists only so the getDatasetOutputFields function can be tested.
* Exposing the function through a trigger is the only way to exercise it in the Zapier context;
* see https://github.com/zapier/zapier-platform-cli/issues/418 for background.
*/
module.exports = {
key: 'getDatasetOutputFieldsTest',
noun: 'Dataset Output Additional Fields',
display: {
label: 'Dataset Output Additional Fields',
description: 'This is a hidden trigger used to test getDatasetOutputFields function.',
hidden: true,
},
operation: {
// This trigger is hidden, so no inputFields are declared;
// getDatasetOutputFields is called directly as the perform function.
perform: getDatasetOutputFields,
},
};
48 changes: 48 additions & 0 deletions src/zapier_helpers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
const _ = require('lodash');
const dayjs = require('dayjs');
// Strict ISO 8601 date / date-time shape, e.g. `2024-11-26` or `2024-11-26T10:15:30.000Z`.
// Used as a gate before dayjs, because dayjs falls back to the engine's lenient Date parsing
// for other strings, which accepts values such as '1' or 'Sample 1'.
const ISO_DATETIME_PATTERN = /^\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])(?:[T ](?:[01]\d|2[0-3]):[0-5]\d(?::[0-5]\d(?:\.\d+)?)?(?:Z|[+-][01]\d:?[0-5]\d)?)?$/;

/**
 * Tells whether a value is a string holding an ISO 8601 date or date-time.
 * @param {*} value
 * @returns {boolean}
 */
const isDateTimeString = (value) => _.isString(value)
    && ISO_DATETIME_PATTERN.test(value)
    && dayjs(value).isValid();

/**
 * Maps a primitive value to a field type.
 * @param {*} value
 * @returns {string|undefined} 'datetime', 'string', 'number' or 'boolean';
 *   undefined when the value is not a supported primitive (null, undefined, arrays, objects...).
 */
const getPrimitiveFieldType = (value) => {
    if (_.isString(value)) return isDateTimeString(value) ? 'datetime' : 'string';
    if (_.isNumber(value)) return 'number';
    if (_.isBoolean(value)) return 'boolean';
    return undefined;
};

/**
 * Converts a plain object to an array of field schema objects.
 *
 * - Strings, numbers and booleans become `{ key, type }`; ISO 8601 strings get type 'datetime'.
 * - Nested plain objects are flattened using `__` as the key separator.
 * - Arrays of plain objects are described by their first element using a `[]` key suffix.
 * - Other arrays become `{ key, type, list: true }`, where `type` is taken from the first
 *   element and omitted when it cannot be determined (e.g. empty array).
 * - Any other value (null, undefined, non-plain objects) becomes `{ key }` without a type.
 *
 * @param {*} object - Object to describe; anything that is not a plain object yields `[]`.
 * @param {string} [keyPrefix=''] - Prefix placed in front of every generated key.
 * @returns {*[]} Field schema objects in the object's key order.
 */
const convertPlainObjectToFieldSchema = (object, keyPrefix = '') => {
    if (!_.isPlainObject(object)) return [];

    const fieldSchema = [];

    Object.entries(object).forEach(([key, value]) => {
        const fullKey = keyPrefix ? `${keyPrefix}${key}` : key;

        if (_.isArray(value)) {
            const firstItem = value[0];
            // If array contains objects, describe their fields using keys with [].
            if (_.isPlainObject(firstItem)) {
                fieldSchema.push(...convertPlainObjectToFieldSchema(firstItem, `${fullKey}[]`));
                return;
            }
            // Array of primitives. The type is left out when the first element does not map
            // to a known field type, instead of emitting raw `typeof` results such as
            // 'undefined' or 'object'.
            const itemType = getPrimitiveFieldType(firstItem);
            fieldSchema.push(itemType
                ? { key: fullKey, type: itemType, list: true }
                : { key: fullKey, list: true });
            return;
        }

        if (_.isPlainObject(value)) {
            // For nested objects, recursively process fields.
            fieldSchema.push(...convertPlainObjectToFieldSchema(value, `${fullKey}__`));
            return;
        }

        // Primitive values; anything else (null, undefined, other objects) gets no type.
        const type = getPrimitiveFieldType(value);
        fieldSchema.push(type ? { key: fullKey, type } : { key: fullKey });
    });

    return fieldSchema;
};

module.exports = {
convertPlainObjectToFieldSchema,
};
44 changes: 40 additions & 4 deletions test/creates/actor_run.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ describe('create actor run', () => {
testActorId = actor.id;
});

after(async () => {
await apifyClient.actor(testActorId).delete();
});

it('load correctly Actors with Actors from store with hidden trigger', async () => {
const bundle = {
authData: {
Expand Down Expand Up @@ -149,6 +153,42 @@ describe('create actor run', () => {
expect(waitUntilField.default).to.be.equal(JSON.stringify(waitUntilFieldSchema.prefill, null, 2));
}).timeout(120000);

// Integration test: runs the test Actor with custom dataset items and checks the
// output fields generated by the hidden getDatasetOutputFieldsTest trigger.
it('loading of dynamic output fields for dataset items work', async () => {
const bundle = {
authData: {
token: TEST_USER_TOKEN,
},
inputData: {
// Actor with input schema
actorId: testActorId,
},
};
// Items deliberately differ in shape so the expected fields cover primitives,
// a nested object, a date, a list of primitives and a list of objects.
const items = [
{ a: 1, b: 2, c: 'c', d: 'd' },
{ a: 2, b: 3, c: 'c', d: 'd' },
{ a: 3, b: 4, e: { a: 1, b: 2 } },
{ a: 4, b: 5, f: new Date() },
{ a: 4, b: 5, g: ['a', 'b'], h: [{ a: 1, b: 2 }] },
];
// Run the Actor first: the output fields are generated from the latest successful run.
await apifyClient.actor(testActorId).call({
datasetItems: items,
});
const fields = await appTester(App.triggers.getDatasetOutputFieldsTest.operation.perform, bundle);
// Expected keys are the union of keys across all items; nested objects use `__`
// and lists of objects use `[]` in the key.
// NOTE(review): `f` is expected as 'datetime' — presumably the Date is stored as an ISO string; confirm.
expect(fields).to.be.eql([
{ key: 'datasetItems[]a', type: 'number' },
{ key: 'datasetItems[]b', type: 'number' },
{ key: 'datasetItems[]c', type: 'string' },
{ key: 'datasetItems[]d', type: 'string' },
{ key: 'datasetItems[]e__a', type: 'number' },
{ key: 'datasetItems[]e__b', type: 'number' },
{ key: 'datasetItems[]f', type: 'datetime' },
{ key: 'datasetItems[]g', type: 'string', list: true },
{ key: 'datasetItems[]h[]a', type: 'number' },
{ key: 'datasetItems[]h[]b', type: 'number' },
]);
}).timeout(120000);

it('runSync work', async () => {
const runOptions = {
build: 'latest',
Expand Down Expand Up @@ -235,8 +275,4 @@ describe('create actor run', () => {
expect(testResult).to.have.all.keys(_.without(Object.keys(ACTOR_RUN_SAMPLE), 'exitCode'));
expect(testResult.finishedAt).to.be.eql(null);
}).timeout(50000);

after(async () => {
await apifyClient.actor(testActorId).delete();
});
});
6 changes: 5 additions & 1 deletion test/helpers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ const createAndBuildActor = async () => {
Apify.main(async (context) => {
const input = await Apify.getInput();
console.log('It works.');
await Apify.pushData({ foo: 'bar' });
if (input && input.datasetItems) {
await Apify.pushData(input.datasetItems);
} else {
await Apify.pushData({ foo: 'bar' });
}
if (input && input.outputRandomFile) {
await Apify.setValue('OUTPUT', 'blabla', { contentType: 'text/plain' });
} else {
Expand Down
Loading
Loading