apify · MFori · Dec 18, 2024 · Dec 18, 2024 · Dec 18, 2024 · Dec 18, 2024
diff --git a/apify-api/openapi/components/schemas/datasets/DatasetFieldStatistics.yaml b/apify-api/openapi/components/schemas/datasets/DatasetFieldStatistics.yaml
@@ -0,0 +1,22 @@
+# Statistics describing the values of a single dataset field.
+# Every property is nullable and none of them is required.
+title: DatasetFieldStatistics
+type: object
+properties:
+  min:
+    type: number
+    description: 'Minimum value of the field. For numbers, this is calculated directly. For strings, this is the length of the shortest string. For arrays, this is the length of the shortest array. For objects, this is the number of keys in the smallest object.'
+    nullable: true
+  max:
+    type: number
+    description: 'Maximum value of the field. For numbers, this is calculated directly. For strings, this is the length of the longest string. For arrays, this is the length of the longest array. For objects, this is the number of keys in the largest object.'
+    nullable: true
+  # The two counters below count dataset items, so they are whole numbers (`integer`).
+  nullCount:
+    type: integer
+    description: 'How many items in the dataset have a null value for this field.'
+    nullable: true
+  emptyCount:
+    type: integer
+    description: 'How many items in the dataset have an `undefined` value for this field. Note that, for example, an empty string is not considered empty.'
+    nullable: true
diff --git a/apify-api/openapi/components/schemas/datasets/GetDatasetFieldStatisticsResponse.yaml b/apify-api/openapi/components/schemas/datasets/GetDatasetFieldStatisticsResponse.yaml
@@ -0,0 +1,24 @@
+# Response envelope for dataset field statistics: a required `data` object
+# carrying the list of field keys and a map of per-field statistics.
+title: GetDatasetFieldStatisticsResponse
+type: object
+required:
+  - data
+properties:
+  data:
+    type: object
+    required:
+      - fields
+      - statistics
+    properties:
+      fields:
+        description: 'Keys of the fields for which the statistics are provided.'
+        type: array
+        items:
+          type: string
+      statistics:
+        description: 'Statistics for each field. The keys are the same as in the `fields` array.'
+        type: object
+        # Free-form keys; every value must be a DatasetFieldStatistics object.
+        additionalProperties:
+          $ref: './DatasetFieldStatistics.yaml'
diff --git a/apify-api/openapi/components/tags.yaml b/apify-api/openapi/components/tags.yaml
@@ -758,6 +758,10 @@
   x-legacy-doc-urls:
   - '#/reference/datasets/item-collection'
   x-trait: 'true'
+- name: Datasets/Field statistics  # must match the tag string used by operations and tag groups
+  x-displayName: Field statistics
+  x-parent-tag-name: Datasets
+  x-trait: 'true'  # quoted so it stays a string, same as the neighbouring tag entries
 - name: Request queues
   x-displayName: Request queues
   x-legacy-doc-urls:

diff --git a/apify-api/openapi/components/x-tag-groups.yaml b/apify-api/openapi/components/x-tag-groups.yaml
@@ -62,6 +62,7 @@
   - Datasets/Dataset collection
   - Datasets/Dataset
   - Datasets/Item collection
+  - Datasets/Field statistics
 - name: Request queues
   tags:
   - Request queues

diff --git a/apify-api/openapi/openapi.yaml b/apify-api/openapi/openapi.yaml
@@ -566,6 +566,8 @@ paths:
     $ref: 'paths/datasets/datasets@{datasetId}.yaml'
   '/v2/datasets/{datasetId}/items':
     $ref: 'paths/datasets/datasets@{datasetId}@items.yaml'
+  '/v2/datasets/{datasetId}/field-statistics':
+    $ref: 'paths/datasets/datasets@{datasetId}@field-statistics.yaml'
   /v2/request-queues:
     $ref: paths/request-queues/request-queues.yaml
   '/v2/request-queues/{queueId}':

diff --git a/apify-api/openapi/paths/datasets/datasets@{datasetId}@field-statistics.yaml b/apify-api/openapi/paths/datasets/datasets@{datasetId}@field-statistics.yaml
@@ -0,0 +1,57 @@
+# Path item for `/v2/datasets/{datasetId}/field-statistics`.
+# Defines a single GET operation that returns the per-field statistics of a dataset.
+get:
+  tags:
+    - Datasets/Field statistics
+  summary: Get field statistics
+  description: |
+    Returns field statistics for the given dataset.
+    When you configure the dataset [fields schema](https://docs.apify.com/platform/actors/development/actor-definition/dataset-schema/validation), we generate a field list and measure statistics such as `min`, `max`, `nullCount` and `emptyCount`.
+
+    See the dataset fields schema [documentation](https://docs.apify.com/platform/actors/development/actor-definition/dataset-schema/validation#dataset-field-statistics) for more information.
+
+  operationId: dataset_field_statistics_get
+  parameters:
+    - name: datasetId
+      in: path
+      description: Dataset ID or `username~dataset-name`.
+      required: true
+      style: simple
+      schema:
+        type: string
+        example: WkzbQMuFYuamGv3YF
+    # Not marked `required`: per its description the token is only needed for the `username~dataset-name` form of `datasetId`.
+    - name: token
+      in: query
+      description: |
+        API authentication token. It is required only when using the `username~dataset-name` format for `datasetId`.
+      style: form
+      explode: true
+      schema:
+        type: string
+        example: soSkq9ekdmfOslopH
+  responses:
+    '200':
+      description: Field statistics of the dataset.
+      content:
+        application/json:
+          schema:
+            $ref: '../../components/schemas/datasets/GetDatasetFieldStatisticsResponse.yaml'
+          example:
+            data:
+              fields:
+                - name
+                - price
+              statistics:
+                name:
+                  nullCount: 122
+                price:
+                  min: 59
+                  max: 89
+# TODO: add client methods
+#  x-js-parent: DatasetClient
+#  x-js-name: fieldStatistics
+#  x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#fieldStatistics
+#  x-py-parent: DatasetClientAsync
+#  x-py-name: field_statistics
+#  x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#field_statistics
diff --git a/package-lock.json b/package-lock.json