diff --git a/.github/test_files/config_rasa_demo.yml b/.github/test_files/config_rasa_demo.yml new file mode 100644 index 0000000..09f976d --- /dev/null +++ b/.github/test_files/config_rasa_demo.yml @@ -0,0 +1,49 @@ +language: en +pipeline: +- name: WhitespaceTokenizer +- name: RegexFeaturizer +- name: LexicalSyntacticFeaturizer +- name: CountVectorsFeaturizer + OOV_token: oov + token_pattern: (?u)\b\w+\b +- name: CountVectorsFeaturizer + analyzer: char_wb + min_ngram: 1 + max_ngram: 4 +- name: DIETClassifier + epochs: 5 + ranking_length: 5 +- name: DucklingEntityExtractor + url: http://localhost:8000 + dimensions: + - email + - number + - amount-of-money +- name: EntitySynonymMapper +- name: ResponseSelector + retrieval_intent: out_of_scope + scale_loss: false + epochs: 5 +- name: ResponseSelector + retrieval_intent: faq + scale_loss: false + epochs: 5 +- name: ResponseSelector + retrieval_intent: chitchat + scale_loss: false + epochs: 5 +- name: FallbackClassifier + threshold: 0.7 +policies: +- name: TEDPolicy + max_history: 5 + epochs: 10 + batch_size: + - 32 + - 64 +- max_history: 6 + name: AugmentedMemoizationPolicy +- name: RulePolicy + core_fallback_threshold: 0.3 + core_fallback_action_name: "action_default_fallback" + enable_fallback_prediction: True diff --git a/.github/test_files/config_rasa_x_demo.yml b/.github/test_files/config_rasa_x_demo.yml new file mode 100644 index 0000000..eba35bc --- /dev/null +++ b/.github/test_files/config_rasa_x_demo.yml @@ -0,0 +1,36 @@ +# Configuration for Rasa NLU. +# https://rasa.com/docs/rasa/nlu/components/ +language: en + +pipeline: +# # No configuration for the NLU pipeline was provided. The following default pipeline was used to train your model. +# # If you'd like to customize it, uncomment and adjust the pipeline. +# # See https://rasa.com/docs/rasa/tuning-your-model for more information. 
+ - name: WhitespaceTokenizer + - name: RegexFeaturizer + - name: LexicalSyntacticFeaturizer + - name: CountVectorsFeaturizer + - name: CountVectorsFeaturizer + analyzer: char_wb + min_ngram: 1 + max_ngram: 4 + - name: DIETClassifier + epochs: 5 + - name: EntitySynonymMapper + - name: ResponseSelector + epochs: 5 + - name: FallbackClassifier + threshold: 0.3 + ambiguity_threshold: 0.1 + +# Configuration for Rasa Core. +# https://rasa.com/docs/rasa/core/policies/ +policies: +# # No configuration for policies was provided. The following default policies were used to train your model. +# # If you'd like to customize them, uncomment and adjust the policies. +# # See https://rasa.com/docs/rasa/policies for more information. + - name: MemoizationPolicy + - name: TEDPolicy + max_history: 5 + epochs: 5 + - name: RulePolicy diff --git a/.github/test_files/report_to_compare.json b/.github/test_files/report_to_compare.json new file mode 100644 index 0000000..2120e73 --- /dev/null +++ b/.github/test_files/report_to_compare.json @@ -0,0 +1,76 @@ +{ + "default": { + "Rasa Demo - NLU": { + "entity_prediction": { + "macro_avg": { + "f1-score": 0.2378897757789324, + "precision": 0.7177262292340671, + "recall": 0.630076550644482, + "support": 1451 + }, + "micro_avg": { + "f1-score": 0.557902566009669, + "precision": 0.8231017770597738, + "recall": 0.7022742935906272, + "support": 1451 + }, + "weighted_avg": { + "f1-score": 0.6357964876470346, + "precision": 0.8108170836964997, + "recall": 0.7022742935906272, + "support": 1451 + } + }, + "intent_classification": { + "macro_avg": { + "f1-score": 0.78507804141552495, + "precision": 0.6249297611496761, + "recall": 0.22805363055392253, + "support": 4950 + }, + "micro_avg": { + "f1-score": 0.5612119662490412, + "precision": 0.9004178272980501, + "recall": 0.5224242424242425, + "support": 4950 + }, + "weighted_avg": { + "f1-score": 0.5845322720747205, + "precision": 0.8560612861880288, + "recall": 0.5224242424242425, + "support": 4950 + } 
+ }, + "response_selection": { + "accuracy": 0.657748611812216, + "macro_avg": { + "f1-score": 0.4511394032023792, + "precision": 0.541429844814056, + "recall": 0.47505801532174247, + "support": 1981 + }, + "weighted_avg": { + "f1-score": 0.6172915897390211, + "precision": 0.6469121522031172, + "recall": 0.657748611812216, + "support": 1981 + } + }, + "story_prediction": { + "accuracy": 0.51005291005291, + "macro_avg": { + "f1-score": 0.5328584995251662, + "precision": 0.8363425925925926, + "recall": 0.8482142857142857, + "support": 189 + }, + "weighted_avg": { + "f1-score": 0.5091936869714647, + "precision": 0.9186067019400354, + "recall": 0.91005291005291, + "support": 189 + } + } + } + } + } diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..b878c34 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,161 @@ +name: Continuous Integration +on: + push: + branches: + - master + tags: + - '*' + pull_request: + +env: + RASA_X_DEMO_VERSION: '0.33.0' + RASA_IMAGE_TAG: '2.1.3-full' + +jobs: + test_action_init: + runs-on: ubuntu-latest + name: Test GH action - rasa init + + steps: + - uses: actions/checkout@v2 + + - name: Create a new project + run: | + mkdir rasa-project + chmod 777 rasa-project + docker run --rm -v ${{ github.workspace }}/rasa-project:/rasa-project rasa/rasa:${{ env.RASA_IMAGE_TAG }} init --no-prompt --init-dir /rasa-project + + - name: Download Rasa X model + run: | + wget https://github.com/RasaHQ/rasa-x-demo/blob/0.33.0/models/model.tar.gz?raw=true \ + -O test_model.tar.gz + + - id: action + name: Rasa Test & Train - rasa init + uses: ./ + with: + rasa_version: ${{ env.RASA_IMAGE_TAG }} + workspace: ${{ github.workspace }}/rasa-project + test_type: all + github_token: ${{ secrets.GITHUB_TOKEN }} + configuration_name: 'rasa-init' + publish_summary: 'false' + + - name: Checkout output + run: | + echo ${{ fromJSON(steps.action.outputs.report).default['rasa-init'].intent_classification.accuracy }} + + 
- name: Checkout output - if + if: fromJSON(steps.action.outputs.report).default['rasa-init'].intent_classification.accuracy <= 0.5 + run: | + echo "::error::${{ fromJSON(steps.action.outputs.report).default['rasa-init'].intent_classification.accuracy }}" + exit 1 + + # Test the GH action on Rasa X Demo data + # https://github.com/RasaHQ/rasa-x-demo + test_action_x: + runs-on: ubuntu-latest + name: Test GH action - Rasa X Demo + + steps: + - uses: actions/checkout@v2 + + - name: Checkout Rasa X Demo + uses: actions/checkout@v2 + with: + repository: RasaHQ/rasa-x-demo + path: rasa-x-demo + ref: ${{ env.RASA_X_DEMO_VERSION }} + + - name: Download Rasa X model + run: | + wget https://github.com/RasaHQ/rasa-x-demo/blob/${{ env.RASA_X_DEMO_VERSION }}/models/model.tar.gz?raw=true \ + -O test_model.tar.gz + + - id: action + name: Rasa Test & Train - Rasa X Demo + uses: ./ + with: + rasa_version: '2.0.0-full' + workspace: ${{ github.workspace }}/rasa-x-demo + test_type: all + github_token: ${{ secrets.GITHUB_TOKEN }} + model: github_workspace/test_model.tar.gz + configuration: github_workspace/.github/test_files/config_rasa_x_demo.yml + configuration_name: 'Rasa X Demo' + publish_summary: 'false' + + - name: Checkout output + run: | + echo ${{ fromJSON(steps.action.outputs.report).default['Rasa X Demo'].intent_classification.accuracy }} + + - name: Checkout output - if + if: fromJSON(steps.action.outputs.report).default['Rasa X Demo'].intent_classification.accuracy <= 0.5 + run: | + echo "::error::${{ fromJSON(steps.action.outputs.report).default['Rasa X Demo'].intent_classification.accuracy }}" + exit 1 + + # Test the GH action on Rasa Demo data + # https://github.com/RasaHQ/rasa-demo + # Test type: NLU + test_action: + runs-on: ubuntu-latest + name: Test GH action - Rasa Demo - NLU + needs: [test_action_x] + + steps: + - uses: actions/checkout@v2 + + - name: Checkout Rasa Demo + uses: actions/checkout@v2 + with: + repository: RasaHQ/rasa-demo + path: rasa-demo + + - name: Use test configuration +
run: | + cp .github/test_files/config_rasa_demo.yml rasa-demo/config.yml + + - id: action + name: Rasa Test & Train - Rasa Demo + uses: ./ + with: + rasa_version: '2.0.0-full' + configuration_name: 'Rasa Demo - NLU' + workspace: ${{ github.workspace }}/rasa-demo + test_type: nlu + github_token: ${{ secrets.GITHUB_TOKEN }} + test_args: '-f 2' + # Use cross validation + cross_validation: 'true' + # Compare results to the test report + compare_report: .github/test_files/report_to_compare.json + + # Test the GH action on Rasa Demo data + # https://github.com/RasaHQ/rasa-demo + # Test type: ALL + test_action_all: + runs-on: ubuntu-latest + name: Test GH action - Rasa Demo - ALL + + steps: + - uses: actions/checkout@v2 + + - name: Checkout Rasa Demo + uses: actions/checkout@v2 + with: + repository: RasaHQ/rasa-demo + path: rasa-demo + + - name: Use test configuration + run: | + cp .github/test_files/config_rasa_demo.yml rasa-demo/config.yml + + - id: action + name: Rasa Test & Train - Rasa Demo + uses: ./ + with: + rasa_version: '2.0.0-full' + configuration_name: 'Rasa Demo' + workspace: ${{ github.workspace }}/rasa-demo + github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci_summary.yml b/.github/workflows/ci_summary.yml new file mode 100644 index 0000000..a773111 --- /dev/null +++ b/.github/workflows/ci_summary.yml @@ -0,0 +1,79 @@ +name: Continuous Integration - Test summary +on: + push: + branches: + - master + tags: + - '*' + pull_request: + +jobs: + test_action_1: + runs-on: ubuntu-latest + name: Test GH action - Rasa Demo - cfg 1 + + steps: + - uses: actions/checkout@v2 + + - name: Checkout Rasa Demo + uses: actions/checkout@v2 + with: + repository: RasaHQ/rasa-demo + path: rasa-demo + + - name: Use test configuration + run: | + cp .github/test_files/config_rasa_demo.yml rasa-demo/config.yml + + - id: action + name: Rasa Test & Train - Rasa Demo + uses: ./ + with: + rasa_version: '2.0.0-full' + configuration_name: 'Rasa Demo - Configuration
1' + workspace: ${{ github.workspace }}/rasa-demo + publish_summary: 'false' + + - uses: actions/upload-artifact@v2 + with: + name: rasa-demo-cfg-1 + path: rasa-demo/reports + + test_action_2: + runs-on: ubuntu-latest + name: Test GH action - Rasa Demo - cfg 2 + needs: [test_action_1] + + steps: + - uses: actions/checkout@v2 + + - name: Checkout Rasa Demo + uses: actions/checkout@v2 + with: + repository: RasaHQ/rasa-demo + path: rasa-demo + + - uses: actions/download-artifact@v2 + with: + name: rasa-demo-cfg-1 + path: rasa-demo/reports + + - name: Use test configuration + run: | + cp .github/test_files/config_rasa_demo.yml rasa-demo/config.yml + + - id: action + name: Rasa Test & Train - Rasa Demo + uses: ./ + with: + rasa_version: '2.0.0-full' + configuration_name: 'Rasa Demo - Configuration 2' + workspace: ${{ github.workspace }}/rasa-demo + github_token: ${{ secrets.GITHUB_TOKEN }} + + - uses: actions/upload-artifact@v2 + with: + name: rasa-demo-cfg-2 + path: | + rasa-demo/results + rasa-demo/reports diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..7140dbb --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,41 @@ +name: Release +on: + push: + tags: + - '*' + +jobs: + release: + name: Create Release + runs-on: ubuntu-latest + if: startsWith(github.event.ref, 'refs/tags') + steps: + - name: Checkout code + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Get release version + id: release_version + run: | + sed -e 's/_version_/'${GITHUB_REF#refs/tags/}'/g' package.json.tmp > package.json + echo "::set-output name=version::${GITHUB_REF#refs/tags/}" + + - name: Changelog + uses: scottbrenner/generate-changelog-action@1.0.1 + id: changelog + env: + REPO: ${{ github.repository }} + + - name: Create Release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ steps.release_version.outputs.version }} + 
release_name: ${{ steps.release_version.outputs.version }} + body: | + ${{ steps.changelog.outputs.changelog }} + draft: false + prerelease: false diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b3f8862 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +rasa-demo +results.json +report_master.json diff --git a/README.md b/README.md index db1649b..7fcaf39 100644 --- a/README.md +++ b/README.md @@ -1 +1,229 @@ -# rasa-train-test-gha \ No newline at end of file +# Rasa Train-Test Model GitHub Action + +The GitHub action to run easily `rasa train` and `rasa test` in the CIs. + +## Input Arguments + +In order to pass the input parameters to the GH action, you have to use the [`with`](https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#jobsjob_idstepswith) argument in a step that uses the GH action, e.g.: + +```yaml +jobs: + my_first_job: + name: My first job + runs-on: ubuntu-latest + steps: + # Checks-out GitHub repository + # more: https://github.com/actions/checkout + - uses: actions/checkout@v2 + + # Run rasa train and rasa test + - name: My first step + uses: RasaHQ/rasa-train-test-gha@main + with: + test_type: nlu +``` + +| Input | Description | Default | +| -------------------- | ----------------------------------------------------------------------------- | ------------------------- | +| `rasa_version` | The Rasa Open Source version used to run test and train | `latest-full` | +| `rasa_image` | Custom Rasa Docker image. Useful if you use Rasa with custom Python modules. 
| `none` | +| `rasa_train` | Run `rasa train` | `true` | +| `rasa_test` | Run `rasa test` | `true` | +| `data_validate` | Validates domain and data files to check for possible mistakes | `true` | +| `workspace` | The root directory containing your Rasa Open Source project | `${{ github.workspace }}` | +| `train_args` | Additional arguments passed to the `rasa train` command | `none` | +| `test_args` | Additional arguments passed to the `rasa test` command | `none` | +| `test_type` | The types of tests to run (available types: `core`/`nlu`/`all`) | `all` | +| `publish_summary` | Publish tests summary as a PR comment | `true` | +| `github_token` | GitHub Token - required to add a comment with summary | `none` | +| `configuration` | Model configuration file | `config.yml` | +| `model` | Path to a file with a model. Use existing model instead of training a new one | `none` | +| `cross_validation` | Switch on cross validation mode. Any provided model will be ignored | `false` | +| `configuration_name` | Configuration name used in summary. If not provided a file name is used | `none` | +| `data_name` | Data name used in summary. If not provided a directory name is used | `default` | +| `compare_report` | Path to a report that will be used to compare results | `none` | +| `result_directory` | Directory name where results are stored in | `results` | +| `report_directory` | Directory name where reports are stored in | `reports` | + +## Outputs + +The list of available output variables: + +| Output | Description | +| -------- | --------------------- | +| `report` | Return report as JSON | + +## Report + +The GH action generates two reports, a report with a summary of run tests, the report is available as JSON and CSV file. The example of a report generated by the GH action can be found [here](examples/). + +## Example Usage + +In the example below, we are using [the Rasa Demo data](https://github.com/RasaHQ/rasa-demo). + +```yaml +jobs: + train_and_test: + # ... 
+ steps: + # ... + - name: Train and Test Rasa Demo + uses: RasaHQ/rasa-train-test-gha@main + with: + # List of available tags: https://hub.docker.com/r/rasa/rasa/tags + rasa_version: '2.0.0-full' + # In order to add a PR comment with summary + # a GH Token has to be passed to the GH action + github_token: ${{ secrets.GITHUB_TOKEN }} + # ... +``` + +The GitHub action by default adds a PR comment with summary (the summary comment can be disabled by setting the `publish_summary` input argument to `false`): + +![Summary](_img/img_1.png) + +### Use JSON report in GH workflow + +The GH action returns a JSON report as an output. The following example shows how to use the output in a GH workflow. + +```yaml +jobs: + train_and_test: + # ... + steps: + # ... + - name: Train and Test Rasa Demo + id: action + uses: RasaHQ/rasa-train-test-gha@main + with: + # List of available tags: https://hub.docker.com/r/rasa/rasa/tags + rasa_version: '2.0.0-full' + # In order to add a PR comment with summary + # a GH Token has to be passed to the GH action + github_token: ${{ secrets.GITHUB_TOKEN }} + + # We have to convert the output to JSON by using fromJSON built-in function + # more: https://docs.github.com/en/free-pro-team@latest/actions/reference/context-and-expression-syntax-for-github-actions#fromjson + # syntax: fromJSON(steps.action.outputs.report).<data_name>['<configuration_name>'] + # example: fromJSON(steps.action.outputs.report).default['config.yml'] + - name: Check output + if: fromJSON(steps.action.outputs.report).default['config.yml'].intent_classification.accuracy >= 0.8 + run: | + echo "I'm doing extra work..." + echo ${{ fromJSON(steps.action.outputs.report).default['config.yml'].intent_classification.accuracy }} +``` + +## Advanced examples + +### Compare results with the base report + +It is possible to compare results against another report. This is useful, for example, to check whether a model has improved after a change. The difference against the report that we compare to is included in brackets.
+ +```yaml +jobs: + train_and_test: + # ... + steps: + # ... + - name: Train and Test Rasa Demo + uses: RasaHQ/rasa-train-test-gha@main + with: + # List of available tags: https://hub.docker.com/r/rasa/rasa/tags + rasa_version: '2.0.0-full' + # In order to add a PR comment with summary + # a GH Token has to be passed to the GH action + github_token: ${{ secrets.GITHUB_TOKEN }} + # A path to the report that we want to compare to + compare_report: 'report_to_compare.json' + test_type: 'nlu' + # ... +``` + +![Compare](_img/img_3.png) + +### Use the existing model + +It's possible to use an existing model instead of training a new one. + +```yaml +jobs: + test: + # ... + steps: + # ... + - name: Download Rasa X model + run: | + wget https://github.com/RasaHQ/rasa-x-demo/blob/0.33.0/models/model.tar.gz?raw=true \ + -O test_model.tar.gz + + - name: Train and Test Rasa Demo + uses: RasaHQ/rasa-train-test-gha@main + with: + # List of available tags: https://hub.docker.com/r/rasa/rasa/tags + rasa_version: '2.0.0-full' + # In order to add a PR comment with summary + # a GH Token has to be passed to the GH action + github_token: ${{ secrets.GITHUB_TOKEN }} + # If a file with the model is provided, training is disabled automatically + model: test_model.tar.gz + # ... +``` + +### Cross-validate NLU model + +```yaml +jobs: + train_and_test: + # ... + steps: + # ... + - name: Train and Test Rasa Demo + uses: RasaHQ/rasa-train-test-gha@main + with: + # List of available tags: https://hub.docker.com/r/rasa/rasa/tags + rasa_version: '2.0.0-full' + # In order to add a PR comment with summary + # a GH Token has to be passed to the GH action + github_token: ${{ secrets.GITHUB_TOKEN }} + # Switch on cross validation mode. Any provided model will be ignored + cross_validation: 'true' + # Number of cross validation folds (cross validation only) + # All available arguments: rasa test nlu --help + test_args: '--folds 3' + test_type: 'nlu' + # ...
+``` + +In a case where the cross-validation mode is enabled, a summary published as a PR comment includes `Intent Cross-Validation Results` and `Entity Cross-Validation Results`, e.g. + +![Intent Cross-Validation Results](_img/img_4.png) + +![Entity Cross-Validation Results](_img/img_5.png) + +### Upload results and reports as artifacts + +```yaml +jobs: + train_and_test: + # ... + steps: + # ... + - name: Train and Test Rasa Demo + id: action + uses: RasaHQ/rasa-train-test-gha@main + with: + # List of available tags: https://hub.docker.com/r/rasa/rasa/tags + rasa_version: '2.0.0-full' + # In order to add a PR comment with summary + # a GH Token has to be pass to the GH action + github_token: ${{ secrets.GITHUB_TOKEN }} + + # The 'actions/upload-artifact' action to upload files + # More: https://github.com/actions/upload-artifact + - uses: actions/upload-artifact@v2 + with: + name: rasa-demo-cfg + path: | + results + reports +``` diff --git a/_img/img_1.png b/_img/img_1.png new file mode 100644 index 0000000..567b146 Binary files /dev/null and b/_img/img_1.png differ diff --git a/_img/img_2.png b/_img/img_2.png new file mode 100644 index 0000000..d74535c Binary files /dev/null and b/_img/img_2.png differ diff --git a/_img/img_3.png b/_img/img_3.png new file mode 100644 index 0000000..e69d7f7 Binary files /dev/null and b/_img/img_3.png differ diff --git a/_img/img_4.png b/_img/img_4.png new file mode 100644 index 0000000..1d55a18 Binary files /dev/null and b/_img/img_4.png differ diff --git a/_img/img_5.png b/_img/img_5.png new file mode 100644 index 0000000..cc029ea Binary files /dev/null and b/_img/img_5.png differ diff --git a/action.yml b/action.yml new file mode 100644 index 0000000..3484f93 --- /dev/null +++ b/action.yml @@ -0,0 +1,267 @@ +name: 'Rasa Train-Test Model GitHub Action' +description: 'The GitHub action to run the rasa train and rasa test commands' +inputs: + rasa_version: + description: 'The Rasa version used to run test and train' + required: 
true + default: 'latest-full' + rasa_image: + description: 'Custom Docker image' + required: false + default: '' + rasa_train: + description: 'Run rasa train' + required: true + default: 'true' + rasa_test: + description: 'Run rasa test' + required: true + default: 'true' + data_validate: + description: 'Validates domain and data files to check for possible mistakes' + required: true + default: 'true' + workspace: + description: 'The root directory' + required: false + default: '${{ github.workspace }}' + train_args: + description: 'Additional arguments passed to the rasa train command' + required: false + default: '' + test_args: + description: 'Additional arguments passed to the rasa test command' + required: false + default: '' + test_type: + description: 'The types of tests to run (available types: core/nlu/all)' + required: true + default: 'all' + publish_summary: + description: 'Publish tests summary as a PR comment' + required: false + default: 'true' + github_token: + description: 'GitHub Token - required to add a comment with summary' + required: false + default: '' + configuration: + description: 'Model configuration file' + required: true + default: 'config.yml' + model: + description: 'Path to a file with a model. Use existing model instead of training a new one' + required: false + default: '' + cross_validation: + description: 'Switch on cross validation mode. Any provided model will be ignored' + required: false + default: 'false' + configuration_name: + description: 'Configuration name used in summary. If not provided a file name is used' + required: false + default: '' + data_name: + description: 'Data name used in summary. 
If not provided a directory name is used' + required: false + default: 'default' + compare_report: + description: 'Path to a report that will be used to compare results' + required: false + default: '' + result_directory: + description: 'Directory name where results are stored in' + required: false + default: 'results' + report_directory: + description: 'Directory name where reports are stored in' + required: false + default: 'reports' +outputs: + report: + description: "Return report as JSON" + value: ${{ steps.report_output.outputs.report }} + +branding: + icon: 'layers' + color: 'green' + +runs: + using: "composite" + steps: + - name: Set environment + shell: bash + run: |- + mkdir -p ${{ inputs.workspace }}/models + chmod 777 ${{ inputs.workspace }}/models + mkdir -p ${{ inputs.workspace }}/${{ inputs.result_directory }} + chmod 777 ${{ inputs.workspace }}/${{ inputs.result_directory }} + + case "${{ inputs.test_type }}" in + all) + echo TEST_TYPE="" >> $GITHUB_ENV + ;; + nlu) + echo TEST_TYPE=nlu >> $GITHUB_ENV + echo DEFAULT_ARGS="--config /app/${{ inputs.configuration }}" >> $GITHUB_ENV + ;; + core) + echo TEST_TYPE=core >> $GITHUB_ENV + ;; + *) + echo "::error::Unknown test type: ${{ inputs.test_type }} (use: core/nlu/all)" + exit 1 + esac + + # Set a model path + if [[ -n "${{ inputs.model }}" ]];then + echo MODEL_ARGS="--model /app/${{ inputs.model }}" >> $GITHUB_ENV + fi + + # Set cross validation + if [[ "${{ inputs.cross_validation }}" == "true" ]];then + echo CROSS_VALIDATION_ARGS="--cross-validation" >> $GITHUB_ENV + fi + + # Set docker args + echo DOCKER_ARGS="--rm --tmpfs /.config -e MPLCONFIGDIR=/tmp/.mplconfigdir \ + -e CI=${CI} -e RASA_TELEMETRY_ENABLED=${{ env.RASA_TELEMETRY_ENABLED }} -v ${{ github.workspace }}:/app/github_workspace \ + -v ${{ inputs.workspace }}:/app -v ${{ inputs.workspace }}/models:/app/models \ + -v ${{ inputs.workspace }}/${{ inputs.result_directory }}:/app/results" >> $GITHUB_ENV + + # Set environment variables for 
generating a report + echo "SUMMARY_FILE=${{ inputs.workspace }}/${{ inputs.report_directory }}/report.json" >> $GITHUB_ENV + + if [[ -n "${{ inputs.configuration_name }}" ]]; then + echo "CONFIG=${{ inputs.configuration_name }}" >> $GITHUB_ENV + else + echo "CONFIG=${{ inputs.configuration }}" >> $GITHUB_ENV + fi + + echo "DATASET_NAME=${{ inputs.data_name }}" >> $GITHUB_ENV + echo "RESULT_DIR=${{ inputs.workspace }}/${{ inputs.result_directory }}" >> $GITHUB_ENV + + - name: Set the Rasa version and image + shell: bash + run: | + # Set custom rasa image + if [[ -n "${{ inputs.rasa_image }}" ]]; then + echo "RASA_IMAGE=${{ inputs.rasa_image }}" >> $GITHUB_ENV + # Don't continue if a custom rasa image was used + exit 0 + fi + + DOCKERHUB_TAGS_URL="https://registry.hub.docker.com/v2/repositories/rasa/rasa/tags?page_size=10000" + # Get the latest version + if [[ "${{ inputs.rasa_version }}" == "latest-full" ]]; then + LATEST_RASA_VERSION=$(curl -s "${DOCKERHUB_TAGS_URL}" | jq -r '.results[].name' | grep -E 'full' | grep -vE 'latest' | sort -Vr | head -n1) + RASA_VERSION=${LATEST_RASA_VERSION} + else + # Validate Rasa version + CHECK_VERSION=$(curl --silent "${DOCKERHUB_TAGS_URL}" | jq -r '.results[] | select(.name=="${{ inputs.rasa_version }}") | .name') + if [[ "$CHECK_VERSION" != "${{ inputs.rasa_version }}" ]]; then + echo "::error::Rasa in ${{ inputs.rasa_version }} version doesn't exist. Check if the given Rasa version is valid, https://hub.docker.com/r/rasa/rasa/tags" && exit 1 + fi + RASA_VERSION=${{ inputs.rasa_version }} + fi + + echo "RASA_IMAGE=rasa/rasa:${RASA_VERSION}" >> $GITHUB_ENV + + - name: Data Validation + shell: bash + run: | + if [[ "${{ inputs.data_validate }}" == "true" ]]; then + echo "Run data validation" + echo + echo docker run ${{ env.DOCKER_ARGS }} ${{ env.RASA_IMAGE }} data validate + docker run ${{ env.DOCKER_ARGS }} ${{ env.RASA_IMAGE }} data validate + else + echo "::warning::Data validation is disabled.
To turn on data validation set the data_validate parameter to 'true'." + fi + + - name: Run the rasa train command + shell: bash + run: | + if [[ -n "${{ inputs.model }}" ]]; then + echo + echo "Use model ${{ inputs.model }}. Skipping training" + echo + elif [[ "${{ inputs.rasa_train }}" == "true" ]]; then + echo "Run the rasa train" + echo "" + docker run ${{ env.DOCKER_ARGS }} ${{ env.RASA_IMAGE }} train ${{ env.TEST_TYPE }} ${{ env.DEFAULT_ARGS }} ${{ inputs.train_args }} + else + echo "::warning::rasa train is disabled. To turn on the rasa train set the rasa_train parameter to 'true'." + fi + + - name: Run the test command + shell: bash + run: | + if [[ "${{ inputs.rasa_test }}" == "true" ]]; then + echo "Run the rasa test" + echo "" + docker run ${{ env.DOCKER_ARGS }} ${{ env.RASA_IMAGE }} test ${{ env.TEST_TYPE }} ${{ env.MODEL_ARGS }} ${{ env.CROSS_VALIDATION_ARGS }} ${{ env.DEFAULT_ARGS }} ${{ inputs.test_args }} + else + echo "::warning::rasa test is disabled. To turn on the rasa test set the rasa_test parameter to 'true'." + fi + + - name: Generate a report + id: report_output + shell: bash + run: | + mkdir -p ${{ inputs.workspace }}/${{ inputs.report_directory }} + python3 ${{ github.action_path }}/scripts/generate_results_report.py + + # Copy a report from the current one if the report to compare to wasn't provided + cp ${{ inputs.workspace }}/${{ inputs.report_directory }}/report.json ${{ inputs.workspace }}/${{ inputs.report_directory }}/report_master.json + + if [[ -n "${{ inputs.compare_report }}" && -e "${{ inputs.compare_report }}" ]]; then + cp ${{ inputs.compare_report }} ${{ inputs.workspace }}/${{ inputs.report_directory }}/report_master.json + elif [[ -n "${{ inputs.compare_report }}" && ! -e "${{ inputs.compare_report }}" ]]; then + echo "::warning::The report ${{ inputs.compare_report }} to compare doesn't exist."
+ fi + + echo "::set-output name=report::$(docker run --rm -v ${{ github.action_path }}/templates:/templates \ + -v ${{ inputs.workspace }}/${{ inputs.report_directory }}/report.json:/report.json hairyhenderson/gomplate -d report=/report.json \ + -f /templates/report_to_json.tmpl)" + + # Convert JSON to CSV + python3 ${{ github.action_path }}/scripts/json_to_csv.py ${{ inputs.workspace }}/${{ inputs.report_directory }}/report.json ${{ inputs.workspace }}/${{ inputs.report_directory }}/report.csv + + - name: Publish summary as a comment in a PR + shell: bash + run: | + if [[ -z "${{ inputs.github_token }}" && "${{ inputs.publish_summary }}" == "true" && "${{ github.event_name }}" == "pull_request" ]]; then + echo "::warning::The GITHUB_TOKEN is required. Set the 'github_token' input parameter in order to publish summary." + elif [[ "${{ inputs.publish_summary }}" == "true" && "${{ github.event_name }}" == "pull_request" ]]; then + docker run --rm \ + -v ${{ github.action_path }}/templates:/templates \ + -v ${{ inputs.workspace }}/${{ inputs.report_directory }}:/reports hairyhenderson/gomplate -d data=/reports/report.json -d results_master=/reports/report_master.json \ + -f /templates/comment_summary.tmpl > comment_summary.md + + OUTPUT=$(cat comment_summary.md) + + if [[ "${{ inputs.cross_validation }}" == "true" ]]; then + pip3 install pytablewriter + OUTPUT="${OUTPUT}\n$(python3 ${{ github.action_path }}/scripts/cross_validation_results.py)" + fi + + OUTPUT="${OUTPUT//$'\n'/'\n'}" + + curl -X POST -s -H "Authorization: token ${{ inputs.github_token }}" -H "Accept: application/vnd.github.v3+json" \ + https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.number }}/comments -d "{\"body\":\"Commit: ${{ github.sha }}\n$OUTPUT\"}" + + elif [[ "${{ github.event_name }}" != "pull_request" ]]; then + echo + echo "::warning::Publish Summary as a PR comment is available only for a pull request." 
+ echo + else + echo + echo "Publish Summary as a PR comment is disabled. To enabled it set the 'publish_summary' input parameter to 'true'." + echo + fi + + - name: Clean + shell: bash + run: | + rm -rf ${{ inputs.workspace }}/${{ inputs.report_directory }}/report_master.json diff --git a/examples/report/report.csv b/examples/report/report.csv new file mode 100644 index 0000000..1b444f0 --- /dev/null +++ b/examples/report/report.csv @@ -0,0 +1,5 @@ +name,macro_avg__f1-score,macro_avg__precision,macro_avg__recall,macro_avg__support,micro_avg__f1-score,micro_avg__precision,micro_avg__recall,micro_avg__support,weighted_avg__f1-score,weighted_avg__precision,weighted_avg__recall,weighted_avg__support,accuracy +default/Rasa Demo - Configuration 1/entity_prediction,0.8439612155511906,0.8928145686878677,0.8505867162712345,1451,0.8802499132245749,0.8867132867132868,0.8738800827015851,1451,0.8795120459917696,0.8962881808799879,0.8738800827015851,1451,n/a +default/Rasa Demo - Configuration 1/intent_classification,0.39396343663157196,0.6152770784589695,0.3412664077499422,4950,0.8144018583042975,0.9579234972677596,0.7082828282828283,4950,0.7571805042281962,0.89504509788388,0.7082828282828283,4950,n/a +default/Rasa Demo - Configuration 1/response_selection,0.6128055852643406,0.6404843224661372,0.6527552082927716,1981,n/a,n/a,n/a,n/a,0.77835090522873,0.7812937731413816,0.8278647147905098,1981,0.8278647147905098 +default/Rasa Demo - Configuration 1/story_prediction,0.820486111111111,0.822530864197531,0.8363095238095238,189,n/a,n/a,n/a,n/a,0.915299823633157,0.9200470311581422,0.9206349206349206,189,0.9206349206349206 diff --git a/examples/report/report.json b/examples/report/report.json new file mode 100644 index 0000000..56a9f2f --- /dev/null +++ b/examples/report/report.json @@ -0,0 +1,76 @@ +{ + "default": { + "Rasa Demo - Configuration 1": { + "entity_prediction": { + "macro_avg": { + "f1-score": 0.8439612155511906, + "precision": 0.8928145686878677, + "recall": 
"""Print Markdown tables summarising Rasa cross-validation reports.

The reports ``intent_report.json`` and ``DIETClassifier_report.json`` are read
from the directory named by the ``RESULT_DIR`` environment variable.
"""
from pytablewriter import MarkdownTableWriter
import json
import os

result_dir = os.environ["RESULT_DIR"]


def _load_class_report(filename):
    """Load ``filename`` from ``result_dir`` and keep only dict-valued entries.

    Scalar entries such as ``"accuracy"`` carry no per-class statistics and
    would break sorting by ``support``, so they are dropped for every table.
    """
    with open(f"{result_dir}/{filename}", "r") as f:
        report = json.load(f)

    return {name: stats for name, stats in report.items() if isinstance(stats, dict)}


def _format_cell(stats, column):
    """Return the printable value of ``column`` for one report entry.

    Only a missing value or an empty collection is shown as ``"N/A"``; a
    numeric zero is a real result and is printed as-is.
    """
    value = stats.get(column)
    if value is None or (isinstance(value, (dict, list, str)) and not value):
        return "N/A"
    if column == "confused_with":
        return ", ".join(f"{label}({count})" for label, count in value.items())
    return value


def _render_table(title, first_header, columns, report):
    """Build a Markdown table with one row per entry, largest support first."""
    writer = MarkdownTableWriter()
    writer.table_name = title
    writer.headers = [first_header] + columns

    ordered = sorted(
        report, key=lambda name: report[name].get("support", 0), reverse=True
    )
    writer.value_matrix = [
        [name] + [_format_cell(report[name], column) for column in columns]
        for name in ordered
    ]

    return writer.dumps()


def intent_table():
    """Return the intent cross-validation results as a Markdown table."""
    return _render_table(
        "Intent Cross-Validation Results",
        "class",
        ["support", "f1-score", "confused_with"],
        _load_class_report("intent_report.json"),
    )


def entity_table():
    """Return the entity cross-validation results as a Markdown table."""
    return _render_table(
        "Entity Cross-Validation Results",
        "entity",
        ["support", "f1-score", "precision", "recall"],
        _load_class_report("DIETClassifier_report.json"),
    )


if __name__ == "__main__":
    # Build both tables before printing so a failure produces no partial output.
    intents = intent_table()
    entities = entity_table()

    print(intents)
    print("\n\n\n")
    print(entities)
# Merge the per-task reports of a model test run into the summary JSON file
import json
import os

SUMMARY_FILE = os.environ["SUMMARY_FILE"]
CONFIG = os.environ["CONFIG"]
DATASET = os.environ["DATASET_NAME"]
task_mapping = {
    "story_report.json": "story_prediction",
    "intent_report.json": "intent_classification",
    "CRFEntityExtractor_report.json": "entity_prediction",
    "DIETClassifier_report.json": "entity_prediction",
    "response_selection_report.json": "response_selection",
}


def generate_json(file, task, data):
    """Store the results read from ``file`` under data[DATASET][CONFIG][task].

    Returns the updated summary; missing dataset/configuration levels are
    created on the way.
    """
    if DATASET not in data:
        data = {DATASET: {CONFIG: {}}, **data}
    elif CONFIG not in data[DATASET]:
        data[DATASET] = {CONFIG: {}, **data[DATASET]}

    # Work on a shallow copy of the configuration entry, as the summary
    # stores a fresh dict for it on every call.
    config_results = dict(data[DATASET][CONFIG])
    data[DATASET][CONFIG] = config_results
    config_results[task] = dict(read_results(file))

    return data


def read_results(file):
    """Read a report file and return its averaged metrics with snake_case keys."""
    with open(file) as report_file:
        report = json.load(report_file)

    # (key in the report, key in the summary), in output order
    renamed_keys = (
        ("accuracy", "accuracy"),
        ("weighted avg", "weighted_avg"),
        ("macro avg", "macro_avg"),
        ("micro avg", "micro_avg"),
    )

    summary = {}
    for report_key, summary_key in renamed_keys:
        if report_key in report:
            summary[summary_key] = report[report_key]

    return summary


if __name__ == "__main__":
    summary = {}
    if os.path.exists(SUMMARY_FILE):
        with open(SUMMARY_FILE) as summary_in:
            summary = json.load(summary_in)

    for root, _subdirs, filenames in os.walk(os.environ["RESULT_DIR"]):
        for filename in filenames:
            if filename in task_mapping:
                summary = generate_json(
                    os.path.join(root, filename), task_mapping[filename], summary
                )

    with open(SUMMARY_FILE, "w") as summary_out:
        json.dump(summary, summary_out, sort_keys=True, indent=2)
"""Convert a model regression report from JSON to CSV.

Usage: json_to_csv.py <report.json> <report.csv>
"""
import csv, json, sys
from pathlib import Path


def flattenjson(b, delim):
    """Flatten nested dicts into one dict whose keys are joined with ``delim``.

    Example: ``{"a": {"b": 1}}`` with ``delim="__"`` becomes ``{"a__b": 1}``.
    """
    flat = {}
    for key, value in b.items():
        if isinstance(value, dict):
            for sub_key, sub_value in flattenjson(value, delim).items():
                flat[key + delim + sub_key] = sub_value
        else:
            flat[key] = value

    return flat


# Averaged metric groups that get one CSV column per field.
header = [
    "macro_avg",
    "micro_avg",
    "weighted_avg",
]


def convert_report(input_path, output_path):
    """Write one CSV row per dataset/configuration/task found in the report.

    Metrics missing from a task are written as ``n/a``.
    """
    report = json.loads(Path(input_path).read_text())

    header_names = [
        group + "__" + field
        for group in header
        for field in ["f1-score", "precision", "recall", "support"]
    ]
    header_names.append("accuracy")

    # newline="" is required by the csv module so that it controls the line
    # endings itself (otherwise Windows output gets an extra "\r").
    with open(output_path, "w", newline="") as file:
        csv_file = csv.writer(file)
        csv_file.writerow(["name"] + header_names)

        for data_name, configurations in report.items():
            for configuration, tasks in configurations.items():
                for row, results in tasks.items():
                    row_data = flattenjson(results, "__")
                    csv_file.writerow(
                        [f"{data_name}/{configuration}/{row}"]
                        + [row_data.get(field, "n/a") for field in header_names]
                    )


def main(argv):
    """Run the conversion for ``argv``; return the process exit code."""
    # Extra arguments are ignored, as before; missing ones are a usage error
    # instead of an IndexError traceback.
    if len(argv) < 3:
        program = argv[0] if argv else "json_to_csv.py"
        print(f"usage: {program} <report.json> <report.csv>", file=sys.stderr)
        return 2

    convert_report(argv[1], argv[2])
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
{{- /*

Reads the report of the current run (datasource "data") and the report of
the master branch (datasource "results_master") and renders, per dataset,
a Markdown table that summarizes the test results of every configuration.

*/ -}}
{{- /*

The accuracy_tpl template prints the branch accuracy followed by its
difference to the master accuracy (branch - master), or `no data` when
the master results have no accuracy field.

*/ -}}
{{ define "accuracy_tpl" -}}
{{- if has .master "accuracy" -}}
{{ printf "%.4f" .branch.accuracy }} ({{ printf "%.2f" (.master.accuracy | math.Sub .branch.accuracy) }})
{{- else -}}
{{ printf "%.4f" .branch.accuracy }} (`no data`)
{{- end -}}
{{- end -}}
{{- /*

The micro_avg_tpl template prints the branch micro-average F1 score followed
by its difference to the master value (branch - master), or `no data` when
the master results have no micro-average F1 score.

*/ -}}
{{ define "micro_avg_tpl" -}}
{{- if has .master.micro_avg "f1-score" -}}
{{ printf "%.4f" (index .branch.micro_avg "f1-score") }} ({{ printf "%.2f" ((index .master.micro_avg "f1-score") | math.Sub (index .branch.micro_avg "f1-score")) }})
{{- else -}}
{{ printf "%.4f" (index .branch.micro_avg "f1-score") }} (`no data`)
{{- end -}}
{{- end -}}
{{- /*

Render Markdown with results: one table per dataset, one row per
configuration, one column per task.

*/ -}}
{{- $results_master := (datasource "results_master") -}}
{{ range $dataset, $config := (datasource "data")}}
{{- $dataset_master := (index $results_master $dataset) -}}
Data: `{{$dataset}}`

| Configuration | Intent Classification Micro F1 | Entity Recognition Micro F1 | Response Selection Micro F1 | Story Recognition Micro F1 |
|---------------|-----------------|-----------------|-------------------|-------------------|
{{ range $config_name, $config_data := $config -}}
| `{{ $config_name }}` |
{{- if has $config_data "intent_classification" -}}
{{- /* Default to the branch results; they are replaced below when the master report contains this configuration */ -}}
{{- $intent_class_master := $config_data.intent_classification -}}
{{- if has $dataset_master $config_name -}}
{{- $intent_class_master = (index $dataset_master $config_name).intent_classification -}}
{{- end -}}
{{- $intent_class := index $config_data.intent_classification -}}
{{- if has (index $intent_class "micro_avg") "f1-score" -}}{{ template "micro_avg_tpl" (dict "branch" $intent_class "master" $intent_class_master) }}{{- else if has $intent_class "accuracy" -}}{{ template "accuracy_tpl" (dict "branch" $intent_class "master" $intent_class_master) }}{{- else -}}`no data`{{- end -}} |
{{- else -}}
`no data`|
{{- end -}}
{{- if has $config_data "entity_prediction" -}}
{{- /* Default to the branch results; they are replaced below when the master report contains this configuration */ -}}
{{- $entity_class_master := index $config_data.entity_prediction -}}
{{- if has $dataset_master $config_name -}}
{{- $entity_class_master = (index $dataset_master $config_name).entity_prediction -}}
{{- end -}}
{{- $entity_class := $config_data.entity_prediction -}}
{{- if has (index $entity_class "micro_avg") "f1-score" -}}{{ template "micro_avg_tpl" (dict "branch" $entity_class "master" $entity_class_master) }}{{- else if has $entity_class "accuracy" -}}{{ template "accuracy_tpl" (dict "branch" $entity_class "master" $entity_class_master) }}{{- else -}}`no data`{{- end -}} |
{{- else -}}
`no data`|
{{- end -}}
{{- if has $config_data "response_selection" -}}
{{- /* Default to the branch results; they are replaced below when the master report contains this configuration */ -}}
{{- $response_class_master := index $config_data.response_selection -}}
{{- if has $dataset_master $config_name -}}
{{- $response_class_master = (index $dataset_master $config_name).response_selection -}}
{{- end -}}
{{- $response_class := $config_data.response_selection -}}
{{- if has (index $response_class "micro_avg") "f1-score" -}}{{ template "micro_avg_tpl" (dict "branch" $response_class "master" $response_class_master) }}{{- else if has $response_class "accuracy" -}}{{ template "accuracy_tpl" (dict "branch" $response_class "master" $response_class_master) }}{{- else -}}`no data`{{- end -}} |
{{- else -}}
`no data`|
{{- end -}}
{{- if has $config_data "story_prediction" -}}
{{- /* Default to the branch results; they are replaced below when the master report contains this configuration */ -}}
{{- $story_class_master := index $config_data.story_prediction -}}
{{- if has $dataset_master $config_name -}}
{{- $story_class_master = (index $dataset_master $config_name).story_prediction -}}
{{- end -}}
{{- $story_class := $config_data.story_prediction -}}
{{- if has (index $story_class "micro_avg") "f1-score" -}}{{ template "micro_avg_tpl" (dict "branch" $story_class "master" $story_class_master) }}{{- else if has $story_class "accuracy" -}}{{ template "accuracy_tpl" (dict "branch" $story_class "master" $story_class_master) }}{{- else -}}`no data`{{- end -}} |
{{- else -}}
`no data`|
{{- end }}
{{end}}
{{end}}