diff --git a/.gitignore b/.gitignore
index 16fdfb48..f9c9edb9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,7 +25,7 @@ models/.DS_Store
 src/
 workdir/
 *save*/
-eval_dir/*
+eval_dir*/*
 genieNLP-tests*
 
 lightning_logs/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 09326ab1..227e5571 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.0.1
+    rev: v4.1.0
     hooks:
       - id: check-yaml
       - id: end-of-file-fixer
@@ -8,16 +8,16 @@ repos:
       - id: trailing-whitespace
         exclude: ^(tests/dataset/|tests/database/|tests/expected_results/)
 -   repo: https://github.com/hadialqattan/pycln
-    rev: v1.0.3
+    rev: v1.2.1
     hooks:
       - id: pycln
         args: [--config=pyproject.toml]
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.9.3
+    rev: 5.10.1
     hooks:
       - id: isort
 -   repo: https://github.com/psf/black
-    rev: 21.9b0
+    rev: 22.1.0
     hooks:
       - id: black
 -   repo: https://gitlab.com/pycqa/flake8
diff --git a/.travis.yml b/.travis.yml
index cbe7031e..9a6d55f7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,6 +40,11 @@ jobs:
       stage: test
       script:
         - bash ./tests/test_translation.sh
+    -
+      name: "E2E Dialogues tests"
+      stage: test
+      script:
+        - bash ./tests/test_e2e_dialogues.sh
     -
       name: "NED tests"
       stage: test
diff --git a/genienlp/arguments.py b/genienlp/arguments.py
index ef810b77..9ba7c9c1 100644
--- a/genienlp/arguments.py
+++ b/genienlp/arguments.py
@@ -183,6 +183,7 @@ def parse_argv(parser):
         default=0.1,
         help='multiplicative constant choosing the weight of encoder_loss in total loss',
     )
+    parser.add_argument('--train_set_name', type=str, default='train', help='Training dataset name to use during training')
     parser.add_argument('--eval_set_name', type=str, help='Evaluation dataset name to use during training')
 
     parser.add_argument('--max_output_length', default=150, type=int, help='maximum output length for generation')
@@ -544,6 +545,33 @@ def parse_argv(parser):
         help='Debugging flag for hf datasets where validation will be performed on train set',
     )
 
+    parser.add_argument(
+        '--e2e_dialogue_evaluation',
+        action='store_true',
+        help='Evaluate model on a dialogue dataset end-to-end; i.e. model predictions are used as input instead of gold',
+    )
+    parser.add_argument(
+        '--e2e_dialogue_valid_subtasks',
+        nargs='+',
+        type=str,
+        default=['dst', 'api', 'da'],
+        help='Evaluate only on these subtasks when calculating e2e_dialogue_score; rg is not included by default',
+    )
+    parser.add_argument(
+        '--e2e_dialogue_valid_submetrics',
+        nargs='+',
+        type=str,
+        default=['jga', 'em', 'em'],
+        help='Specify metrics to use for each of subtasks in e2e_dialogue_valid_subtasks.',
+    )
+    parser.add_argument(
+        '--e2e_dialogue_valid_subweights',
+        nargs='+',
+        type=float,
+        default=[1.0, 1.0, 1.0],
+        help='Specify weights to use for each of subtasks in e2e_dialogue_valid_subtasks.',
+    )
+
 
 def check_and_update_generation_args(args):
     """
@@ -632,6 +660,20 @@ def post_parse_general(args):
 
 
 def post_parse_train_specific(args):
+    if args.e2e_dialogue_evaluation and args.val_batch_size[0] != 1:
+        logger.warning('When evaluating bitod end2end val_batch_size should be 1 so we load data turn by turn')
+        args.val_batch_size = [1]
+
+    if len(args.e2e_dialogue_valid_subtasks) != len(args.e2e_dialogue_valid_submetrics):
+        raise ValueError(
+            'Length of e2e_dialogue_valid_subtasks and e2e_dialogue_valid_submetrics arguments should be equal (i.e. one metric per subtask)'
+        )
+
+    if len(args.e2e_dialogue_valid_subtasks) != len(args.e2e_dialogue_valid_subweights):
+        raise ValueError(
+            'Length of e2e_dialogue_valid_subtasks and e2e_dialogue_valid_subweights arguments should be equal (i.e. one weight per subtask)'
+        )
+
     if len(args.val_batch_size) < len(args.val_task_names):
         args.val_batch_size = len(args.val_task_names) * args.val_batch_size
 
diff --git a/genienlp/data_utils/numericalizer.py b/genienlp/data_utils/numericalizer.py
index c4337968..4f4eac71 100644
--- a/genienlp/data_utils/numericalizer.py
+++ b/genienlp/data_utils/numericalizer.py
@@ -278,10 +278,6 @@ def build_vocab(self, vocab_sets, tasks):
         if self.args.add_entities_to_text != 'off':
             self._tokenizer.add_tokens(['<e>', '</e>'])
 
-        # add special tokens for ambig_qa task
-        if any(task.name == 'ambig_qa' for task in tasks):
-            self._tokenizer.add_tokens(['<q>', '<p>', '<u>'])
-
         existing_special_tokens = self._tokenizer.special_tokens_map
         # add separator if it doesn't exist. It will be used to concatenate context and question
         if 'sep_token' not in existing_special_tokens:
diff --git a/genienlp/metrics.py b/genienlp/metrics.py
index 06e2f1de..aec0463e 100644
--- a/genienlp/metrics.py
+++ b/genienlp/metrics.py
@@ -27,11 +27,11 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import collections
+import logging
 import os
 import re
 import string
-import logging
+from collections import Counter, OrderedDict, defaultdict
 from contextlib import closing
 from multiprocessing import Pool, cpu_count
 from subprocess import PIPE, Popen
@@ -40,6 +40,8 @@
 import numpy as np
 import sacrebleu
 from datasets import load_metric
+from dialogues import Bitod
+from dialogues.bitod.src.evaluate import convert_lists_to_set
 from pyrouge import Rouge155
 from seqeval import metrics as seq_metrics
 from seqeval import scheme as seq_scheme
@@ -51,7 +53,8 @@
 
 # metrics that are calculated over a corpus (i.e. a list of predictions and gold answers, not single ones).
 # These metrics cannot be calculated on individual examples and then averaged.
-corpus_level_metrics = set(['bleu', 'casedbleu', 'ter', 't5_bleu', 'nmt_bleu', 'corpus_f1'])
+corpus_level_metrics = {'bleu', 'casedbleu', 'ter', 't5_bleu', 'nmt_bleu', 'corpus_f1', 'jga'}
+
 
 def to_lf(s, table):
     aggs = [y.lower() for y in Query.agg_ops]
@@ -218,7 +221,7 @@ def lower(text):
 def f1_score(prediction, ground_truth):
     prediction_tokens = prediction.split()
     ground_truth_tokens = ground_truth.split()
-    common = collections.Counter(prediction_tokens) & collections.Counter(ground_truth_tokens)
+    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
     num_same = sum(common.values())
     if num_same == 0:
         return 0
@@ -519,7 +522,81 @@ def computeDialogue(greedy, answer):
     return joint_goal_em, turn_request_em, turn_goal_em, answer
 
 
-def compute_metrics(predictions: Iterable[str], answers: Union[Iterable[str], Iterable[Iterable[str]]], requested_metrics: Iterable, lang: str):
+def compute_e2e_dialogue_score(greedy, answer, tgt_lang, args, example_ids):
+    """
+    Score every requested dialogue subtask and fold the scores into a single `e2e_dialogue_score`.
+
+    An example belongs to a subtask when its id ends with `/<subtask>`. Subtask k is scored with
+    `args.e2e_dialogue_valid_submetrics[k]`; the combined score is the mean of the subtask scores
+    weighted by `args.e2e_dialogue_valid_subweights[k] * number of examples`.
+
+    Returns an OrderedDict with keys e2e_dialogue_score, JGA, API_em, DA_em and BLEU. Entries of subtasks
+    that were not scored stay 0.0, and so does the combined score when the total weight is zero.
+    """
+    results = OrderedDict({'e2e_dialogue_score': 0.0, 'JGA': 0.0, 'API_em': 0.0, 'DA_em': 0.0, 'BLEU': 0.0})
+    subtask2result_key = OrderedDict({'dst': 'JGA', 'api': 'API_em', 'da': 'DA_em', 'rg': 'BLEU'})
+
+    subtask_metrics_dict = OrderedDict()  # subtask -> (score, number of examples, weight)
+    for k, subtask in enumerate(args.e2e_dialogue_valid_subtasks):
+        suffix = f'/{subtask}'
+        rows = [(id_, p, a) for id_, p, a in zip(example_ids, greedy, answer) if id_.endswith(suffix)]
+        if not rows:
+            continue
+        ids, preds, golds = (list(column) for column in zip(*rows))
+        metric = args.e2e_dialogue_valid_submetrics[k]
+        # ids are passed along because metrics such as jga need to know where each dialogue starts
+        sub_metrics = compute_metrics(preds, golds, [metric], tgt_lang, args, ids)
+        subtask_metrics_dict[subtask] = (sub_metrics[metric], len(golds), args.e2e_dialogue_valid_subweights[k])
+
+    # TODO how should we aggregate?
+    weighted_num_examples = 0
+    for subtask, (sub_result, num_ex, weight) in subtask_metrics_dict.items():
+        results[subtask2result_key[subtask]] += sub_result
+        results['e2e_dialogue_score'] += weight * (sub_result * num_ex)
+        weighted_num_examples += weight * num_ex
+
+    if weighted_num_examples:  # nothing scored (or zero total weight): keep 0.0 instead of dividing by zero
+        results['e2e_dialogue_score'] /= weighted_num_examples
+
+    return results
+
+
+def computeJGA(greedy, answer, example_ids):
+    """
+    Joint goal accuracy: percentage of examples whose accumulated predicted state equals the gold state.
+
+    States are accumulated example by example and reset whenever the dialogue id (second '/'-separated
+    field of the example id) changes, so the examples of one dialogue must be contiguous and in order.
+    Each entry of `answer` is a list of golds; only its first element is used. Returns 0.0 for empty input.
+    """
+    assert len(example_ids) == len(greedy) == len(answer)
+    if not greedy:
+        return 0.0  # nothing to score; the ratio below would otherwise divide by zero
+    dataset = Bitod()
+    hit = 0
+    cur_dial_id = None
+    for id_, g, a in zip(example_ids, greedy, answer):
+        dial_id = id_.split('/')[1]
+        if dial_id != cur_dial_id:  # first example of a new dialogue: start from empty states
+            cur_dial_id, greedy_state, answer_state = dial_id, defaultdict(), defaultdict()
+        a, g = dataset.span2state(a[0]), dataset.span2state(g)
+        dataset.update_state(a, answer_state)
+        dataset.update_state(g, greedy_state)
+        convert_lists_to_set(answer_state)
+        convert_lists_to_set(greedy_state)
+        if answer_state == greedy_state:
+            hit += 1
+    return hit / len(greedy) * 100
+
+
+def compute_metrics(
+    predictions: Iterable[str],
+    answers: Union[Iterable[str], Iterable[Iterable[str]]],
+    requested_metrics: Iterable,
+    lang: str,
+    args,
+    example_ids: Iterable[str] = None,
+):
     """
     Inputs:
         predictions: a list of model predictions
@@ -536,11 +613,22 @@ def compute_metrics(predictions: Iterable[str], answers: Union[Iterable[str], It
             lfem
             joint_goal_em, turn_request_em, turn_goal_em, avg_dialogue
         lang: the language of the predictions and answers. Used for BERTScore.
+        args: arguments
+        example_ids: used to calculate some of e2e dialogue metrics that need to know span of each dialogue such as JGA
     """
     metric_keys = []
     metric_values = []
     if not isinstance(answers[0], list):
         answers = [[a] for a in answers]
+    if 'e2e_dialogue_score' in requested_metrics:
+        requested_metrics += ['JGA', 'API_em', 'DA_em', 'BLEU']
+        results = compute_e2e_dialogue_score(predictions, answers, lang, args, example_ids)
+        metric_keys += results.keys()
+        metric_values += results.values()
+    if 'jga' in requested_metrics:
+        jga = computeJGA(predictions, answers, example_ids)
+        metric_keys += ['jga']
+        metric_values += [jga]
     if 'lfem' in requested_metrics:
         lfem, answers = computeLFEM(predictions, answers)
         metric_keys += ['lfem']
@@ -550,9 +638,10 @@ def compute_metrics(predictions: Iterable[str], answers: Union[Iterable[str], It
         avg_dialogue = (joint_goal_em + request_em) / 2
         metric_keys += ['joint_goal_em', 'turn_request_em', 'turn_goal_em', 'avg_dialogue']
         metric_values += [joint_goal_em, request_em, turn_goal_em, avg_dialogue]
-    em = computeEM(predictions, answers)
-    metric_keys += ['em']
-    metric_values += [em]
+    if 'em' in requested_metrics:
+        em = computeEM(predictions, answers)
+        metric_keys += ['em']
+        metric_values += [em]
     if 'pem' in requested_metrics:
         pem = computePartialEM(predictions, answers)
         metric_keys.append('pem')
@@ -621,7 +710,8 @@ def convert_IOB2_to_IOB1(labels):
         convert_IOB2_to_IOB1(predictions_processed)
         convert_IOB2_to_IOB1(answers_processed)
         f1 = (
-            seq_metrics.f1_score(y_pred=predictions_processed, y_true=answers_processed, mode='strict', scheme=seq_scheme.IOB1) * 100
+            seq_metrics.f1_score(y_pred=predictions_processed, y_true=answers_processed, mode='strict', scheme=seq_scheme.IOB1)
+            * 100
         )
 
         metric_keys.append('ner_f1_IOB1')
@@ -653,26 +743,35 @@ def convert_IOB2_to_IOB1(labels):
         metric_values += [corpus_f1, precision, recall]
 
     metric_dict = dict(zip(metric_keys, metric_values))
-    metric_dict = collections.OrderedDict((key, metric_dict[key]) for key in requested_metrics)
+    metric_dict = OrderedDict((key, metric_dict[key]) for key in requested_metrics)
     return metric_dict
 
 
-def calculate_and_reduce_metrics(predictions, answers, metrics_to_compute, reduce_metrics, lang):
-    metrics = collections.OrderedDict()
-    if reduce_metrics == 'max':
-        for i in range(len(predictions[0])): # for each output (in case of mulitple outputs)
-            partial_metrics = compute_metrics([p[i] for p in predictions], answers, metrics_to_compute, lang) # calculate the metric on all first outputs, all second outputs, etc.
+def calculate_and_reduce_metrics(generation_output, metrics_to_compute, args, lang):
+    metrics = OrderedDict()
+    example_ids = generation_output.example_ids
+    predictions = generation_output.predictions
+    answers = generation_output.answers
+
+    if args.reduce_metrics == 'max':
+        for i in range(len(predictions[0])):  # for each output (in case of multiple outputs)
+            partial_metrics = compute_metrics(
+                [p[i] for p in predictions], answers, metrics_to_compute, lang, args, example_ids
+            )  # calculate the metric on all first outputs, all second outputs, etc.
             for k, v in partial_metrics.items():
                 metrics[k] = max(metrics.get(k, 0), v)
-    elif reduce_metrics == 'top_k':
+    elif args.reduce_metrics == 'top_k':
         for m in metrics_to_compute:
             if m in corpus_level_metrics:
-                logging.warning('You are using the corpus-level metric %s with `--reduce_metrics top_k`, which can lead to incorrect results.', m)
-        
-        for i in range(len(predictions)): # for each input
-            example_metrics = collections.OrderedDict() # keep track of metrics for one input and all of its outputs
-            for j in range(len(predictions[i])): # for each output (in case of mulitple outputs)
-                partial_metrics = compute_metrics([predictions[i][j]], [answers[i]], metrics_to_compute, lang) # calculate the metric on the j-th output of the i-th input
+                logging.warning(
+                    f'You are using the corpus-level metric {m} with `--reduce_metrics top_k`, which can lead to incorrect results.',
+                )
+        for i in range(len(predictions)):  # for each input
+            example_metrics = OrderedDict()  # keep track of metrics for one input and all of its outputs
+            for j in range(len(predictions[i])):  # for each output (in case of multiple outputs)
+                partial_metrics = compute_metrics(
+                    [predictions[i][j]], [answers[i]], metrics_to_compute, lang, args, example_ids
+                )  # calculate the metric on the j-th output of the i-th input
                 for k, v in partial_metrics.items():
                     example_metrics[k] = max(example_metrics.get(k, 0), v)
             # sum metrics for all examples
diff --git a/genienlp/paraphrase/scripts/transform_dataset.py b/genienlp/paraphrase/scripts/transform_dataset.py
index 9b2df661..2dd53ef6 100644
--- a/genienlp/paraphrase/scripts/transform_dataset.py
+++ b/genienlp/paraphrase/scripts/transform_dataset.py
@@ -144,7 +144,7 @@ def main(args):
             new_queries = []  # list of lists
             query_file = open(args.query_file, 'r')
             for line in query_file:
-                queries = line.split('\t')[1:-1]  # 0 is example id, -1 is gold answer
+                queries = line.split('\t')[1:-2]  # 0 is example id, -1 is input, -2 is gold answer
                 new_queries.append([lower_case(tokenize(q.strip())) for q in queries])
         if args.transformation in ['remove_wrong_thingtalk', 'get_wrong_thingtalk']:
             gold_thingtalks = []
diff --git a/genienlp/predict.py b/genienlp/predict.py
index 6fbe51fa..c92a8213 100644
--- a/genienlp/predict.py
+++ b/genienlp/predict.py
@@ -38,18 +38,18 @@
 
 from torch.multiprocessing import Process, set_start_method
 
-from .metrics import calculate_and_reduce_metrics
-
 try:
     set_start_method('spawn')
 except RuntimeError:
     pass
 
+
 import torch
 
 from . import models
 from .arguments import check_and_update_generation_args
 from .calibrate import ConfidenceEstimator
+from .metrics import calculate_and_reduce_metrics
 from .ned.ned_utils import init_ned_model
 from .tasks.registry import get_tasks
 from .util import (
@@ -254,6 +254,30 @@ def parse_argv(parser):
         help='do not preserve quotation marks in the output. Useful if using alignment for semantic parsing or NLG',
     )
 
+    parser.add_argument(
+        '--e2e_dialogue_evaluation',
+        action='store_true',
+        help='Evaluate model on a dialogue dataset end-to-end; i.e. model predictions are used as input instead of gold',
+    )
+    parser.add_argument(
+        '--e2e_dialogue_valid_subtasks',
+        nargs='+',
+        type=str,
+        help='Evaluate only on these subtasks when calculating e2e_dialogue_score; rg is not included by default',
+    )
+    parser.add_argument(
+        '--e2e_dialogue_valid_submetrics',
+        nargs='+',
+        type=str,
+        help='Specify metrics to use for each of subtasks in e2e_dialogue_valid_subtasks.',
+    )
+    parser.add_argument(
+        '--e2e_dialogue_valid_subweights',
+        nargs='+',
+        type=float,
+        help='Specify weights to use for each of subtasks in e2e_dialogue_valid_subtasks.',
+    )
+
 
 def set_default_values(args):
     """
@@ -262,6 +286,10 @@ def set_default_values(args):
     if args.confidence_feature_path is None:
         args.confidence_feature_path = os.path.join(args.path, 'confidence_features.pkl')
 
+    if args.e2e_dialogue_evaluation and args.val_batch_size[0] != 1:
+        logger.warning('When evaluating bitod end-to-end, val_batch_size should be 1 so we load the data turn by turn')
+        args.val_batch_size = [1]
+
 
 def check_args(args):
 
@@ -412,9 +440,8 @@ def run(args, device):
     log_model_size(logger, model, args.model)
     model.to(device)
 
-    decaScore = []
-    task_scores = defaultdict(list)
     model.eval()
+    task_scores = defaultdict(list)
 
     eval_dir = os.path.join(args.eval_dir, args.evaluate)
     os.makedirs(eval_dir, exist_ok=True)
@@ -459,6 +486,7 @@ def run(args, device):
                     output_confidence_features=args.save_confidence_features,
                     confidence_estimators=confidence_estimators,
                     disable_progbar=False,
+                    eval_dir=eval_dir,
                 )
 
             if args.save_confidence_features:
@@ -469,54 +497,76 @@ def run(args, device):
             with open(prediction_file_name, 'w' + ('' if args.overwrite else '+')) as prediction_file:
                 for i in range(len(generation_output.example_ids)):
                     if args.one_output_per_line:
-                        lines = [(
-                            generation_output.example_ids[i]
-                            + '\t'
-                            + prediction
-                            + '\t'
-                            + generation_output.answers[i]
-                        ) for prediction in generation_output.predictions[i]] # one line per generation output
+                        lines = [
+                            (
+                                generation_output.example_ids[i]
+                                + '\t'
+                                + prediction
+                                + '\t'
+                                + generation_output.answers[i]
+                                + '\t'
+                                + generation_output.contexts[i]
+                            )
+                            for prediction in generation_output.predictions[i]
+                        ]  # one line per generation output
                     else:
-                        lines = [(
-                            generation_output.example_ids[i]
-                            + '\t'
-                            + '\t'.join(generation_output.predictions[i])
-                            + '\t'
-                            + generation_output.answers[i]
-                        )]  # one line with all generation outputs separated by '\t'
+                        lines = [
+                            (
+                                generation_output.example_ids[i]
+                                + '\t'
+                                + '\t'.join(generation_output.predictions[i])
+                                + '\t'
+                                + generation_output.answers[i]
+                                + '\t'
+                                + generation_output.contexts[i]
+                            )
+                        ]  # one line with all generation outputs separated by '\t'
                     if args.calibrator_paths is not None:
                         for score in generation_output.confidence_scores:
-                            lines = [line + '\t' + str(score[i]) for line in lines] # append score to all lines
+                            lines = [line + '\t' + str(score[i]) for line in lines]  # append score to all lines
                     prediction_file.write('\n'.join(lines) + '\n')
 
             if args.translate_return_raw_outputs:
                 with open(raw_prediction_file_name, 'w' + ('' if args.overwrite else '+')) as prediction_file:
                     for i in range(len(generation_output.example_ids)):
                         if args.one_output_per_line:
-                            lines = [(
-                                generation_output.example_ids[i]
-                                + '\t'
-                                + raw_prediction
-                                + '\t'
-                                + generation_output.answers[i]
-                            ) for raw_prediction in generation_output.raw_predictions[i]] # one line per generation output
+                            lines = [
+                                (
+                                    generation_output.example_ids[i]
+                                    + '\t'
+                                    + raw_prediction
+                                    + '\t'
+                                    + generation_output.answers[i]
+                                    + '\t'
+                                    + generation_output.contexts[i]
+                                )
+                                for raw_prediction in generation_output.raw_predictions[i]
+                            ]  # one line per generation output
                         else:
-                            lines = [(
-                                generation_output.example_ids[i]
-                                + '\t'
-                                + '\t'.join(generation_output.raw_predictions[i])
-                                + '\t'
-                                + generation_output.answers[i]
-                            )]  # one line with all outputs separated by '\t'
+                            lines = [
+                                (
+                                    generation_output.example_ids[i]
+                                    + '\t'
+                                    + '\t'.join(generation_output.raw_predictions[i])
+                                    + '\t'
+                                    + generation_output.answers[i]
+                                    + '\t'
+                                    + generation_output.contexts[i]
+                                )
+                            ]  # one line with all outputs separated by '\t'
                         prediction_file.write('\n'.join(lines) + '\n')
 
             if len(generation_output.answers) > 0:
                 metrics_to_compute = task.metrics
                 metrics_to_compute += args.extra_metrics
+                metrics_to_compute = [metric for metric in task.metrics if metric not in ['loss']]
                 if args.main_metric_only:
                     metrics_to_compute = [metrics_to_compute[0]]
                 metrics = calculate_and_reduce_metrics(
-                    generation_output.predictions, generation_output.answers, metrics_to_compute, args.reduce_metrics, tgt_lang
+                    generation_output,
+                    metrics_to_compute,
+                    args,
+                    tgt_lang,
                 )
 
                 with open(results_file_name, 'w' + ('' if args.overwrite else '+')) as results_file:
@@ -526,17 +576,21 @@ def run(args, device):
                     for i, (c, p, a) in enumerate(
                         zip(generation_output.contexts, generation_output.predictions, generation_output.answers)
                     ):
-                        log_string = f'\nContext {i+1}: {c}\nPrediction {i + 1} ({len(p)} outputs): {p}\nAnswer {i + 1}: {a}\n'
+                        log_string = (
+                            f'\nContext {i + 1}: {c}\nPrediction {i + 1} ({len(p)} outputs): {p}\nAnswer {i + 1}: {a}\n'
+                        )
                         if args.calibrator_paths is not None:
-                            log_string += f'Confidence {i+1} : '
+                            log_string += f'Confidence {i + 1} : '
                             for score in generation_output.confidence_scores:
                                 log_string += f'{score[i]:.3f}, '
                             log_string += '\n'
                         logger.info(log_string)
-                    logger.info(metrics)
+
+                logger.info(metrics)
 
                 task_scores[task].append((len(generation_output.answers), metrics[task.metrics[0]]))
 
+    decaScore = []
     for task in task_scores.keys():
         decaScore.append(
             sum([length * score for length, score in task_scores[task]]) / sum([length for length, score in task_scores[task]])
@@ -560,9 +614,6 @@ def main(args):
     args.tasks = list(get_tasks(args.task_names, args).values())
 
     logger.info(f'Arguments:\n{pformat(vars(args))}')
-    logger.info(f'Loading from {args.best_checkpoint}')
-
-    devices = get_devices(args.devices)
 
     if args.override_valid_metrics:
         assert len(args.override_valid_metrics) == len(args.tasks)
@@ -579,6 +630,9 @@ def main(args):
 
             task.metrics = new_metrics
 
+    logger.info(f'Loading from {args.best_checkpoint}')
+    devices = get_devices(args.devices)
+
     if len(devices) > 1:
         logger.info(f'Independent multi-GPU generation on following devices: {devices}')
         all_processes = []
diff --git a/genienlp/tasks/almond_task.py b/genienlp/tasks/almond_task.py
index a2ed971c..e1de6b81 100644
--- a/genienlp/tasks/almond_task.py
+++ b/genienlp/tasks/almond_task.py
@@ -383,7 +383,7 @@ def _make_example(self, parts, dir_name=None, **kwargs):
         contexts = []
         src_char_spans = None
         if split_sentence:
-            if self.args.do_alignment:
+            if self.need_attention_scores:
                 src_quotation_symbol = '"'
                 src_char_spans_ind = [index for index, char in enumerate(context) if char == src_quotation_symbol]
                 src_char_spans = [
@@ -394,7 +394,9 @@ def _make_example(self, parts, dir_name=None, **kwargs):
         if len(contexts) > 1:
             examples = []
             for i, text in enumerate(contexts):
-                ex_id, text = self.construct_id2span_mapping(self.name + '/' + example_id + f'@{i}', text, 'context')
+                ex_id = self.name + '/' + example_id + f'@{i}'
+                if self.need_attention_scores:
+                    ex_id, text = self.construct_id2span_mapping(ex_id, text, 'context')
                 examples.append(
                     Example.from_raw(
                         ex_id,
@@ -406,7 +408,9 @@ def _make_example(self, parts, dir_name=None, **kwargs):
                     )
                 )
         else:
-            ex_id, context = self.construct_id2span_mapping(self.name + '/' + example_id, context, 'context')
+            ex_id = self.name + '/' + example_id
+            if self.need_attention_scores:
+                ex_id, context = self.construct_id2span_mapping(ex_id, context, 'context')
             examples = Example.from_raw(ex_id, context, question, answer, preprocess=self.preprocess_field, lower=False)
 
         return examples
@@ -480,7 +484,7 @@ def batch_postprocess_prediction_ids(self, batch_example_ids, batch_src_ids, bat
                 )
                 plt.show()
 
-            if self.args.do_alignment:
+            if self.need_attention_scores:
                 src_spans = self.input_spans[example_id]
                 try:
                     text = align_and_replace(
diff --git a/genienlp/tasks/base_task.py b/genienlp/tasks/base_task.py
index aff367a8..b88ac2cf 100644
--- a/genienlp/tasks/base_task.py
+++ b/genienlp/tasks/base_task.py
@@ -41,6 +41,7 @@ class BaseTask(object):
 
     def __init__(self, name, args):
         self.name = name
+        self.args = args
         self._metrics = ['em', 'nem', 'nf1']
         # special task-specific tokens that should not be subword tokenized
         self.special_tokens = set()
@@ -63,7 +64,6 @@ def get_splits(self, root, **kwargs):
         """
         Load the train, test, eval datasets for this task
 
-        :param field: the text.Field to use for tokenization, preprocessing and vocabulary construction
         :param root: the base directory where data is stored
         :param kwargs: other arguments to pass to the Dataset
         :return: a list of text.Dataset
diff --git a/genienlp/tasks/generic_dataset.py b/genienlp/tasks/generic_dataset.py
index dec8d7df..de40eeda 100644
--- a/genienlp/tasks/generic_dataset.py
+++ b/genienlp/tasks/generic_dataset.py
@@ -41,6 +41,7 @@
 from typing import Iterable
 
 import torch
+import ujson
 from datasets import load_dataset
 
 from ..data_utils.example import Example, NumericalizedExamples
@@ -1873,7 +1874,7 @@ def __init__(self, data, *, make_example, **kwargs):
         super().__init__(examples, **kwargs)
 
     @classmethod
-    def return_splits(cls, name, path='.data', train='train', validation='dev', test='test', **kwargs):
+    def return_splits(cls, path='.data', train='train', validation='dev', test='test', **kwargs):
 
         crossner_domains = kwargs.pop('crossner_domains')
 
@@ -1968,3 +1969,46 @@ def splits(cls, root='.data', train='train', validation='eval', test='test', **k
             ),
             Split(train=train_path, eval=validation_path, test=test_path),
         )
+
+
+class BiTODDataset(CQA):
+    """Dataset read from a JSON file whose 'data' entry is a list of turns (one candidate example each)."""
+
+    def __init__(self, path, *, make_example, **kwargs):
+        subsample = kwargs.pop('subsample')
+        examples = []
+
+        with open(path) as fin:
+            for turn in ujson.load(fin)['data']:
+                processed = make_example(turn, train_target=kwargs.get('train_target', False))
+                if processed:
+                    examples.append(processed)
+
+                if subsample is not None and len(examples) >= subsample:
+                    break
+
+        super().__init__(examples, **kwargs)
+
+        # do not sort eval/test sets so we can compute individual scores for each subtask (e2e_dialogue_score)
+        self.eval_sort_key_fn = None
+
+        # in e2e evaluation use 1 batch at a time
+        if kwargs.get('e2e_evaluation', False):
+            self.eval_batch_size_fn = default_batch_fn
+
+    @classmethod
+    def return_splits(cls, path='.data', train='train', validation='valid', test='test', **kwargs):
+        """
+        Build a dataset from `<path>/<name>.json` for each split whose name is not empty or None.
+        Returns two Split objects: the datasets and the paths they were read from (None for skipped splits).
+        """
+        # `test` is looked up under the name the caller passed, exactly like `train` and `validation`
+        train_path = os.path.join(path, f'{train}.json') if train else None
+        validation_path = os.path.join(path, f'{validation}.json') if validation else None
+        test_path = os.path.join(path, f'{test}.json') if test else None
+        train_data = cls(train_path, **kwargs) if train_path else None
+        validation_data = cls(validation_path, **kwargs) if validation_path else None
+        test_data = cls(test_path, **kwargs) if test_path else None
+        return Split(train=train_data, eval=validation_data, test=test_data), Split(
+            train=train_path, eval=validation_path, test=test_path
+        )
diff --git a/genienlp/tasks/generic_task.py b/genienlp/tasks/generic_task.py
index 8cbf999d..3418d6a0 100644
--- a/genienlp/tasks/generic_task.py
+++ b/genienlp/tasks/generic_task.py
@@ -33,7 +33,7 @@
 from . import generic_dataset
 from .almond_task import BaseAlmondTask
 from .base_task import BaseTask
-from .generic_dataset import CrossNERDataset, OODDataset
+from .generic_dataset import BiTODDataset, CrossNERDataset, OODDataset
 from .registry import register_task
 
 
@@ -377,7 +377,7 @@ def _make_example(self, parts, dir_name=None, **kwargs):
         )
 
     def get_splits(self, root, **kwargs):
-        return CrossNERDataset.return_splits(name=self.name, path=root, make_example=self._make_example, **kwargs)
+        return CrossNERDataset.return_splits(path=root, make_example=self._make_example, **kwargs)
 
 
 @register_task('ood_task')
@@ -393,3 +393,98 @@ def metrics(self):
 
     def get_splits(self, root, **kwargs):
         return OODDataset.splits(root=root, **kwargs)
+
+
+@register_task('bitod')
+class BiTOD(BaseTask):
+    """Task whose examples are dialogue turns: `input_text` is the context and `output_text` the answer."""
+
+    def __init__(self, name, args):
+        super().__init__(name, args)
+        # special tokens grouped by version label (presumably the data-format version that introduced them)
+        self.special_tokens = {
+            # v1
+            '<user>',
+            '<system>',
+            '<API>',
+            '<knowledge>',
+            '<slot>',
+            '<relation>',
+            '<value>',
+            '<sep>',
+            '<unknow>',
+            '<dialogue_state>',
+            # v2 ('<knowledge>' is already listed under v1)
+            'USER:',
+            'SYSTEM:',
+            '<history>',
+            '<state>',
+            '#unknown',
+            'DST:',
+            'API:',
+            'Response:',
+            # v5, v7, v9
+            'AGENT_ACTS:',
+            'ACTS:',
+            'USER_ACTS:',
+            # v11
+            '<endofknowledge>',
+            '<endofhistory>',
+            '<endofstate>',
+            # v13
+            'AGENT_ACTS_PREV',
+            # v2_10
+            '<actions>',
+            '<endofactions>',
+            'DA:',
+            'RG:',
+        }
+        self._metrics = ['e2e_dialogue_score']
+
+    def utterance_field(self):
+        return 'context'
+
+    def _make_example(self, turn, **kwargs):
+        """Turn a raw turn dict into an Example; return None if a `train_target` kwarg is given and the turn's differs."""
+        fields = ('dial_id', 'turn_id', 'input_text', 'output_text', 'train_target')
+        dial_id, turn_id, input_text, output_text, train_target = (turn[field] for field in fields)
+
+        requested_target = kwargs.get('train_target', False)
+        if requested_target and train_target != requested_target:
+            return None
+
+        # the full id reads `<task name>/<dial_id>/<turn_id>/<train_target>`
+        example_id = '/'.join([dial_id, str(turn_id), train_target])
+
+        return Example.from_raw(
+            self.name + '/' + example_id, input_text, '', output_text, preprocess=self.preprocess_field, lower=False
+        )
+
+    def get_splits(self, root, **kwargs):
+        # when set, BiTODDataset switches its eval batch-size function to default_batch_fn
+        kwargs['e2e_evaluation'] = self.args.e2e_dialogue_evaluation
+        return BiTODDataset.return_splits(path=root, make_example=self._make_example, **kwargs)
+
+
+@register_task('bitod_nlg')
+class BiTODNLG(BiTOD):
+    # BiTOD limited to turns whose train_target is 'rg'; reports 'casedbleu' instead of the e2e score
+    def __init__(self, name, args):
+        super().__init__(name, args)
+        self._metrics = ['casedbleu']
+
+    def get_splits(self, root, **kwargs):
+        kwargs['train_target'] = 'rg'
+        return super().get_splits(root, **kwargs)
+
+
+@register_task('bitod_dst')
+class BiTODDST(BiTOD):
+    # BiTOD limited to turns whose train_target is 'dst'; reports 'jga' and 'em' instead of the e2e score
+    def __init__(self, name, args):
+        super().__init__(name, args)
+        self._metrics = ['jga', 'em']
+
+    def get_splits(self, root, **kwargs):
+        kwargs['train_target'] = 'dst'
+        return super().get_splits(root, **kwargs)
diff --git a/genienlp/tasks/hf_task.py b/genienlp/tasks/hf_task.py
index eb21a11a..7891e471 100644
--- a/genienlp/tasks/hf_task.py
+++ b/genienlp/tasks/hf_task.py
@@ -49,6 +49,7 @@ def utterance_field(self):
 class AmbigQA(HFTask):
     def __init__(self, name, args):
         super().__init__(name, args)
+        self.special_tokens = {'<q>', '<p>', '<u>'}
 
     @property
     def metrics(self):
diff --git a/genienlp/train.py b/genienlp/train.py
index 7b8f283c..3f23b767 100644
--- a/genienlp/train.py
+++ b/genienlp/train.py
@@ -96,6 +96,7 @@ def prepare_data(args, logger):
         for task in args.train_tasks:
             logger.info(f'Loading {task.name}')
             kwargs = {'test': None, 'validation': None}
+            kwargs['train'] = args.train_set_name
             kwargs.update(train_eval_shared_kwargs)
             kwargs['all_dirs'] = args.train_src_languages
             kwargs['cached_path'] = os.path.join(args.cache, task.name)
diff --git a/genienlp/util.py b/genienlp/util.py
index 544ec489..bb789738 100644
--- a/genienlp/util.py
+++ b/genienlp/util.py
@@ -232,7 +232,7 @@ def __repr__(self) -> str:
         )
 
 
-class GenerationOutput:
+class GenerationOutput(object):
     """
     Contains all the information that the generation function may need to output
     """
@@ -599,9 +599,16 @@ def make_data_loader(dataset, numericalizer, batch_size, device=None, train=Fals
         f'answer lengths (min, mean, max): {np.min(answer_lengths)}, {int(np.mean(answer_lengths))}, {np.max(answer_lengths)}'
     )
 
-    if dataset.batch_size_fn == input_tokens_fn:
+    if train:
+        sort_key_fn = dataset.sort_key_fn
+        batch_size_fn = dataset.batch_size_fn
+    else:
+        sort_key_fn = getattr(dataset, 'eval_sort_key_fn', dataset.sort_key_fn)
+        batch_size_fn = getattr(dataset, 'eval_batch_size_fn', dataset.batch_size_fn)
+
+    if batch_size_fn == input_tokens_fn:
         min_batch_length = np.min(context_lengths)
-    elif dataset.batch_size_fn == all_tokens_fn:
+    elif batch_size_fn == all_tokens_fn:
         min_batch_length = np.min(context_lengths) + np.min(answer_lengths)
     else:
         min_batch_length = 1
@@ -611,7 +618,7 @@ def make_data_loader(dataset, numericalizer, batch_size, device=None, train=Fals
 
     if min_batch_length > batch_size:
         raise ValueError(
-            f'The minimum example length in your dataset is {np.min(context_lengths) + np.min(answer_lengths)} but your batch size is {batch_size}.'
+            f'The minimum batch length in your dataset is {min_batch_length} but your batch size is {batch_size}.'
             f' Thus no examples will be processed. Consider increasing batch_size'
         )
     if np.min(answer_lengths) < min_output_length:
@@ -628,10 +635,10 @@ def make_data_loader(dataset, numericalizer, batch_size, device=None, train=Fals
     sampler = LengthSortedIterator(
         all_features,
         batch_size=batch_size,
-        sort=True,
+        sort=bool(sort_key_fn),
         shuffle_and_repeat=train,
-        sort_key_fn=dataset.sort_key_fn,
-        batch_size_fn=dataset.batch_size_fn,
+        sort_key_fn=sort_key_fn,
+        batch_size_fn=batch_size_fn,
         groups=dataset.groups,
     )
     # get the sorted data_source
@@ -861,7 +868,8 @@ def load_config_json(args):
             'eval_tgt_languages',
         ]
 
-        # train and predict scripts have these arguments in common. We use the values from train only if they are not provided in predict
+        # train and predict scripts have these arguments in common. We use the values from train only if they are not provided in predict.
+        # NOTE: do not set default values for these arguments in predict because the defaults will always override training arguments
         overwrite = [
             'val_batch_size',
             'num_beams',
@@ -877,13 +885,23 @@ def load_config_json(args):
             'min_output_length',
             'reduce_metrics',
             'database_dir',
+            'e2e_dialogue_valid_subtasks',
+            'e2e_dialogue_valid_submetrics',
+            'e2e_dialogue_valid_subweights',
         ]
-        # these are true/ false arguments
-        overwrite_actions = ['do_alignment', 'align_preserve_input_quotation', 'align_remove_output_quotation']
         for o in overwrite:
             if o not in args or getattr(args, o) is None:
                 retrieve.append(o)
+
+        # these are true/ false arguments
+        overwrite_actions = [
+            'do_alignment',
+            'align_preserve_input_quotation',
+            'align_remove_output_quotation',
+            'e2e_dialogue_evaluation',
+        ]
         for o in overwrite_actions:
+            # if argument is True in predict overwrite train; if False retrieve from train
             if not getattr(args, o, False):
                 retrieve.append(o)
 
@@ -946,6 +964,13 @@ def load_config_json(args):
                 # use default value
                 setattr(args, r, None)
 
+        if args.e2e_dialogue_valid_subtasks is None:
+            setattr(args, 'e2e_dialogue_valid_subtasks', ['dst', 'api', 'da'])
+        if args.e2e_dialogue_valid_submetrics is None:
+            setattr(args, 'e2e_dialogue_valid_submetrics', ['jga', 'em', 'em'])
+        if args.e2e_dialogue_valid_subweights is None:
+            setattr(args, 'e2e_dialogue_valid_subweights', [1.0, 1.0, 1.0])
+
         # backward compatibility for models trained with genienlp before NED Refactoring (2)
         if args.max_features_size is None:
             if hasattr(args, 'ned_features_size'):
diff --git a/genienlp/validate.py b/genienlp/validate.py
index a84718ba..934a9f7d 100644
--- a/genienlp/validate.py
+++ b/genienlp/validate.py
@@ -27,18 +27,27 @@
 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+import copy
+import logging
+import os
+import re
 import sys
+from collections import defaultdict
 
 import torch
+import ujson
 from dateparser.languages import default_loader
+from dialogues import Bitod
 from transformers import MarianTokenizer
 
+from .data_utils.example import NumericalizedExamples, SequentialField
 from .data_utils.progbar import progress_bar
 from .metrics import calculate_and_reduce_metrics
 from .models import TransformerForSequenceClassification, TransformerForTokenClassification
 from .util import GenerationOutput, merge_translated_sentences
 
+logger = logging.getLogger(__name__)
+
 
 def generate_with_model(
     model,
@@ -51,9 +60,22 @@ def generate_with_model(
     original_order=None,
     confidence_estimators=None,
     disable_progbar=True,
+    eval_dir=None,
 ):
+    if args.e2e_dialogue_evaluation:
+        return generate_with_seq2seq_model_for_dialogue(
+            model,
+            data_iterator,
+            numericalizer,
+            task,
+            args,
+            eval_dir,
+            output_predictions_only=output_predictions_only,
+            original_order=original_order,
+            disable_progbar=disable_progbar,
+        )
 
-    if isinstance(model, TransformerForTokenClassification) or isinstance(model, TransformerForSequenceClassification):
+    elif isinstance(model, (TransformerForTokenClassification, TransformerForSequenceClassification)):
         return generate_with_classification_model(
             model, data_iterator, numericalizer, task, original_order=original_order, disable_progbar=disable_progbar
         )
@@ -72,6 +94,259 @@ def generate_with_model(
         )
 
 
+def replace_capturing_group(input, re_pattern, replacement):
+    # replace the text of the first capturing group with replacement in the match of re_pattern; no match returns input as is
+    match = re_pattern.search(input)
+    if match is None:
+        return input
+    whole_match = match.group(0).strip()
+    captured_match = match.group(1).strip()
+    new_whole_match = whole_match.replace(captured_match, replacement)
+    # a callable keeps backslashes in the new text literal; a string template would expand (\1) or reject (\d) them
+    return re_pattern.sub(lambda _: new_whole_match, input)
+
+
+def generate_with_seq2seq_model_for_dialogue(
+    model,
+    data_iterator,
+    numericalizer,
+    task,
+    args,
+    eval_dir,
+    output_predictions_only=False,
+    original_order=None,
+    disable_progbar=True,
+) -> GenerationOutput:
+    """
+    Generate one turn at a time, feeding the state, knowledge and actions predicted so far back into later inputs.
+    Inputs:
+        eval_dir: directory where the collected predictions are written as `e2e_dialogue_preds.json`
+        original_order: List of indices. If provided, we will sort the results according to this order
+    Outputs: a GenerationOutput whose loss is None; only `predictions` is set if `output_predictions_only` == True,
+        otherwise `example_ids`, `predictions`, `answers` and `contexts` are all set
+        predictions: a List of Lists of strings (one single-element list per turn)
+        example_ids: strings of the form `task_name/dial_id/turn_id/train_target`
+    """
+
+    dataset = Bitod()  # helper from the `dialogues` package: state/span conversion, regexes and API calls used below
+    e2e_dialogue_preds = dict()
+
+    predictions = []
+    example_ids = []
+    answers = []
+    contexts = []
+
+    # TODO: handle multiple responses
+    hyperparameter_idx = 0
+
+    cur_dial_id = ''
+    knowledge = None
+
+    device = model.device
+
+    special_tokens = numericalizer._tokenizer.all_special_tokens
+
+    for k, turn in enumerate(progress_bar(data_iterator, desc='Generating', disable=disable_progbar)):
+        batch_size = len(turn.example_id)
+        assert batch_size == 1  # predictions of one turn feed the next input, so turns are handled one by one
+        batch_prediction = []
+        batch_example_ids = turn.example_id
+
+        example_ids += batch_example_ids
+
+        task_name, dial_id, turn_id, train_target = example_ids[-1].split('/')  # id is task/dial_id/turn_id/target
+        turn_id = int(turn_id)
+
+        if cur_dial_id != dial_id:
+            # new dialogue: reset everything that is carried over between turns
+            cur_dial_id = dial_id
+            dialogue_state = {}
+            # new_state_text = 'null'
+            knowledge = defaultdict(dict)
+            new_knowledge_text = 'null'
+            new_actions_text = 'null'
+            active_api = None
+            e2e_dialogue_preds[dial_id] = {"turns": defaultdict(dict), "API": defaultdict(dict)}
+
+        batch_context = []
+        batch_tokens = numericalizer.convert_ids_to_tokens(turn.context.value.data, skip_special_tokens=False)
+
+        # remove only beginning and trailing special tokens
+        # otherwise the sep_token added between context and question will be lost
+        for text in batch_tokens:
+            i = 0
+            while text[i] in special_tokens:
+                i += 1
+            j = len(text) - 1
+            while text[j] in special_tokens:
+                j -= 1
+            text = text[i : j + 1]
+
+            batch_context.append(numericalizer._tokenizer.convert_tokens_to_string(text))
+
+        contexts += batch_context
+
+        if not output_predictions_only:
+            batch_answer = numericalizer.reverse(turn.answer.value.data, 'answer')
+            batch_answer = [
+                task.postprocess_prediction(batch_example_ids[i], batch_answer[i]) for i in range(len(batch_answer))
+            ]
+            answers += batch_answer
+
+        new_state_text = dataset.state2span(dialogue_state)  # text form of the state built from earlier predictions
+
+        if train_target == 'dst':
+            input_text = replace_capturing_group(contexts[-1], dataset.state_re, new_state_text)
+
+            ## we always use gold history following common practice
+            ## if you want to use predicted response instead of gold uncomment the following
+            # last_sys_pred = predictions[-1][0].strip()
+            # input_text = replace_match(input_text, last_system_re, last_sys_pred)
+
+        elif train_target == 'api':
+
+            # replace state
+            input_text = replace_capturing_group(contexts[-1], dataset.state_re, new_state_text)
+
+        elif train_target == 'da':
+            # replace state
+            input_text = replace_capturing_group(contexts[-1], dataset.state_re, new_state_text)
+
+            # replace knowledge
+            input_text = replace_capturing_group(input_text, dataset.knowledge_re, new_knowledge_text)
+
+        elif train_target == 'rg':
+
+            # replace actions
+            input_text = replace_capturing_group(contexts[-1], dataset.actions_re, new_actions_text)
+
+        else:
+            raise ValueError(f'Invalid train_target: {train_target}')
+
+        # replace old context with updated
+        contexts[-1] = input_text
+
+        tokenized_contexts = numericalizer.encode_batch([input_text], field_name='context', features=None)[0]
+
+        numericalized_turn = NumericalizedExamples(
+            example_id=[turn.example_id[0]],
+            context=SequentialField(
+                value=torch.tensor([tokenized_contexts.value], device=device),
+                length=torch.tensor([tokenized_contexts.length], device=device),
+                limited=torch.tensor([tokenized_contexts.limited], device=device),
+                feature=None,
+            ),
+            answer=SequentialField(value=None, length=None, limited=None, feature=None),
+        )
+
+        generated = model.generate(
+            numericalized_turn,
+            max_output_length=args.max_output_length,
+            min_output_length=args.min_output_length,
+            num_outputs=args.num_outputs[hyperparameter_idx],
+            temperature=args.temperature[hyperparameter_idx] if args.temperature[hyperparameter_idx] > 0 else 1.0,
+            repetition_penalty=args.repetition_penalty[hyperparameter_idx],
+            top_k=args.top_k[hyperparameter_idx],
+            top_p=args.top_p[hyperparameter_idx],
+            num_beams=args.num_beams[hyperparameter_idx],
+            num_beam_groups=args.num_beam_groups[hyperparameter_idx],
+            diversity_penalty=args.diversity_penalty[hyperparameter_idx],
+            no_repeat_ngram_size=args.no_repeat_ngram_size[hyperparameter_idx],
+            do_sample=args.temperature[hyperparameter_idx] != 0,  # a temperature of 0 turns sampling off
+        )
+
+        partial_batch_prediction_ids = generated.sequences
+
+        partial_batch_prediction = numericalizer.reverse(partial_batch_prediction_ids, 'answer')[0]
+
+        if train_target == 'da':
+            partial_batch_prediction = dataset.postprocess_prediction(
+                partial_batch_prediction, knowledge, lang=numericalizer._tokenizer.src_lang[:2]
+            )
+
+        partial_batch_prediction = task.postprocess_prediction(batch_example_ids[0], partial_batch_prediction)
+
+        # put them into the right array
+        batch_prediction.append([partial_batch_prediction])
+
+        predictions += batch_prediction
+
+        if train_target == 'dst':
+            # update dialogue_state
+            lev = predictions[-1][0].strip()
+            state_update = dataset.span2state(lev)
+            if state_update:
+                active_api = list(state_update.keys())[-1]  # the last key of the update becomes the active API
+            dataset.update_state(state_update, dialogue_state)
+
+            #### save latest state
+            state_to_record = copy.deepcopy(dialogue_state)
+            state_to_record = {dataset.domain2api_name(k): v for k, v in state_to_record.items()}
+            e2e_dialogue_preds[dial_id]["turns"][str(turn_id)]["state"] = state_to_record
+            ####
+
+        elif train_target == 'api':
+            if dataset.do_knowledge_reset(active_api):
+                new_knowledge_text = "null"
+                knowledge = defaultdict(dict)
+
+            do_api_call = predictions[-1][0].strip()
+
+            if do_api_call == 'yes':
+                # make api call
+                api_name = active_api
+                if api_name in dialogue_state:
+                    constraints, new_knowledge_text = dataset.make_api_call(
+                        dialogue_state, knowledge, api_name, numericalizer._tokenizer.src_lang, dial_id, turn_id
+                    )
+                    #### save latest api constraints
+                    e2e_dialogue_preds[dial_id]["API"][dataset.domain2api_name(api_name)] = copy.deepcopy(constraints)
+                    ####
+
+            elif do_api_call == 'no':
+                # do nothing
+                pass
+            else:
+                logger.error(
+                    f'API call should be either yes or no but got {do_api_call}. Seems model is not trained for enough steps. For now we assume it\'s a no'
+                )
+
+            #### save latest api results
+            e2e_dialogue_preds[dial_id]["turns"][str(turn_id)]["api"] = new_knowledge_text
+            ####
+
+        elif train_target == 'da':
+            new_actions_text = predictions[-1][0]
+            #### save latest actions
+            e2e_dialogue_preds[dial_id]["turns"][str(turn_id)]["actions"] = predictions[-1][0]
+            ####
+
+        elif train_target == 'rg':
+            #### save latest response
+            e2e_dialogue_preds[dial_id]["turns"][str(turn_id)]["response"] = predictions[-1]  # whole list, not [0]
+            ####
+
+    with open(os.path.join(eval_dir, 'e2e_dialogue_preds.json'), 'w') as fout:  # NOTE(review): fails if eval_dir is None
+        ujson.dump(e2e_dialogue_preds, fout, indent=2, ensure_ascii=False)
+
+    if original_order is not None:
+        # sort back to the original order. NOTE(review): unpacking fails if `answers` is empty (output_predictions_only)
+        original_order, example_ids, predictions, answers, contexts = [
+            list(a) for a in tuple(zip(*sorted(list(zip(original_order, example_ids, predictions, answers, contexts)))))
+        ]
+
+    # TODO calculate and return loss
+    loss = None
+    output = GenerationOutput(loss=loss)
+
+    if output_predictions_only:
+        output.predictions = predictions
+    else:
+        output.example_ids, output.predictions, output.answers, output.contexts = example_ids, predictions, answers, contexts
+
+    return output
+
+
 def generate_with_seq2seq_model(
     model,
     data_iterator,
@@ -407,9 +682,7 @@ def validate(task, val_iter, model, numericalizer, args, num_print=10):
         # loss is already calculated
         metrics_to_return = [metric for metric in task.metrics if metric != 'loss']
 
-        metrics = calculate_and_reduce_metrics(
-            output.predictions, output.answers, metrics_to_return, args.reduce_metrics, model.tgt_lang
-        )
+        metrics = calculate_and_reduce_metrics(output, metrics_to_return, args, model.tgt_lang)
 
         results = {'model prediction': output.predictions, 'gold answer': output.answers, 'context': output.contexts}
 
diff --git a/setup.py b/setup.py
index ab085dd2..025d1825 100644
--- a/setup.py
+++ b/setup.py
@@ -68,5 +68,7 @@
         'scikit-learn>=0.23,<2.0',
         'dill~=0.3',
         'xgboost~=1.3',
+        # dialogues
+        'dialogues @ git+https://github.com/Mehrad0711/dialogues',
     ],
 )
diff --git a/tests/dataset/bitod/test.json b/tests/dataset/bitod/test.json
new file mode 100644
index 00000000..ad9227b2
--- /dev/null
+++ b/tests/dataset/bitod/test.json
@@ -0,0 +1,256 @@
+{
+ "data": [
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 1,
+   "dialog_history": "USER: Hey, can you help me with hotel booking?",
+   "input_text": "DST: <state> null <endofstate> <history> USER: Hey, can you help me with hotel booking? <endofhistory>",
+   "output_text": "( hotels search )",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 1,
+   "dialog_history": "USER: Hey, can you help me with hotel booking?",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels search ) <endofstate> <history> USER: Hey, can you help me with hotel booking? <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 1,
+   "dialog_history": "USER: Hey, can you help me with hotel booking?",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels search ) <endofstate> <history> USER: Hey, can you help me with hotel booking? <endofhistory>",
+   "output_text": "( hotels search ) request location , request rating",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 1,
+   "dialog_history": "USER: Hey, can you help me with hotel booking?",
+   "input_text": "RG: <actions> ( hotels search ) request location , request rating <endofactions> <history> USER: Hey, can you help me with hotel booking? <endofhistory>",
+   "output_text": "Sure! Do you have a destination in mind, and did you have a certain hotel rating in mind?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 2,
+   "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.",
+   "input_text": "DST: <state> ( hotels search ) <endofstate> <history> AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>",
+   "output_text": "( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 2,
+   "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" <endofstate> <history> AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 2,
+   "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" <endofstate> <history> AGENT_DA: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>",
+   "output_text": "( hotels search ) request stars",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 2,
+   "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.",
+   "input_text": "RG: <actions> ( hotels search ) request stars <endofactions> <history> AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>",
+   "output_text": "How many stars would be your ideal?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 3,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.",
+   "input_text": "DST: <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. <endofhistory>",
+   "output_text": "( hotels search ) stars equal_to \" 5 \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 3,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 3,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_DA: ( hotels search ) request stars USER: 5 stars. <endofhistory>",
+   "output_text": "( hotels search ) request price_level",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 3,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.",
+   "input_text": "RG: <actions> ( hotels search ) request price_level <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. <endofhistory>",
+   "output_text": "What's your budget?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 4,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.",
+   "input_text": "DST: <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>",
+   "output_text": "( hotels search ) price_level equal_to \" moderate \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 4,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>",
+   "output_text": "yes",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 4,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.",
+   "input_text": "DA: <knowledge> ( hotels search ) available_options \" 26 \" , location \" Mong Kok | Kowloon | Yau Tsim Mong District \" , name \" Cordis, Hong Kong \" , price_level \" moderate \" , price_per_night \" 839 HKD \" , rating \" 10 \" , stars \" 5 \" <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_DA: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>",
+   "output_text": "( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \"",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 4,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.",
+   "input_text": "RG: <actions> ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>",
+   "output_text": "We have 26 hotels with availability here, and my recommendation would be Cordis, Hong Kong which has a 10 star rating.",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 5,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.",
+   "input_text": "DST: <state> ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. <endofhistory>",
+   "output_text": "( hotels booking ) name equal_to \" Cordis, Hong Kong \" , start_day equal_to \" 23 \" , start_month equal_to \" 11 \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 5,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name #unknown , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 5,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name #unknown , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_DA: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. <endofhistory>",
+   "output_text": "( hotels booking ) request number_of_nights",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 5,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.",
+   "input_text": "RG: <actions> ( hotels booking ) request number_of_nights <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. <endofhistory>",
+   "output_text": "How many nights do you plan to stay?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 6,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.",
+   "input_text": "DST: <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name #unknown , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>",
+   "output_text": "( hotels booking ) number_of_nights equal_to \" 9 \" , user_name equal_to \" David \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 6,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 6,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_DA: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>",
+   "output_text": "( hotels booking ) request number_of_rooms",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 6,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.",
+   "input_text": "RG: <actions> ( hotels booking ) request number_of_rooms <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>",
+   "output_text": "Of course, David. How many rooms?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 7,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.",
+   "input_text": "DST: <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>",
+   "output_text": "( hotels booking ) number_of_rooms equal_to \" eight \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 7,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms equal_to \" eight \" , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 7,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms equal_to \" eight \" , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_DA: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>",
+   "output_text": "( hotels booking ) confirm name equal_to \" Cordis, Hong Kong \" , confirm number_of_nights equal_to \" 9 \" , confirm number_of_rooms equal_to \" eight \" , confirm start_date equal_to \" November 23 \"",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 7,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.",
+   "input_text": "RG: <actions> ( hotels booking ) confirm name equal_to \" Cordis, Hong Kong \" , confirm number_of_nights equal_to \" 9 \" , confirm number_of_rooms equal_to \" eight \" , confirm start_date equal_to \" November 23 \" <endofactions> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>",
+   "output_text": "Okay, just to confirm: You would like to book eight rooms at the Cordis, Hong Kong hotel for 9 nights with a check-in date of November 23rd?",
+   "train_target": "rg"
+  }
+ ]
+}
\ No newline at end of file
diff --git a/tests/dataset/bitod/train.json b/tests/dataset/bitod/train.json
new file mode 100644
index 00000000..ad9227b2
--- /dev/null
+++ b/tests/dataset/bitod/train.json
@@ -0,0 +1,256 @@
+{
+ "data": [
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 1,
+   "dialog_history": "USER: Hey, can you help me with hotel booking?",
+   "input_text": "DST: <state> null <endofstate> <history> USER: Hey, can you help me with hotel booking? <endofhistory>",
+   "output_text": "( hotels search )",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 1,
+   "dialog_history": "USER: Hey, can you help me with hotel booking?",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels search ) <endofstate> <history> USER: Hey, can you help me with hotel booking? <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 1,
+   "dialog_history": "USER: Hey, can you help me with hotel booking?",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels search ) <endofstate> <history> USER: Hey, can you help me with hotel booking? <endofhistory>",
+   "output_text": "( hotels search ) request location , request rating",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 1,
+   "dialog_history": "USER: Hey, can you help me with hotel booking?",
+   "input_text": "RG: <actions> ( hotels search ) request location , request rating <endofactions> <history> USER: Hey, can you help me with hotel booking? <endofhistory>",
+   "output_text": "Sure! Do you have a destination in mind, and did you have a certain hotel rating in mind?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 2,
+   "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.",
+   "input_text": "DST: <state> ( hotels search ) <endofstate> <history> AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>",
+   "output_text": "( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 2,
+   "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" <endofstate> <history> AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 2,
+   "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" <endofstate> <history> AGENT_DA: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>",
+   "output_text": "( hotels search ) request stars",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 2,
+   "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.",
+   "input_text": "RG: <actions> ( hotels search ) request stars <endofactions> <history> AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>",
+   "output_text": "How many stars would be your ideal?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 3,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.",
+   "input_text": "DST: <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. <endofhistory>",
+   "output_text": "( hotels search ) stars equal_to \" 5 \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 3,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 3,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_DA: ( hotels search ) request stars USER: 5 stars. <endofhistory>",
+   "output_text": "( hotels search ) request price_level",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 3,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.",
+   "input_text": "RG: <actions> ( hotels search ) request price_level <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. <endofhistory>",
+   "output_text": "What's your budget?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 4,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.",
+   "input_text": "DST: <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>",
+   "output_text": "( hotels search ) price_level equal_to \" moderate \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 4,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>",
+   "output_text": "yes",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 4,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.",
+   "input_text": "DA: <knowledge> ( hotels search ) available_options \" 26 \" , location \" Mong Kok | Kowloon | Yau Tsim Mong District \" , name \" Cordis, Hong Kong \" , price_level \" moderate \" , price_per_night \" 839 HKD \" , rating \" 10 \" , stars \" 5 \" <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_DA: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>",
+   "output_text": "( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \"",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 4,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.",
+   "input_text": "RG: <actions> ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>",
+   "output_text": "We have 26 hotels with availability here, and my recommendation would be Cordis, Hong Kong which has a 10 star rating.",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 5,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.",
+   "input_text": "DST: <state> ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. <endofhistory>",
+   "output_text": "( hotels booking ) name equal_to \" Cordis, Hong Kong \" , start_day equal_to \" 23 \" , start_month equal_to \" 11 \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 5,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name #unknown , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 5,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name #unknown , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_DA: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. <endofhistory>",
+   "output_text": "( hotels booking ) request number_of_nights",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 5,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.",
+   "input_text": "RG: <actions> ( hotels booking ) request number_of_nights <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. <endofhistory>",
+   "output_text": "How many nights do you plan to stay?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 6,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.",
+   "input_text": "DST: <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name #unknown , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>",
+   "output_text": "( hotels booking ) number_of_nights equal_to \" 9 \" , user_name equal_to \" David \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 6,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 6,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_DA: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>",
+   "output_text": "( hotels booking ) request number_of_rooms",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 6,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.",
+   "input_text": "RG: <actions> ( hotels booking ) request number_of_rooms <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>",
+   "output_text": "Of course, David. How many rooms?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 7,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.",
+   "input_text": "DST: <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>",
+   "output_text": "( hotels booking ) number_of_rooms equal_to \" eight \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 7,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms equal_to \" eight \" , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 7,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms equal_to \" eight \" , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_DA: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>",
+   "output_text": "( hotels booking ) confirm name equal_to \" Cordis, Hong Kong \" , confirm number_of_nights equal_to \" 9 \" , confirm number_of_rooms equal_to \" eight \" , confirm start_date equal_to \" November 23 \"",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 7,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.",
+   "input_text": "RG: <actions> ( hotels booking ) confirm name equal_to \" Cordis, Hong Kong \" , confirm number_of_nights equal_to \" 9 \" , confirm number_of_rooms equal_to \" eight \" , confirm start_date equal_to \" November 23 \" <endofactions> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>",
+   "output_text": "Okay, just to confirm: You would like to book eight rooms at the Cordis, Hong Kong hotel for 9 nights with a check-in date of November 23rd?",
+   "train_target": "rg"
+  }
+ ]
+}
\ No newline at end of file
diff --git a/tests/dataset/bitod/valid.json b/tests/dataset/bitod/valid.json
new file mode 100644
index 00000000..ad9227b2
--- /dev/null
+++ b/tests/dataset/bitod/valid.json
@@ -0,0 +1,256 @@
+{
+ "data": [
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 1,
+   "dialog_history": "USER: Hey, can you help me with hotel booking?",
+   "input_text": "DST: <state> null <endofstate> <history> USER: Hey, can you help me with hotel booking? <endofhistory>",
+   "output_text": "( hotels search )",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 1,
+   "dialog_history": "USER: Hey, can you help me with hotel booking?",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels search ) <endofstate> <history> USER: Hey, can you help me with hotel booking? <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 1,
+   "dialog_history": "USER: Hey, can you help me with hotel booking?",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels search ) <endofstate> <history> USER: Hey, can you help me with hotel booking? <endofhistory>",
+   "output_text": "( hotels search ) request location , request rating",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 1,
+   "dialog_history": "USER: Hey, can you help me with hotel booking?",
+   "input_text": "RG: <actions> ( hotels search ) request location , request rating <endofactions> <history> USER: Hey, can you help me with hotel booking? <endofhistory>",
+   "output_text": "Sure! Do you have a destination in mind, and did you have a certain hotel rating in mind?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 2,
+   "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.",
+   "input_text": "DST: <state> ( hotels search ) <endofstate> <history> AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>",
+   "output_text": "( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 2,
+   "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" <endofstate> <history> AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 2,
+   "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" <endofstate> <history> AGENT_DA: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>",
+   "output_text": "( hotels search ) request stars",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 2,
+   "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.",
+   "input_text": "RG: <actions> ( hotels search ) request stars <endofactions> <history> AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>",
+   "output_text": "How many stars would be your ideal?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 3,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.",
+   "input_text": "DST: <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. <endofhistory>",
+   "output_text": "( hotels search ) stars equal_to \" 5 \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 3,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 3,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_DA: ( hotels search ) request stars USER: 5 stars. <endofhistory>",
+   "output_text": "( hotels search ) request price_level",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 3,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.",
+   "input_text": "RG: <actions> ( hotels search ) request price_level <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. <endofhistory>",
+   "output_text": "What's your budget?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 4,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.",
+   "input_text": "DST: <state> ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>",
+   "output_text": "( hotels search ) price_level equal_to \" moderate \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 4,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>",
+   "output_text": "yes",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 4,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.",
+   "input_text": "DA: <knowledge> ( hotels search ) available_options \" 26 \" , location \" Mong Kok | Kowloon | Yau Tsim Mong District \" , name \" Cordis, Hong Kong \" , price_level \" moderate \" , price_per_night \" 839 HKD \" , rating \" 10 \" , stars \" 5 \" <endofknowledge> <state> ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_DA: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>",
+   "output_text": "( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \"",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels search",
+   "turn_id": 4,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.",
+   "input_text": "RG: <actions> ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>",
+   "output_text": "We have 26 hotels with availability here, and my recommendation would be Cordis, Hong Kong which has a 10 star rating.",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 5,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.",
+   "input_text": "DST: <state> ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. <endofhistory>",
+   "output_text": "( hotels booking ) name equal_to \" Cordis, Hong Kong \" , start_day equal_to \" 23 \" , start_month equal_to \" 11 \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 5,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name #unknown , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 5,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name #unknown , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_DA: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. <endofhistory>",
+   "output_text": "( hotels booking ) request number_of_nights",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 5,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.",
+   "input_text": "RG: <actions> ( hotels booking ) request number_of_nights <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. <endofhistory>",
+   "output_text": "How many nights do you plan to stay?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 6,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.",
+   "input_text": "DST: <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name #unknown , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>",
+   "output_text": "( hotels booking ) number_of_nights equal_to \" 9 \" , user_name equal_to \" David \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 6,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 6,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_DA: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>",
+   "output_text": "( hotels booking ) request number_of_rooms",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 6,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.",
+   "input_text": "RG: <actions> ( hotels booking ) request number_of_rooms <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>",
+   "output_text": "Of course, David. How many rooms?",
+   "train_target": "rg"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 7,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.",
+   "input_text": "DST: <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>",
+   "output_text": "( hotels booking ) number_of_rooms equal_to \" eight \"",
+   "train_target": "dst"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 7,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.",
+   "input_text": "API: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms equal_to \" eight \" , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>",
+   "output_text": "no",
+   "train_target": "api"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 7,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.",
+   "input_text": "DA: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms equal_to \" eight \" , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" <endofstate> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_DA: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>",
+   "output_text": "( hotels booking ) confirm name equal_to \" Cordis, Hong Kong \" , confirm number_of_nights equal_to \" 9 \" , confirm number_of_rooms equal_to \" eight \" , confirm start_date equal_to \" November 23 \"",
+   "train_target": "da"
+  },
+  {
+   "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824",
+   "task": "hotels booking",
+   "turn_id": 7,
+   "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.",
+   "input_text": "RG: <actions> ( hotels booking ) confirm name equal_to \" Cordis, Hong Kong \" , confirm number_of_nights equal_to \" 9 \" , confirm number_of_rooms equal_to \" eight \" , confirm start_date equal_to \" November 23 \" <endofactions> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>",
+   "output_text": "Okay, just to confirm: You would like to book eight rooms at the Cordis, Hong Kong hotel for 9 nights with a check-in date of November 23rd?",
+   "train_target": "rg"
+  }
+ ]
+}
\ No newline at end of file
diff --git a/tests/expected_results/almond/bert_base_cased_beam.tsv b/tests/expected_results/almond/bert_base_cased_beam.tsv
index 5c4fe7cd..d1d972f9 100644
--- a/tests/expected_results/almond/bert_base_cased_beam.tsv
+++ b/tests/expected_results/almond/bert_base_cased_beam.tsv
@@ -1,10 +1,10 @@
-almond/R1633686-0	i engligli	@com.twitter . home_timeline ( ) => @com.gmail . send_email ( message = text ) ;
-almond/R1624112-0	onglitalgli room	monitor ( @thermostat . get_humidity ( ) ) filter value <= NUMBER_0 => @org.thingpedia.bluetooth.speaker.a2dp . play_music ( ) ;
-almond/R1633355-0	##tter igligli hope please youwi	monitor ( @security-camera . current_event ( ) ) => @com.twitter . post_picture ( caption = " person and i hope you have a great day " , picture_url = picture_url ) ;
-almond/R1633055-0	once	monitor ( @org.thingpedia.weather . current ( location = LOCATION_0 ) ) filter ! ( status == enum snowy ) => @com.fitbit . getsteps ( ) ;
-almond/R1621376-0	i igliglisetglital aost the thingtal get aost the thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get	monitor ( @com.twitter . home_timeline ( ) ) => @thermostat . set_target_temperature ( ) ;
-almond/R1628268-0	englitag	monitor ( @com.washingtonpost . get_article ( section = enum world ) ) => @com.instagram . get_pictures ( ) filter filter == null ^^com.instagram:filter ( " brannan " ) ;
-almond/R1632097-0		monitor ( @org.thingpedia.weather . current ( location = $location . work ) ) filter ! ( status == enum snowy ) => @org.thingpedia.builtin.thingengine.phone . get_gps ( ) ;
-almond/R1618372-0	##eriaglieriagligligligligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli me	monitor ( @com.tumblr . new_photo ( blog_name = " zackeriah " ^^tt:username ) ) => @org.thingpedia.icalendar . list_events ( ) ;
-almond/R1624288-0	i igligliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igli	monitor ( @com.twitter . home_timeline ( ) ) => @thermostat . set_target_temperature ( value = NUMBER_0 F ) ;
-almond/R1629029-0	i igli tell thing to thingtalwitter thing totter thing to totter thing to thing totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing	monitor ( @org.thingpedia.icalendar . list_events ( ) filter organizer == " jouanna mills " ) filter status == enum started => @com.twitter . direct_messages ( ) ;
+almond/R1633686-0	i engligli	@com.twitter . home_timeline ( ) => @com.gmail . send_email ( message = text ) ;	deliver the text of tweets from anyone i follow via email translate from english to thingtalk
+almond/R1624112-0	onglitalgli room	monitor ( @thermostat . get_humidity ( ) ) filter value <= NUMBER_0 => @org.thingpedia.bluetooth.speaker.a2dp . play_music ( ) ;	when the room 's humidity falls below NUMBER_0 in the room play something on my speaker translate from english to thingtalk
+almond/R1633355-0	##tter igligli hope please youwi	monitor ( @security-camera . current_event ( ) ) => @com.twitter . post_picture ( caption = " person and i hope you have a great day " , picture_url = picture_url ) ;	please post the live feed photo of my new security camera on twitter and caption it person and i hope you have a great day translate from english to thingtalk
+almond/R1633055-0	once	monitor ( @org.thingpedia.weather . current ( location = LOCATION_0 ) ) filter ! ( status == enum snowy ) => @com.fitbit . getsteps ( ) ;	once it stops snowing in LOCATION_0 , get the steps from fitbit translate from english to thingtalk
+almond/R1621376-0	i igliglisetglital aost the thingtal get aost the thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get thingtal get	monitor ( @com.twitter . home_timeline ( ) ) => @thermostat . set_target_temperature ( ) ;	reset my thermostat reading if i get a tweet translate from english to thingtalk
+almond/R1628268-0	englitag	monitor ( @com.washingtonpost . get_article ( section = enum world ) ) => @com.instagram . get_pictures ( ) filter filter == null ^^com.instagram:filter ( " brannan " ) ;	notify me of the instagram pictures with filter brannan when there is a new article in the world section of the washington post translate from english to thingtalk
+almond/R1632097-0		monitor ( @org.thingpedia.weather . current ( location = $location . work ) ) filter ! ( status == enum snowy ) => @org.thingpedia.builtin.thingengine.phone . get_gps ( ) ;	retrieve my current location when it stops snowing in work translate from english to thingtalk
+almond/R1618372-0	##eriaglieriagligligligligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli megligligligligligligligligligli me	monitor ( @com.tumblr . new_photo ( blog_name = " zackeriah " ^^tt:username ) ) => @org.thingpedia.icalendar . list_events ( ) ;	show me my icalendar events when zackeriah uploads a new picture on tumblr translate from english to thingtalk
+almond/R1624288-0	i igligliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igliglital to to thing to NUMBER_0 NUMBER_0 to NUMBER_0 to NUMBER_0 NUMBER_0 igli	monitor ( @com.twitter . home_timeline ( ) ) => @thermostat . set_target_temperature ( value = NUMBER_0 F ) ;	change my thermostat to NUMBER_0 degrees when someone i follow makes a tweet translate from english to thingtalk
+almond/R1629029-0	i igli tell thing to thingtalwitter thing totter thing to totter thing to thing totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing to totter thing to totter thing to thing totter thing	monitor ( @org.thingpedia.icalendar . list_events ( ) filter organizer == " jouanna mills " ) filter status == enum started => @com.twitter . direct_messages ( ) ;	tell me the direct message i got on twitter when an event organized by jouanna mills from my calendar is about to start translate from english to thingtalk
diff --git a/tests/expected_results/bitod/bitod.tsv b/tests/expected_results/bitod/bitod.tsv
new file mode 100644
index 00000000..641d478a
--- /dev/null
+++ b/tests/expected_results/bitod/bitod.tsv
@@ -0,0 +1,28 @@
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/1/dst	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels search )	DST: <state> null <endofstate> <history> USER: Hey, can you help me with hotel booking? <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/1/api	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	no	API: <knowledge> null <endofknowledge> <state> ( hotels search ) <endofstate> <history> USER: Hey, can you help me with hotel booking? <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/1/da	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels search ) request location , request rating	DA: <knowledge> null <endofknowledge> <state> ( hotels search ) <endofstate> <history> USER: Hey, can you help me with hotel booking? <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/1/rg	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	Sure! Do you have a destination in mind, and did you have a certain hotel rating in mind?	RG: <actions> ( hotels search ) request location , request rating <endofactions> <history> USER: Hey, can you help me with hotel booking? <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/2/dst	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels search ) location equal_to " don't care " , rating at_least " 4 "	DST: <state> ( hotels search ) <endofstate> <history> AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/2/api	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	no	API: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to " don't care " , rating at_least " 4 " <endofstate> <history> AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/2/da	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels search ) request stars	DA: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to " don't care " , rating at_least " 4 " <endofstate> <history> AGENT_DA: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/2/rg	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	How many stars would be your ideal?	RG: <actions> ( hotels search ) request stars <endofactions> <history> AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/3/dst	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels search ) stars equal_to " 5 "	DST: <state> ( hotels search ) location equal_to " don't care " , rating at_least " 4 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/3/api	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	no	API: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to " don't care " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/3/da	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels search ) request price_level	DA: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to " don't care " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_DA: ( hotels search ) request stars USER: 5 stars. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/3/rg	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	What's your budget?	RG: <actions> ( hotels search ) request price_level <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/4/dst	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels search ) price_level equal_to " moderate "	DST: <state> ( hotels search ) location equal_to " don't care " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/4/api	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	yes	API: <knowledge> null <endofknowledge> <state> ( hotels search ) location equal_to " don't care " , price_level equal_to " moderate " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/4/da	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels search ) offer available_options equal_to " 26 " , offer name equal_to " Cordis, Hong Kong " , offer rating equal_to " 10 "	DA: <knowledge> ( hotels search ) available_options " 26 " , location " Mong Kok | Kowloon | Yau Tsim Mong District " , name " Cordis, Hong Kong " , price_level " moderate " , price_per_night " 839 HKD " , rating " 10 " , stars " 5 " <endofknowledge> <state> ( hotels search ) location equal_to " don't care " , price_level equal_to " moderate " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_DA: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/4/rg	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	We have 26 hotels with availability here, and my recommendation would be Cordis, Hong Kong which has a 10 star rating.	RG: <actions> ( hotels search ) offer available_options equal_to " 26 " , offer name equal_to " Cordis, Hong Kong " , offer rating equal_to " 10 " <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/5/dst	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels booking ) name equal_to " Cordis, Hong Kong " , start_day equal_to " 23 " , start_month equal_to " 11 "	DST: <state> ( hotels search ) location equal_to " don't care " , price_level equal_to " moderate " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to " 26 " , offer name equal_to " Cordis, Hong Kong " , offer rating equal_to " 10 " USER: I'll take it. I need to check in on November 23rd. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/5/api	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	no	API: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to " Cordis, Hong Kong " , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to " 23 " , start_month equal_to " 11 " , user_name #unknown , ( hotels search ) location equal_to " don't care " , price_level equal_to " moderate " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to " 26 " , offer name equal_to " Cordis, Hong Kong " , offer rating equal_to " 10 " USER: I'll take it. I need to check in on November 23rd. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/5/da	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels booking ) request number_of_nights	DA: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to " Cordis, Hong Kong " , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to " 23 " , start_month equal_to " 11 " , user_name #unknown , ( hotels search ) location equal_to " don't care " , price_level equal_to " moderate " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_DA: ( hotels search ) offer available_options equal_to " 26 " , offer name equal_to " Cordis, Hong Kong " , offer rating equal_to " 10 " USER: I'll take it. I need to check in on November 23rd. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/5/rg	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	How many nights do you plan to stay?	RG: <actions> ( hotels booking ) request number_of_nights <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to " 26 " , offer name equal_to " Cordis, Hong Kong " , offer rating equal_to " 10 " USER: I'll take it. I need to check in on November 23rd. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/6/dst	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels booking ) number_of_nights equal_to " 9 " , user_name equal_to " David "	DST: <state> ( hotels booking ) name equal_to " Cordis, Hong Kong " , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to " 23 " , start_month equal_to " 11 " , user_name #unknown , ( hotels search ) location equal_to " don't care " , price_level equal_to " moderate " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to " 26 " , offer name equal_to " Cordis, Hong Kong " , offer rating equal_to " 10 " AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/6/api	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	no	API: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to " Cordis, Hong Kong " , number_of_nights equal_to " 9 " , number_of_rooms #unknown , start_day equal_to " 23 " , start_month equal_to " 11 " , user_name equal_to " David " , ( hotels search ) location equal_to " don't care " , price_level equal_to " moderate " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to " 26 " , offer name equal_to " Cordis, Hong Kong " , offer rating equal_to " 10 " AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/6/da	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels booking ) request number_of_rooms	DA: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to " Cordis, Hong Kong " , number_of_nights equal_to " 9 " , number_of_rooms #unknown , start_day equal_to " 23 " , start_month equal_to " 11 " , user_name equal_to " David " , ( hotels search ) location equal_to " don't care " , price_level equal_to " moderate " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to " 26 " , offer name equal_to " Cordis, Hong Kong " , offer rating equal_to " 10 " AGENT_DA: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/6/rg	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	Of course, David. How many rooms?	RG: <actions> ( hotels booking ) request number_of_rooms <endofactions> <history> AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to " 26 " , offer name equal_to " Cordis, Hong Kong " , offer rating equal_to " 10 " AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/7/dst	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels booking ) number_of_rooms equal_to " eight "	DST: <state> ( hotels booking ) name equal_to " Cordis, Hong Kong " , number_of_nights equal_to " 9 " , number_of_rooms #unknown , start_day equal_to " 23 " , start_month equal_to " 11 " , user_name equal_to " David " , ( hotels search ) location equal_to " don't care " , price_level equal_to " moderate " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/7/api	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	no	API: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to " Cordis, Hong Kong " , number_of_nights equal_to " 9 " , number_of_rooms equal_to " eight " , start_day equal_to " 23 " , start_month equal_to " 11 " , user_name equal_to " David " , ( hotels search ) location equal_to " don't care " , price_level equal_to " moderate " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/7/da	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	( hotels booking ) confirm name equal_to " Cordis, Hong Kong " , confirm number_of_nights equal_to " 9 " , confirm number_of_rooms equal_to " eight " , confirm start_date equal_to " November 23 "	DA: <knowledge> null <endofknowledge> <state> ( hotels booking ) name equal_to " Cordis, Hong Kong " , number_of_nights equal_to " 9 " , number_of_rooms equal_to " eight " , start_day equal_to " 23 " , start_month equal_to " 11 " , user_name equal_to " David " , ( hotels search ) location equal_to " don't care " , price_level equal_to " moderate " , rating at_least " 4 " , stars equal_to " 5 " <endofstate> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_DA: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>
+bitod/ec909032-d5fd-4dee-8dd2-53a895a98824/7/rg	 up Trend Trend Trend Trend Trend Trend Trend Trend Trend sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship sponsorship metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metro metroEarnEarnEarn	Okay, just to confirm: You would like to book eight rooms at the Cordis, Hong Kong hotel for 9 nights with a check-in date of November 23rd?	RG: <actions> ( hotels booking ) confirm name equal_to " Cordis, Hong Kong " , confirm number_of_nights equal_to " 9 " , confirm number_of_rooms equal_to " eight " , confirm start_date equal_to " November 23 " <endofactions> <history> AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. <endofhistory>
diff --git a/tests/expected_results/sequence_classification/ood_task.tsv b/tests/expected_results/sequence_classification/ood_task.tsv
index e9e5224d..e66b7760 100644
--- a/tests/expected_results/sequence_classification/ood_task.tsv
+++ b/tests/expected_results/sequence_classification/ood_task.tsv
@@ -1,10 +1,10 @@
-ood/0	1	1
-ood/1	1	0
-ood/2	1	0
-ood/3	1	0
-ood/4	0	0
-ood/5	1	0
-ood/6	1	1
-ood/7	1	0
-ood/8	1	0
-ood/9	1	1
+ood/0	1	1	get curiosity pictures is this sentence in - domain or out - domain?
+ood/1	1	0	play skinny love by bon iver is this sentence in - domain or out - domain?
+ood/2	1	0	search for all songs with the word dance in the title. is this sentence in - domain or out - domain?
+ood/3	1	0	play some psychedelic rock. is this sentence in - domain or out - domain?
+ood/4	0	0	play the track grow old with me by artist chloe rose lattanzi is this sentence in - domain or out - domain?
+ood/5	1	0	acdc is this sentence in - domain or out - domain?
+ood/6	1	1	easy walk, accountable lady gaga. is this sentence in - domain or out - domain?
+ood/7	1	0	play that would be something by eden ahbez is this sentence in - domain or out - domain?
+ood/8	1	0	show me the weather in berceni, romania is this sentence in - domain or out - domain?
+ood/9	1	1	what is number _ 0 + 2 is this sentence in - domain or out - domain?
diff --git a/tests/expected_results/token_classification/conll2003_2.tsv b/tests/expected_results/token_classification/conll2003_2.tsv
index 93b2051d..c2f9618a 100644
--- a/tests/expected_results/token_classification/conll2003_2.tsv
+++ b/tests/expected_results/token_classification/conll2003_2.tsv
@@ -1,5 +1,5 @@
-conll2003/0	I-PER I-LOC O B-PER B-PER B-ORG B-ORG O B-PER B-PER B-PER B-ORG B-LOC B-LOC B-LOC B-PER B-LOC O B-ORG B-LOC B-LOC B-LOC B-LOC B-PER B-ORG B-LOC B-ORG B-ORG I-ORG B-PER O B-ORG B-PER B-PER B-PER B-LOC B-LOC B-LOC B-LOC B-PER B-PER B-PER	O O O O O B-ORG B-ORG B-ORG B-ORG B-ORG B-ORG B-ORG B-ORG B-ORG O O O O O O O O O O O O O O O O O O O O O O O O O O O O
-conll2003/1	B-ORG O I-ORG O B-PER O B-PER O B-PER B-PER B-PER B-ORG B-PER B-ORG B-PER B-ORG B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-ORG B-LOC B-ORG B-ORG B-ORG B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER	B-LOC B-LOC B-LOC B-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
-conll2003/2	I-PER B-PER O I-PER O O I-PER B-ORG B-LOC B-MISC I-PER B-LOC O B-LOC I-PER B-PER B-LOC B-LOC O O I-PER I-ORG B-PER B-MISC I-PER O B-LOC I-LOC O B-LOC O O O O I-LOC B-PER B-PER O I-LOC I-LOC I-LOC I-LOC	B-MISC I-MISC O O O O B-PER I-PER O O O O O O O B-ORG O B-ORG O O O O O O O O O O O O O O O O O O O O O O O O
-conll2003/3	I-PER B-LOC I-LOC B-LOC B-LOC B-ORG B-LOC B-LOC O B-ORG O B-LOC O B-PER O I-LOC B-LOC B-LOC B-LOC B-LOC B-LOC B-LOC I-LOC I-LOC B-LOC B-LOC B-LOC B-ORG B-LOC B-PER B-LOC I-PER O B-PER B-LOC O B-ORG B-PER O B-LOC O I-LOC	O O O O O O O O O O O O O O O B-ORG O B-ORG O B-ORG O O O O O O B-ORG O O O O O O O O O O O O B-ORG O O
-conll2003/4	I-PER O I-LOC I-PER I-PER O B-LOC B-LOC B-MISC B-LOC B-MISC I-LOC B-MISC B-LOC B-PER B-ORG B-LOC B-LOC I-PER I-PER B-PER B-LOC B-LOC I-LOC O I-PER I-PER O B-MISC O B-LOC B-ORG O I-PER O B-ORG B-LOC B-MISC O B-MISC B-MISC O	O O B-ORG O O O O O O O O B-LOC I-LOC O B-ORG O O O O O O O O O O O O O O O B-LOC O O B-PER I-PER I-PER I-PER O O O O O
+conll2003/0	I-PER I-LOC O B-PER B-PER B-ORG B-ORG O B-PER B-PER B-PER B-ORG B-LOC B-LOC B-LOC B-PER B-LOC O B-ORG B-LOC B-LOC B-LOC B-LOC B-PER B-ORG B-LOC B-ORG B-ORG I-ORG B-PER O B-ORG B-PER B-PER B-PER B-LOC B-LOC B-LOC B-LOC B-PER B-PER B-PER	O O O O O B-ORG B-ORG B-ORG B-ORG B-ORG B-ORG B-ORG B-ORG B-ORG O O O O O O O O O O O O O O O O O O O O O O O O O O O O	CRICKET - LEICESTERSHIRE TAKE OVER AT TOP AFTER INNINGS VICTORY.
+conll2003/1	B-ORG O I-ORG O B-PER O B-PER O B-PER B-PER B-PER B-ORG B-PER B-ORG B-PER B-ORG B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-ORG B-LOC B-ORG B-ORG B-ORG B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER B-PER	B-LOC B-LOC B-LOC B-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O	LONDON 1996 - 08 - 30
+conll2003/2	I-PER B-PER O I-PER O O I-PER B-ORG B-LOC B-MISC I-PER B-LOC O B-LOC I-PER B-PER B-LOC B-LOC O O I-PER I-ORG B-PER B-MISC I-PER O B-LOC I-LOC O B-LOC O O O O I-LOC B-PER B-PER O I-LOC I-LOC I-LOC I-LOC	B-MISC I-MISC O O O O B-PER I-PER O O O O O O O B-ORG O B-ORG O O O O O O O O O O O O O O O O O O O O O O O O	West Indian all - rounder Phil Simmons took four for 38 on Friday as Leicestershire beat Somerset by an innings and 39 runs in two days to take over at the head of the county championship.
+conll2003/3	I-PER B-LOC I-LOC B-LOC B-LOC B-ORG B-LOC B-LOC O B-ORG O B-LOC O B-PER O I-LOC B-LOC B-LOC B-LOC B-LOC B-LOC B-LOC I-LOC I-LOC B-LOC B-LOC B-LOC B-ORG B-LOC B-PER B-LOC I-PER O B-PER B-LOC O B-ORG B-PER O B-LOC O I-LOC	O O O O O O O O O O O O O O O B-ORG O B-ORG O B-ORG O O O O O O B-ORG O O O O O O O O O O O O B-ORG O O	Their stay on top, though, may be short - lived as title rivals Essex, Derbyshire and Surrey all closed in on victory while Kent made up for lost time in their rain - affected match against Nottinghamshire.
+conll2003/4	I-PER O I-LOC I-PER I-PER O B-LOC B-LOC B-MISC B-LOC B-MISC I-LOC B-MISC B-LOC B-PER B-ORG B-LOC B-LOC I-PER I-PER B-PER B-LOC B-LOC I-LOC O I-PER I-PER O B-MISC O B-LOC B-ORG O I-PER O B-ORG B-LOC B-MISC O B-MISC B-MISC O	O O B-ORG O O O O O O O O B-LOC I-LOC O B-ORG O O O O O O O O O O O O O O O B-LOC O O B-PER I-PER I-PER I-PER O O O O O	After bowling Somerset out for 83 on the opening morning at Grace Road, Leicestershire extended their first innings by 94 runs before being bowled out for 296 with England discard Andy Caddick taking three for 83.
diff --git a/tests/expected_results/token_classification/cross_ner_news_0.tsv b/tests/expected_results/token_classification/cross_ner_news_0.tsv
index c4868f62..f16b22b7 100644
--- a/tests/expected_results/token_classification/cross_ner_news_0.tsv
+++ b/tests/expected_results/token_classification/cross_ner_news_0.tsv
@@ -1,5 +1,5 @@
-cross_ner/0	B-researcher I-book B-scientist B-metrics I-politicalparty B-university B-scientist B-metrics B-ORG B-metrics B-metrics I-musicalinstrument I-chemicalelement B-field I-band B-metrics B-protein I-academicjournal B-university I-academicjournal I-programlang I-PER I-song B-university B-university B-university I-literarygenre B-scientist B-metrics I-university I-song B-university B-university B-protein B-organisation B-enzyme B-enzyme I-academicjournal I-musicalinstrument I-song I-song B-LOC B-university B-university I-politician B-university B-metrics B-metrics B-metrics I-academicjournal I-chemicalcompound B-album I-programlang I-programlang B-field B-award B-award B-protein I-chemicalelement B-field I-chemicalelement B-astronomicalobject I-politicalparty B-astronomicalobject B-protein I-musicalinstrument I-musicalinstrument B-university B-award I-musicalinstrument B-album I-song B-field I-programlang B-metrics I-musicalinstrument B-award B-award B-programlang B-field B-protein	O O O O O O O O B-event I-event I-event O B-musicalartist B-musicalartist B-musicalartist O O O O B-location I-location I-location I-location O O B-location O O O B-location I-location O O B-location O O O O O B-location B-location B-location O O B-location O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
-cross_ner/1	I-chemicalelement B-award B-event I-politicalparty B-field I-astronomicalobject I-academicjournal B-university B-metrics B-metrics B-LOC B-university B-university I-field B-musicalinstrument I-musicalinstrument I-literarygenre B-metrics I-chemicalelement I-protein B-metrics I-person B-scientist I-politicalparty I-academicjournal I-astronomicalobject I-literarygenre I-politician I-astronomicalobject B-university B-country I-person B-politician I-event B-protein B-protein I-politicalparty I-field I-field I-politicalparty B-university B-politician B-enzyme I-task B-award B-event B-university I-field B-university I-field B-university I-field I-location I-enzyme B-university B-university B-university I-programlang I-task B-university I-programlang B-university B-university B-politician B-university B-protein B-metrics B-field B-metrics B-field B-metrics B-metrics B-university B-university B-event B-politician I-field I-field I-chemicalelement B-award B-award	B-musicalartist B-musicalartist B-musicalartist O O O O O O O O O O O O O B-organisation B-organisation I-organisation O O O O O O O O O O O O O O O O O O B-album I-album O B-album I-album I-album I-album I-album I-album O B-album B-album B-album O B-album I-album O B-album I-album I-album I-album I-album O B-album I-album I-album I-album O B-album B-album B-album B-album O B-album B-album I-album I-album O O O O O O O
-cross_ner/2	I-chemicalelement I-chemicalelement B-field B-metrics B-university B-field B-university I-song B-university I-programlang B-enzyme B-enzyme B-protein I-person B-chemicalelement B-chemicalelement B-university B-field B-politician I-person B-university B-university I-location B-ORG B-university B-university B-university B-university B-scientist B-university I-politicalparty B-university B-university B-university B-field I-task I-academicjournal B-university B-scientist B-university B-ORG B-university I-song B-university B-enzyme B-enzyme B-university I-politicalparty I-task I-politician I-politician B-enzyme B-university B-enzyme B-university B-enzyme I-song B-university B-enzyme I-song I-politicalparty I-politician B-scientist B-university B-country I-politician B-LOC I-task I-task I-task B-university B-university B-university B-field I-task B-LOC B-university B-politician I-song B-university B-album	O O O O O O O O O O O O B-song I-song I-song I-song O O B-album I-album I-album I-album I-album O O B-song B-song O O B-misc B-misc B-misc O O O B-album B-album B-album I-album I-album O O B-song I-song I-song I-song O O B-musicalartist B-musicalartist I-musicalartist I-musicalartist I-musicalartist O O O B-song I-song I-song I-song I-song I-song I-song O O B-musicalartist I-musicalartist I-musicalartist I-musicalartist I-musicalartist O O O O B-album B-album B-album I-album I-album O O
-cross_ner/3	I-chemicalelement I-chemicalelement I-person B-country B-university I-astronomicalobject I-person B-album I-academicjournal B-LOC B-country B-organisation I-song B-university B-scientist B-protein B-organisation I-song B-metrics B-LOC B-metrics B-LOC I-academicjournal B-university B-scientist I-academicjournal I-programlang I-song I-song B-scientist B-scientist B-university B-metrics I-person B-metrics B-metrics I-song I-protein B-LOC B-LOC B-LOC B-field B-field B-scientist B-scientist B-scientist B-field B-field B-LOC I-song I-song B-field B-literarygenre B-LOC B-album B-field B-field B-scientist B-album B-LOC B-LOC B-scientist B-field B-field I-song I-song I-song B-field B-LOC B-LOC B-album B-scientist B-scientist B-field B-literarygenre I-song B-LOC B-scientist B-scientist B-field I-field	O O O O O B-award I-award O O B-award I-award I-award I-award I-award O O B-award I-award I-award I-award I-award I-award I-award I-award O O B-award I-award I-award O O O B-award I-award I-award I-award I-award O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
-cross_ner/4	I-chemicalelement I-song I-song I-song I-song B-enzyme I-song B-researcher B-university I-programlang I-programlang I-chemicalelement B-scientist B-university I-university B-country I-chemicalelement I-song I-enzyme B-university B-university B-university B-scientist B-organisation I-task B-field I-task I-song B-field I-politician I-politician B-country I-politician I-politician I-politician B-scientist B-scientist I-politician I-song I-band I-politician I-song I-song B-scientist B-university B-field B-university B-metrics B-chemicalelement I-song B-scientist I-MISC I-politician B-university B-scientist I-enzyme B-university I-chemicalelement I-musicalinstrument B-scientist B-scientist B-metrics B-LOC B-university B-metrics B-field B-university B-LOC B-university B-politician B-LOC B-LOC B-scientist B-scientist B-scientist B-scientist B-chemicalelement I-song I-musicalinstrument I-field B-university	B-band B-band O O O O O O O O O B-musicalartist I-musicalartist I-musicalartist O B-band I-band I-band O O O O O B-song I-song I-song I-song I-song O B-musicalartist B-musicalartist B-musicalartist B-musicalartist I-musicalartist I-musicalartist I-musicalartist O B-band B-band O O B-musicalartist I-musicalartist I-musicalartist O O B-song B-song B-song B-song O B-musicalartist B-musicalartist I-musicalartist I-musicalartist I-musicalartist O B-band B-band B-band O B-musicalartist I-musicalartist O O O B-song I-song I-song I-song I-song O O O O O O O O O O
+cross_ner/0	B-researcher I-book B-scientist B-metrics I-politicalparty B-university B-scientist B-metrics B-ORG B-metrics B-metrics I-musicalinstrument I-chemicalelement B-field I-band B-metrics B-protein I-academicjournal B-university I-academicjournal I-programlang I-PER I-song B-university B-university B-university I-literarygenre B-scientist B-metrics I-university I-song B-university B-university B-protein B-organisation B-enzyme B-enzyme I-academicjournal I-musicalinstrument I-song I-song B-LOC B-university B-university I-politician B-university B-metrics B-metrics B-metrics I-academicjournal I-chemicalcompound B-album I-programlang I-programlang B-field B-award B-award B-protein I-chemicalelement B-field I-chemicalelement B-astronomicalobject I-politicalparty B-astronomicalobject B-protein I-musicalinstrument I-musicalinstrument B-university B-award I-musicalinstrument B-album I-song B-field I-programlang B-metrics I-musicalinstrument B-award B-award B-programlang B-field B-protein	O O O O O O O O B-event I-event I-event O B-musicalartist B-musicalartist B-musicalartist O O O O B-location I-location I-location I-location O O B-location O O O B-location I-location O O B-location O O O O O B-location B-location B-location O O B-location O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O	As part of the 2010 leg of the My Christmas Tour, Bocelli gave two concerts in The O2 Arena, in London, and the Manchester Arena, in Manchester, and a concert at 3Arena, in Dublin, in late November 2010.
+cross_ner/1	I-chemicalelement B-award B-event I-politicalparty B-field I-astronomicalobject I-academicjournal B-university B-metrics B-metrics B-LOC B-university B-university I-field B-musicalinstrument I-musicalinstrument I-literarygenre B-metrics I-chemicalelement I-protein B-metrics I-person B-scientist I-politicalparty I-academicjournal I-astronomicalobject I-literarygenre I-politician I-astronomicalobject B-university B-country I-person B-politician I-event B-protein B-protein I-politicalparty I-field I-field I-politicalparty B-university B-politician B-enzyme I-task B-award B-event B-university I-field B-university I-field B-university I-field I-location I-enzyme B-university B-university B-university I-programlang I-task B-university I-programlang B-university B-university B-politician B-university B-protein B-metrics B-field B-metrics B-field B-metrics B-metrics B-university B-university B-event B-politician I-field I-field I-chemicalelement B-award B-award	B-musicalartist B-musicalartist B-musicalartist O O O O O O O O O O O O O B-organisation B-organisation I-organisation O O O O O O O O O O O O O O O O O O B-album I-album O B-album I-album I-album I-album I-album I-album O B-album B-album B-album O B-album I-album O B-album I-album I-album I-album I-album O B-album I-album I-album I-album O B-album B-album B-album B-album O B-album B-album I-album I-album O O O O O O O	Squarepusher continues to push new boundaries to this day, where he still calls Warp Records his home, having released numerous albums to critical acclaim in the years to follow, such as Go Plastic, Do You Know Squarepusher, Ultravisitor, Hello Everything, Just a Souvenir, Solo Electric Bass 1, Ufabulum and Damogen Furies.
+cross_ner/2	I-chemicalelement I-chemicalelement B-field B-metrics B-university B-field B-university I-song B-university I-programlang B-enzyme B-enzyme B-protein I-person B-chemicalelement B-chemicalelement B-university B-field B-politician I-person B-university B-university I-location B-ORG B-university B-university B-university B-university B-scientist B-university I-politicalparty B-university B-university B-university B-field I-task I-academicjournal B-university B-scientist B-university B-ORG B-university I-song B-university B-enzyme B-enzyme B-university I-politicalparty I-task I-politician I-politician B-enzyme B-university B-enzyme B-university B-enzyme I-song B-university B-enzyme I-song I-politicalparty I-politician B-scientist B-university B-country I-politician B-LOC I-task I-task I-task B-university B-university B-university B-field I-task B-LOC B-university B-politician I-song B-university B-album	O O O O O O O O O O O O B-song I-song I-song I-song O O B-album I-album I-album I-album I-album O O B-song B-song O O B-misc B-misc B-misc O O O B-album B-album B-album I-album I-album O O B-song I-song I-song I-song O O B-musicalartist B-musicalartist I-musicalartist I-musicalartist I-musicalartist O O O B-song I-song I-song I-song I-song I-song I-song O O B-musicalartist I-musicalartist I-musicalartist I-musicalartist I-musicalartist O O O O B-album B-album B-album I-album I-album O O	During the 1990s, many releases included recordings of classical compositions : Pictures at an Exhibition ( on Turn of the Tides ), Largo ( from Xerxes ) ( on Tyranny of Beauty ), Symphony in A Minor ( by J. S. Bach ), and Concerto in A Major / Adagio ( by Wolfgang Amadeus Mozart ) ( both on Ambient Monkeys ).
+cross_ner/3	I-chemicalelement I-chemicalelement I-person B-country B-university I-astronomicalobject I-person B-album I-academicjournal B-LOC B-country B-organisation I-song B-university B-scientist B-protein B-organisation I-song B-metrics B-LOC B-metrics B-LOC I-academicjournal B-university B-scientist I-academicjournal I-programlang I-song I-song B-scientist B-scientist B-university B-metrics I-person B-metrics B-metrics I-song I-protein B-LOC B-LOC B-LOC B-field B-field B-scientist B-scientist B-scientist B-field B-field B-LOC I-song I-song B-field B-literarygenre B-LOC B-album B-field B-field B-scientist B-album B-LOC B-LOC B-scientist B-field B-field I-song I-song I-song B-field B-LOC B-LOC B-album B-scientist B-scientist B-field B-literarygenre I-song B-LOC B-scientist B-scientist B-field I-field	O O O O O B-award I-award O O B-award I-award I-award I-award I-award O O B-award I-award I-award I-award I-award I-award I-award I-award O O B-award I-award I-award O O O B-award I-award I-award I-award I-award O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O	He has also won three Grammy Awards, 14 Academy of Country Music awards, 11 Country Music Association ( CMA ) awards, 10 American Music Awards, and three People's Choice Awards.
+cross_ner/4	I-chemicalelement I-song I-song I-song I-song B-enzyme I-song B-researcher B-university I-programlang I-programlang I-chemicalelement B-scientist B-university I-university B-country I-chemicalelement I-song I-enzyme B-university B-university B-university B-scientist B-organisation I-task B-field I-task I-song B-field I-politician I-politician B-country I-politician I-politician I-politician B-scientist B-scientist I-politician I-song I-band I-politician I-song I-song B-scientist B-university B-field B-university B-metrics B-chemicalelement I-song B-scientist I-MISC I-politician B-university B-scientist I-enzyme B-university I-chemicalelement I-musicalinstrument B-scientist B-scientist B-metrics B-LOC B-university B-metrics B-field B-university B-LOC B-university B-politician B-LOC B-LOC B-scientist B-scientist B-scientist B-scientist B-chemicalelement I-song I-musicalinstrument I-field B-university	B-band B-band O O O O O O O O O B-musicalartist I-musicalartist I-musicalartist O B-band I-band I-band O O O O O B-song I-song I-song I-song I-song O B-musicalartist B-musicalartist B-musicalartist B-musicalartist I-musicalartist I-musicalartist I-musicalartist O B-band B-band O O B-musicalartist I-musicalartist I-musicalartist O O B-song B-song B-song B-song O B-musicalartist B-musicalartist I-musicalartist I-musicalartist I-musicalartist O B-band B-band B-band O B-musicalartist I-musicalartist O O O B-song I-song I-song I-song I-song O O O O O O O O O O	ABBA were soon recognised and embraced by other acts : Evan Dando of the Lemonheads recorded a cover version of Knowing Me, Knowing You ; Sinéad O'Connor and Boyzone's Stephen Gately have recorded Chiquitita ; Tanita Tikaram, Blancmange and Steven Wilson paid tribute to The Day Before You Came.
diff --git a/tests/expected_results/token_classification/cross_ner_news_1.tsv b/tests/expected_results/token_classification/cross_ner_news_1.tsv
index 7ca866a8..728ff118 100644
--- a/tests/expected_results/token_classification/cross_ner_news_1.tsv
+++ b/tests/expected_results/token_classification/cross_ner_news_1.tsv
@@ -1,5 +1,5 @@
-cross_ner/0	I-chemicalelement B-university B-university I-task I-programlang B-university I-academicjournal B-university B-scientist I-academicjournal B-album B-university B-university I-academicjournal B-discipline B-metrics B-university I-politicalparty I-person B-country I-task B-award B-country I-field B-scientist I-academicjournal I-band B-country B-scientist I-programlang I-band B-LOC B-metrics I-programlang B-scientist I-field I-field I-field B-album B-album I-song B-album	O O O O O B-organisation B-organisation B-organisation B-organisation B-organisation B-organisation B-organisation B-organisation B-organisation O O O O O O O O O O O O O O O O O O O O O O O O O O O O
-cross_ner/1	I-programlang I-literarygenre B-protein I-programlang B-university I-task I-literarygenre I-task I-task I-programlang B-university I-field I-field I-programlang B-university B-university B-protein B-politician I-task B-astronomicalobject I-task I-song I-song I-field B-LOC I-programlang B-university B-university B-protein B-university B-university B-protein I-programlang I-task I-task I-task B-astronomicalobject I-song I-field I-field I-field I-field	B-location B-location B-location B-location O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
-cross_ner/2	I-location I-field I-song B-programlang I-song I-programlang I-literarygenre I-chemicalelement I-song B-task I-misc B-metrics B-protein I-programlang I-programlang B-university B-university B-scientist I-musicalinstrument B-enzyme I-programlang I-location I-event I-field B-university B-protein I-programlang B-protein B-university I-field B-university B-university B-protein B-university B-protein I-song I-song B-scientist I-song I-chemicalelement I-chemicalelement I-musicalinstrument	B-misc I-misc O O O O B-person I-person O O O O O O O B-organisation O B-organisation O O O O O O O O O O O O O O O O O O O O O O O O
-cross_ner/3	I-chemicalelement I-song I-enzyme B-protein B-university I-literarygenre B-protein I-academicjournal I-discipline I-discipline B-university B-university I-enzyme B-university I-chemicalelement B-university B-university B-university I-university B-university I-astronomicalobject I-academicjournal B-university B-protein B-university B-university B-university B-protein B-university I-enzyme B-university B-protein B-university B-protein B-metrics B-protein B-university I-field I-university B-protein B-protein I-chemicalelement	O O O O O O O O O O O O O O O B-organisation O B-organisation O B-organisation O O O O O O B-organisation O O O O O O O O O O O O B-organisation O O
-cross_ner/4	I-chemicalelement B-protein B-scientist B-protein B-scientist I-astronomicalobject B-university B-university B-metrics I-programlang B-metrics B-literarygenre I-programlang B-university B-university I-university I-university I-book I-programlang I-musicalinstrument B-scientist I-field I-programlang B-protein B-metrics B-protein B-protein I-discipline I-person B-university I-song B-metrics I-field I-literarygenre B-metrics B-metrics I-song B-university B-musicalinstrument I-misc I-astronomicalobject B-scientist	O O B-organisation O O O O O O O O B-location I-location O B-organisation O O O O O O O O O O O O O O O B-location O O B-person I-person I-person I-person O O O O O
+cross_ner/0	I-chemicalelement B-university B-university I-task I-programlang B-university I-academicjournal B-university B-scientist I-academicjournal B-album B-university B-university I-academicjournal B-discipline B-metrics B-university I-politicalparty I-person B-country I-task B-award B-country I-field B-scientist I-academicjournal I-band B-country B-scientist I-programlang I-band B-LOC B-metrics I-programlang B-scientist I-field I-field I-field B-album B-album I-song B-album	O O O O O B-organisation B-organisation B-organisation B-organisation B-organisation B-organisation B-organisation B-organisation B-organisation O O O O O O O O O O O O O O O O O O O O O O O O O O O O	CRICKET - LEICESTERSHIRE TAKE OVER AT TOP AFTER INNINGS VICTORY. <e> </e>
+cross_ner/1	I-programlang I-literarygenre B-protein I-programlang B-university I-task I-literarygenre I-task I-task I-programlang B-university I-field I-field I-programlang B-university B-university B-protein B-politician I-task B-astronomicalobject I-task I-song I-song I-field B-LOC I-programlang B-university B-university B-protein B-university B-university B-protein I-programlang I-task I-task I-task B-astronomicalobject I-song I-field I-field I-field I-field	B-location B-location B-location B-location O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O	LONDON 1996 - 08 - 30 <e> </e>
+cross_ner/2	I-location I-field I-song B-programlang I-song I-programlang I-literarygenre I-chemicalelement I-song B-task I-misc B-metrics B-protein I-programlang I-programlang B-university B-university B-scientist I-musicalinstrument B-enzyme I-programlang I-location I-event I-field B-university B-protein I-programlang B-protein B-university I-field B-university B-university B-protein B-university B-protein I-song I-song B-scientist I-song I-chemicalelement I-chemicalelement I-musicalinstrument	B-misc I-misc O O O O B-person I-person O O O O O O O B-organisation O B-organisation O O O O O O O O O O O O O O O O O O O O O O O O	West Indian all - rounder Phil Simmons took four for 38 on Friday as Leicestershire beat Somerset by an innings and 39 runs in two days to take over at the head of the county championship. <e> </e>
+cross_ner/3	I-chemicalelement I-song I-enzyme B-protein B-university I-literarygenre B-protein I-academicjournal I-discipline I-discipline B-university B-university I-enzyme B-university I-chemicalelement B-university B-university B-university I-university B-university I-astronomicalobject I-academicjournal B-university B-protein B-university B-university B-university B-protein B-university I-enzyme B-university B-protein B-university B-protein B-metrics B-protein B-university I-field I-university B-protein B-protein I-chemicalelement	O O O O O O O O O O O O O O O B-organisation O B-organisation O B-organisation O O O O O O B-organisation O O O O O O O O O O O O B-organisation O O	Their stay on top, though, may be short - lived as title rivals Essex, Derbyshire and Surrey all closed in on victory while Kent made up for lost time in their rain - affected match against Nottinghamshire. <e> </e>
+cross_ner/4	I-chemicalelement B-protein B-scientist B-protein B-scientist I-astronomicalobject B-university B-university B-metrics I-programlang B-metrics B-literarygenre I-programlang B-university B-university I-university I-university I-book I-programlang I-musicalinstrument B-scientist I-field I-programlang B-protein B-metrics B-protein B-protein I-discipline I-person B-university I-song B-metrics I-field I-literarygenre B-metrics B-metrics I-song B-university B-musicalinstrument I-misc I-astronomicalobject B-scientist	O O B-organisation O O O O O O O O B-location I-location O B-organisation O O O O O O O O O O O O O O O B-location O O B-person I-person I-person I-person O O O O O	After bowling Somerset out for 83 on the opening morning at Grace Road, Leicestershire extended their first innings by 94 runs before being bowled out for 296 with England discard Andy Caddick taking three for 83. <e> </e>
diff --git a/tests/test_e2e_dialogues.sh b/tests/test_e2e_dialogues.sh
new file mode 100755
index 00000000..13ad3e90
--- /dev/null
+++ b/tests/test_e2e_dialogues.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+. ./tests/lib.sh
+
+# Smoke-test the e2e dialogue tasks: for each task, train a tiny model, run greedy prediction and check that a result file is written.
+# NOTE(review): $workdir, $SRCDIR and $EMBEDDING_DIR are not set in this script; presumably tests/lib.sh defines them -- confirm.
+
+# hparams[i] holds the extra training flags for tasks[i]; the two arrays are walked by a shared index.
+hparams=(
+        "--pretrained_model sshleifer/bart-tiny-random"
+        "--pretrained_model sshleifer/bart-tiny-random"
+        )
+tasks=(
+      bitod
+      bitod_dst
+      )
+
+for i in "${!hparams[@]}";
+do
+    # train; ${hparams[i]} is deliberately left unquoted so each entry splits into a flag and its value
+    genienlp train --train_tasks "${tasks[i]}" --train_batch_tokens 100 --val_batch_size 300 --train_iterations 4 --preserve_case --save_every 2 --log_every 2 --val_every 2 --save "$workdir/model_$i" --data "$SRCDIR/dataset/bitod" --exist_ok --skip_cache --embeddings "$EMBEDDING_DIR" --no_commit ${hparams[i]}
+
+    # greedy prediction
+    genienlp predict --tasks "${tasks[i]}" --evaluate test --path "$workdir/model_$i" --overwrite --eval_dir "$workdir/model_$i/eval_results/" --data "$SRCDIR/dataset/bitod" --embeddings "$EMBEDDING_DIR" --skip_cache --extra_metrics e2e_dialogue_score
+
+    # check if result file exists
+    if test ! -f "$workdir/model_$i/eval_results/test/${tasks[i]}.tsv" ; then
+        echo "File not found!"
+        exit 1
+    fi
+
+    # export, server mode and the expected-results comparison only run for the first task (bitod)
+    if [ "$i" -eq 0 ] ; then
+      echo "Testing export"
+      genienlp export --path "$workdir/model_$i" --output "$workdir/model_${i}_exported"
+
+      echo "Testing the server mode"
+      echo '{"id": "dummy_example_1", "context": "show me .", "question": "translate to thingtalk", "answer": "now => () => notify"}' | genienlp server --path "$workdir/model_$i" --stdin
+
+      # check if predictions match expected_results
+      diff -u "$SRCDIR/expected_results/bitod/bitod.tsv" "$workdir/model_$i/eval_results/test/bitod.tsv"
+    fi
+
+    rm -rf "$workdir/model_$i" "$workdir/model_${i}_exported"
+
+done
diff --git a/tests/test_main_almond.sh b/tests/test_main_almond.sh
index a96aa8cf..6b4cf31d 100755
--- a/tests/test_main_almond.sh
+++ b/tests/test_main_almond.sh
@@ -28,7 +28,7 @@ do
     # check TransformerSeq2Seq and TransformerLSTM
     if [ $i == 0 ] || [ $i == 2 ] ; then
       echo "Testing export"
-      genienlp export --path $workdir/model_$i --output $workdir/model_$i_exported
+      genienlp export --path $workdir/model_$i --output $workdir/model_"$i"_exported
 
       echo "Testing the server mode"
       echo '{"id": "dummy_example_1", "context": "show me .", "question": "translate to thingtalk", "answer": "now => () => notify"}' | genienlp server --path $workdir/model_$i --stdin