', '', '']) - existing_special_tokens = self._tokenizer.special_tokens_map # add separator if it doesn't exist. It will be used to concatenate context and question if 'sep_token' not in existing_special_tokens: diff --git a/genienlp/metrics.py b/genienlp/metrics.py index 06e2f1de..aec0463e 100644 --- a/genienlp/metrics.py +++ b/genienlp/metrics.py @@ -27,11 +27,11 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import collections +import logging import os import re import string -import logging +from collections import Counter, OrderedDict, defaultdict from contextlib import closing from multiprocessing import Pool, cpu_count from subprocess import PIPE, Popen @@ -40,6 +40,8 @@ import numpy as np import sacrebleu from datasets import load_metric +from dialogues import Bitod +from dialogues.bitod.src.evaluate import convert_lists_to_set from pyrouge import Rouge155 from seqeval import metrics as seq_metrics from seqeval import scheme as seq_scheme @@ -51,7 +53,8 @@ # metrics that are calculated over a corpus (i.e. a list of predictions and gold answers, not single ones). # These metrics cannot be calculated on individual examples and then averaged. 
-corpus_level_metrics = set(['bleu', 'casedbleu', 'ter', 't5_bleu', 'nmt_bleu', 'corpus_f1']) +corpus_level_metrics = {'bleu', 'casedbleu', 'ter', 't5_bleu', 'nmt_bleu', 'corpus_f1', 'jga'} + def to_lf(s, table): aggs = [y.lower() for y in Query.agg_ops] @@ -218,7 +221,7 @@ def lower(text): def f1_score(prediction, ground_truth): prediction_tokens = prediction.split() ground_truth_tokens = ground_truth.split() - common = collections.Counter(prediction_tokens) & collections.Counter(ground_truth_tokens) + common = Counter(prediction_tokens) & Counter(ground_truth_tokens) num_same = sum(common.values()) if num_same == 0: return 0 @@ -519,7 +522,81 @@ def computeDialogue(greedy, answer): return joint_goal_em, turn_request_em, turn_goal_em, answer -def compute_metrics(predictions: Iterable[str], answers: Union[Iterable[str], Iterable[Iterable[str]]], requested_metrics: Iterable, lang: str): +def compute_e2e_dialogue_score(greedy, answer, tgt_lang, args, example_ids): + num_examples = len(answer) + subtask_metrics_dict = OrderedDict() + + results = OrderedDict({'e2e_dialogue_score': 0.0, 'JGA': 0.0, 'API_em': 0.0, 'DA_em': 0.0, 'BLEU': 0.0}) + subtask2result_key = OrderedDict({'dst': 'JGA', 'api': 'API_em', 'da': 'DA_em', 'rg': 'BLEU'}) + + for k, subtask in enumerate(args.e2e_dialogue_valid_subtasks): + ids, preds, golds = [], [], [] + for i in range(num_examples): + id_ = example_ids[i] + if id_.endswith(f'/{subtask}'): + ids.append(id_) + preds.append(greedy[i]) + golds.append(answer[i]) + + if golds: + metrics_to_compute = args.e2e_dialogue_valid_submetrics[k] + sub_metrics = compute_metrics(preds, golds, [metrics_to_compute], tgt_lang, args, ids) + subtask_metrics_dict[subtask] = ( + sub_metrics[metrics_to_compute], + len(golds), + args.e2e_dialogue_valid_subweights[k], + ) + + # TODO how should we aggregate? 
+ weighted_num_examples = 0 + for subtask, (sub_result, num_ex, weight) in subtask_metrics_dict.items(): + result_key = subtask2result_key[subtask] + + results[result_key] += sub_result + results['e2e_dialogue_score'] += weight * (sub_result * num_ex) + weighted_num_examples += weight * num_ex + + results['e2e_dialogue_score'] /= weighted_num_examples + + return results + + +def computeJGA(greedy, answer, example_ids): + dataset = Bitod() + hit = 0 + cur_dial_id = None + assert len(example_ids) == len(greedy) == len(answer) + for id_, g, a in zip(example_ids, greedy, answer): + dial_id = id_.split('/')[1] + if dial_id != cur_dial_id: + cur_dial_id = dial_id + greedy_state = defaultdict() + answer_state = defaultdict() + + a = a[0] + a = dataset.span2state(a) + g = dataset.span2state(g) + + dataset.update_state(a, answer_state) + dataset.update_state(g, greedy_state) + + convert_lists_to_set(answer_state) + convert_lists_to_set(greedy_state) + + if answer_state == greedy_state: + hit += 1 + + return hit / len(greedy) * 100 + + +def compute_metrics( + predictions: Iterable[str], + answers: Union[Iterable[str], Iterable[Iterable[str]]], + requested_metrics: Iterable, + lang: str, + args, + example_ids: Iterable[str] = None, +): """ Inputs: predictions: a list of model predictions @@ -536,11 +613,22 @@ def compute_metrics(predictions: Iterable[str], answers: Union[Iterable[str], It lfem joint_goal_em, turn_request_em, turn_goal_em, avg_dialogue lang: the language of the predictions and answers. Used for BERTScore. 
+ args: arguments + example_ids: used to calculate some of e2e dialogue metrics that need to know span of each dialogue such as JGA """ metric_keys = [] metric_values = [] if not isinstance(answers[0], list): answers = [[a] for a in answers] + if 'e2e_dialogue_score' in requested_metrics: + requested_metrics += ['JGA', 'API_em', 'DA_em', 'BLEU'] + results = compute_e2e_dialogue_score(predictions, answers, lang, args, example_ids) + metric_keys += results.keys() + metric_values += results.values() + if 'jga' in requested_metrics: + jga = computeJGA(predictions, answers, example_ids) + metric_keys += ['jga'] + metric_values += [jga] if 'lfem' in requested_metrics: lfem, answers = computeLFEM(predictions, answers) metric_keys += ['lfem'] @@ -550,9 +638,10 @@ def compute_metrics(predictions: Iterable[str], answers: Union[Iterable[str], It avg_dialogue = (joint_goal_em + request_em) / 2 metric_keys += ['joint_goal_em', 'turn_request_em', 'turn_goal_em', 'avg_dialogue'] metric_values += [joint_goal_em, request_em, turn_goal_em, avg_dialogue] - em = computeEM(predictions, answers) - metric_keys += ['em'] - metric_values += [em] + if 'em' in requested_metrics: + em = computeEM(predictions, answers) + metric_keys += ['em'] + metric_values += [em] if 'pem' in requested_metrics: pem = computePartialEM(predictions, answers) metric_keys.append('pem') @@ -621,7 +710,8 @@ def convert_IOB2_to_IOB1(labels): convert_IOB2_to_IOB1(predictions_processed) convert_IOB2_to_IOB1(answers_processed) f1 = ( - seq_metrics.f1_score(y_pred=predictions_processed, y_true=answers_processed, mode='strict', scheme=seq_scheme.IOB1) * 100 + seq_metrics.f1_score(y_pred=predictions_processed, y_true=answers_processed, mode='strict', scheme=seq_scheme.IOB1) + * 100 ) metric_keys.append('ner_f1_IOB1') @@ -653,26 +743,35 @@ def convert_IOB2_to_IOB1(labels): metric_values += [corpus_f1, precision, recall] metric_dict = dict(zip(metric_keys, metric_values)) - metric_dict = collections.OrderedDict((key, 
metric_dict[key]) for key in requested_metrics) + metric_dict = OrderedDict((key, metric_dict[key]) for key in requested_metrics) return metric_dict -def calculate_and_reduce_metrics(predictions, answers, metrics_to_compute, reduce_metrics, lang): - metrics = collections.OrderedDict() - if reduce_metrics == 'max': - for i in range(len(predictions[0])): # for each output (in case of mulitple outputs) - partial_metrics = compute_metrics([p[i] for p in predictions], answers, metrics_to_compute, lang) # calculate the metric on all first outputs, all second outputs, etc. +def calculate_and_reduce_metrics(generation_output, metrics_to_compute, args, lang): + metrics = OrderedDict() + example_ids = generation_output.example_ids + predictions = generation_output.predictions + answers = generation_output.answers + + if args.reduce_metrics == 'max': + for i in range(len(predictions[0])): # for each output (in case of multiple outputs) + partial_metrics = compute_metrics( + [p[i] for p in predictions], answers, metrics_to_compute, lang, args, example_ids + ) # calculate the metric on all first outputs, all second outputs, etc. 
for k, v in partial_metrics.items(): metrics[k] = max(metrics.get(k, 0), v) - elif reduce_metrics == 'top_k': + elif args.reduce_metrics == 'top_k': for m in metrics_to_compute: if m in corpus_level_metrics: - logging.warning('You are using the corpus-level metric %s with `--reduce_metrics top_k`, which can lead to incorrect results.', m) - - for i in range(len(predictions)): # for each input - example_metrics = collections.OrderedDict() # keep track of metrics for one input and all of its outputs - for j in range(len(predictions[i])): # for each output (in case of mulitple outputs) - partial_metrics = compute_metrics([predictions[i][j]], [answers[i]], metrics_to_compute, lang) # calculate the metric on the j-th output of the i-th input + logging.warning( + f'You are using the corpus-level metric {m} with `--reduce_metrics top_k`, which can lead to incorrect results.', + ) + for i in range(len(predictions)): # for each input + example_metrics = OrderedDict() # keep track of metrics for one input and all of its outputs + for j in range(len(predictions[i])): # for each output (in case of multiple outputs) + partial_metrics = compute_metrics( + [predictions[i][j]], [answers[i]], metrics_to_compute, lang, args, example_ids + ) # calculate the metric on the j-th output of the i-th input for k, v in partial_metrics.items(): example_metrics[k] = max(example_metrics.get(k, 0), v) # sum metrics for all examples diff --git a/genienlp/paraphrase/scripts/transform_dataset.py b/genienlp/paraphrase/scripts/transform_dataset.py index 9b2df661..2dd53ef6 100644 --- a/genienlp/paraphrase/scripts/transform_dataset.py +++ b/genienlp/paraphrase/scripts/transform_dataset.py @@ -144,7 +144,7 @@ def main(args): new_queries = [] # list of lists query_file = open(args.query_file, 'r') for line in query_file: - queries = line.split('\t')[1:-1] # 0 is example id, -1 is gold answer + queries = line.split('\t')[1:-2] # 0 is example id, -1 is input, -2 is gold answer 
new_queries.append([lower_case(tokenize(q.strip())) for q in queries]) if args.transformation in ['remove_wrong_thingtalk', 'get_wrong_thingtalk']: gold_thingtalks = [] diff --git a/genienlp/predict.py b/genienlp/predict.py index 6fbe51fa..c92a8213 100644 --- a/genienlp/predict.py +++ b/genienlp/predict.py @@ -38,18 +38,18 @@ from torch.multiprocessing import Process, set_start_method -from .metrics import calculate_and_reduce_metrics - try: set_start_method('spawn') except RuntimeError: pass + import torch from . import models from .arguments import check_and_update_generation_args from .calibrate import ConfidenceEstimator +from .metrics import calculate_and_reduce_metrics from .ned.ned_utils import init_ned_model from .tasks.registry import get_tasks from .util import ( @@ -254,6 +254,30 @@ def parse_argv(parser): help='do not preserve quotation marks in the output. Useful if using alignment for semantic parsing or NLG', ) + parser.add_argument( + '--e2e_dialogue_evaluation', + action='store_true', + help='Evaluate model on a dialogue dataset end-to-end; i.e. 
model predictions are used as input instead of gold', + ) + parser.add_argument( + '--e2e_dialogue_valid_subtasks', + nargs='+', + type=str, + help='Evaluate only on these subtasks when calculating e2e_dialogue_score; rg is not included by default', + ) + parser.add_argument( + '--e2e_dialogue_valid_submetrics', + nargs='+', + type=str, + help='Specify metrics to use for each of subtasks in e2e_dialogue_valid_subtasks.', + ) + parser.add_argument( + '--e2e_dialogue_valid_subweights', + nargs='+', + type=float, + help='Specify weights to use for each of subtasks in e2e_dialogue_valid_subtasks.', + ) + def set_default_values(args): """ @@ -262,6 +286,10 @@ def set_default_values(args): if args.confidence_feature_path is None: args.confidence_feature_path = os.path.join(args.path, 'confidence_features.pkl') + if args.e2e_dialogue_evaluation and args.val_batch_size[0] != 1: + logger.warning('When evaluating bitod end-to-end, val_batch_size should be 1 so we load the data turn by turn') + args.val_batch_size = [1] + def check_args(args): @@ -412,9 +440,8 @@ def run(args, device): log_model_size(logger, model, args.model) model.to(device) - decaScore = [] - task_scores = defaultdict(list) model.eval() + task_scores = defaultdict(list) eval_dir = os.path.join(args.eval_dir, args.evaluate) os.makedirs(eval_dir, exist_ok=True) @@ -459,6 +486,7 @@ def run(args, device): output_confidence_features=args.save_confidence_features, confidence_estimators=confidence_estimators, disable_progbar=False, + eval_dir=eval_dir, ) if args.save_confidence_features: @@ -469,54 +497,76 @@ def run(args, device): with open(prediction_file_name, 'w' + ('' if args.overwrite else '+')) as prediction_file: for i in range(len(generation_output.example_ids)): if args.one_output_per_line: - lines = [( - generation_output.example_ids[i] - + '\t' - + prediction - + '\t' - + generation_output.answers[i] - ) for prediction in generation_output.predictions[i]] # one line per generation output + lines = [ + 
( + generation_output.example_ids[i] + + '\t' + + prediction + + '\t' + + generation_output.answers[i] + + '\t' + + generation_output.contexts[i] + ) + for prediction in generation_output.predictions[i] + ] # one line per generation output else: - lines = [( - generation_output.example_ids[i] - + '\t' - + '\t'.join(generation_output.predictions[i]) - + '\t' - + generation_output.answers[i] - )] # one line with all generation outputs separated by '\t' + lines = [ + ( + generation_output.example_ids[i] + + '\t' + + '\t'.join(generation_output.predictions[i]) + + '\t' + + generation_output.answers[i] + + '\t' + + generation_output.contexts[i] + ) + ] # one line with all generation outputs separated by '\t' if args.calibrator_paths is not None: for score in generation_output.confidence_scores: - lines = [line + '\t' + str(score[i]) for line in lines] # append score to all lines + lines = [line + '\t' + str(score[i]) for line in lines] # append score to all lines prediction_file.write('\n'.join(lines) + '\n') if args.translate_return_raw_outputs: with open(raw_prediction_file_name, 'w' + ('' if args.overwrite else '+')) as prediction_file: for i in range(len(generation_output.example_ids)): if args.one_output_per_line: - lines = [( - generation_output.example_ids[i] - + '\t' - + raw_prediction - + '\t' - + generation_output.answers[i] - ) for raw_prediction in generation_output.raw_predictions[i]] # one line per generation output + lines = [ + ( + generation_output.example_ids[i] + + '\t' + + raw_prediction + + '\t' + + generation_output.answers[i] + + '\t' + + generation_output.contexts[i] + ) + for raw_prediction in generation_output.raw_predictions[i] + ] # one line per generation output else: - lines = [( - generation_output.example_ids[i] - + '\t' - + '\t'.join(generation_output.raw_predictions[i]) - + '\t' - + generation_output.answers[i] - )] # one line with all outputs separated by '\t' + lines = [ + ( + generation_output.example_ids[i] + + '\t' + + 
'\t'.join(generation_output.raw_predictions[i]) + + '\t' + + generation_output.answers[i] + + '\t' + + generation_output.contexts[i] + ) + ] # one line with all outputs separated by '\t' prediction_file.write('\n'.join(lines) + '\n') if len(generation_output.answers) > 0: metrics_to_compute = task.metrics metrics_to_compute += args.extra_metrics + metrics_to_compute = [metric for metric in task.metrics if metric not in ['loss']] if args.main_metric_only: metrics_to_compute = [metrics_to_compute[0]] metrics = calculate_and_reduce_metrics( - generation_output.predictions, generation_output.answers, metrics_to_compute, args.reduce_metrics, tgt_lang + generation_output, + metrics_to_compute, + args, + tgt_lang, ) with open(results_file_name, 'w' + ('' if args.overwrite else '+')) as results_file: @@ -526,17 +576,21 @@ def run(args, device): for i, (c, p, a) in enumerate( zip(generation_output.contexts, generation_output.predictions, generation_output.answers) ): - log_string = f'\nContext {i+1}: {c}\nPrediction {i + 1} ({len(p)} outputs): {p}\nAnswer {i + 1}: {a}\n' + log_string = ( + f'\nContext {i + 1}: {c}\nPrediction {i + 1} ({len(p)} outputs): {p}\nAnswer {i + 1}: {a}\n' + ) if args.calibrator_paths is not None: - log_string += f'Confidence {i+1} : ' + log_string += f'Confidence {i + 1} : ' for score in generation_output.confidence_scores: log_string += f'{score[i]:.3f}, ' log_string += '\n' logger.info(log_string) - logger.info(metrics) + + logger.info(metrics) task_scores[task].append((len(generation_output.answers), metrics[task.metrics[0]])) + decaScore = [] for task in task_scores.keys(): decaScore.append( sum([length * score for length, score in task_scores[task]]) / sum([length for length, score in task_scores[task]]) @@ -560,9 +614,6 @@ def main(args): args.tasks = list(get_tasks(args.task_names, args).values()) logger.info(f'Arguments:\n{pformat(vars(args))}') - logger.info(f'Loading from {args.best_checkpoint}') - - devices = get_devices(args.devices) if 
args.override_valid_metrics: assert len(args.override_valid_metrics) == len(args.tasks) @@ -579,6 +630,9 @@ def main(args): task.metrics = new_metrics + logger.info(f'Loading from {args.best_checkpoint}') + devices = get_devices(args.devices) + if len(devices) > 1: logger.info(f'Independent multi-GPU generation on following devices: {devices}') all_processes = [] diff --git a/genienlp/tasks/almond_task.py b/genienlp/tasks/almond_task.py index a2ed971c..e1de6b81 100644 --- a/genienlp/tasks/almond_task.py +++ b/genienlp/tasks/almond_task.py @@ -383,7 +383,7 @@ def _make_example(self, parts, dir_name=None, **kwargs): contexts = [] src_char_spans = None if split_sentence: - if self.args.do_alignment: + if self.need_attention_scores: src_quotation_symbol = '"' src_char_spans_ind = [index for index, char in enumerate(context) if char == src_quotation_symbol] src_char_spans = [ @@ -394,7 +394,9 @@ def _make_example(self, parts, dir_name=None, **kwargs): if len(contexts) > 1: examples = [] for i, text in enumerate(contexts): - ex_id, text = self.construct_id2span_mapping(self.name + '/' + example_id + f'@{i}', text, 'context') + ex_id = self.name + '/' + example_id + f'@{i}' + if self.need_attention_scores: + ex_id, text = self.construct_id2span_mapping(ex_id, text, 'context') examples.append( Example.from_raw( ex_id, @@ -406,7 +408,9 @@ def _make_example(self, parts, dir_name=None, **kwargs): ) ) else: - ex_id, context = self.construct_id2span_mapping(self.name + '/' + example_id, context, 'context') + ex_id = self.name + '/' + example_id + if self.need_attention_scores: + ex_id, context = self.construct_id2span_mapping(ex_id, context, 'context') examples = Example.from_raw(ex_id, context, question, answer, preprocess=self.preprocess_field, lower=False) return examples @@ -480,7 +484,7 @@ def batch_postprocess_prediction_ids(self, batch_example_ids, batch_src_ids, bat ) plt.show() - if self.args.do_alignment: + if self.need_attention_scores: src_spans = 
self.input_spans[example_id] try: text = align_and_replace( diff --git a/genienlp/tasks/base_task.py b/genienlp/tasks/base_task.py index aff367a8..b88ac2cf 100644 --- a/genienlp/tasks/base_task.py +++ b/genienlp/tasks/base_task.py @@ -41,6 +41,7 @@ class BaseTask(object): def __init__(self, name, args): self.name = name + self.args = args self._metrics = ['em', 'nem', 'nf1'] # special task-specific tokens that should not be subword tokenized self.special_tokens = set() @@ -63,7 +64,6 @@ def get_splits(self, root, **kwargs): """ Load the train, test, eval datasets for this task - :param field: the text.Field to use for tokenization, preprocessing and vocabulary construction :param root: the base directory where data is stored :param kwargs: other arguments to pass to the Dataset :return: a list of text.Dataset diff --git a/genienlp/tasks/generic_dataset.py b/genienlp/tasks/generic_dataset.py index dec8d7df..de40eeda 100644 --- a/genienlp/tasks/generic_dataset.py +++ b/genienlp/tasks/generic_dataset.py @@ -41,6 +41,7 @@ from typing import Iterable import torch +import ujson from datasets import load_dataset from ..data_utils.example import Example, NumericalizedExamples @@ -1873,7 +1874,7 @@ def __init__(self, data, *, make_example, **kwargs): super().__init__(examples, **kwargs) @classmethod - def return_splits(cls, name, path='.data', train='train', validation='dev', test='test', **kwargs): + def return_splits(cls, path='.data', train='train', validation='dev', test='test', **kwargs): crossner_domains = kwargs.pop('crossner_domains') @@ -1968,3 +1969,46 @@ def splits(cls, root='.data', train='train', validation='eval', test='test', **k ), Split(train=train_path, eval=validation_path, test=test_path), ) + + +class BiTODDataset(CQA): + def __init__(self, path, *, make_example, **kwargs): + subsample = kwargs.pop('subsample') + examples = [] + + with open(path) as fin: + data = ujson.load(fin)['data'] + for turn in data: + processed = make_example(turn, 
train_target=kwargs.get('train_target', False)) + if processed: + examples.append(processed) + + if subsample is not None and len(examples) >= subsample: + break + + super().__init__(examples, **kwargs) + + # do not sort eval/ test set so we can compute individual scores for each subtask (e2e_dialogue_score) + self.eval_sort_key_fn = None + + # in e2e evaluation use 1 batch at a time + if kwargs.get('e2e_evaluation', False): + self.eval_batch_size_fn = default_batch_fn + + @classmethod + def return_splits(cls, path='.data', train='train', validation='valid', test='test', **kwargs): + train_path, validation_path, test_path = None, None, None + if train: + train_path = os.path.join(path, f'{train}.json') + if validation: + validation_path = os.path.join(path, f'{validation}.json') + if test: + test_path = os.path.join(path, 'test.json') + + train_data = None if train is None else cls(train_path, **kwargs) + validation_data = None if validation is None else cls(validation_path, **kwargs) + test_data = None if test is None else cls(test_path, **kwargs) + + return Split(train=train_data, eval=validation_data, test=test_data), Split( + train=train_path, eval=validation_path, test=test_path + ) diff --git a/genienlp/tasks/generic_task.py b/genienlp/tasks/generic_task.py index 8cbf999d..3418d6a0 100644 --- a/genienlp/tasks/generic_task.py +++ b/genienlp/tasks/generic_task.py @@ -33,7 +33,7 @@ from . 
import generic_dataset from .almond_task import BaseAlmondTask from .base_task import BaseTask -from .generic_dataset import CrossNERDataset, OODDataset +from .generic_dataset import BiTODDataset, CrossNERDataset, OODDataset from .registry import register_task @@ -377,7 +377,7 @@ def _make_example(self, parts, dir_name=None, **kwargs): ) def get_splits(self, root, **kwargs): - return CrossNERDataset.return_splits(name=self.name, path=root, make_example=self._make_example, **kwargs) + return CrossNERDataset.return_splits(path=root, make_example=self._make_example, **kwargs) @register_task('ood_task') @@ -393,3 +393,98 @@ def metrics(self): def get_splits(self, root, **kwargs): return OODDataset.splits(root=root, **kwargs) + + +@register_task('bitod') +class BiTOD(BaseTask): + def __init__(self, name, args): + super().__init__(name, args) + special_tokens_v1 = { + '
', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + } + special_tokens_v2 = { + 'USER:', + 'SYSTEM:', + ' ', + ' ', + ' ', + '#unknown', + 'DST:', + 'API:', + 'Response:', + } + special_tokens_v5 = {'AGENT_ACTS:'} + special_tokens_v7 = {'ACTS:'} + special_tokens_v9 = {'USER_ACTS:'} + special_tokens_v11 = {' ', ' ', ' '} + special_tokens_v13 = {'AGENT_ACTS_PREV'} + special_tokens_v2_10 = {' ', ' ', 'DA:', 'RG:'} + self.special_tokens = ( + special_tokens_v1 + | special_tokens_v2 + | special_tokens_v5 + | special_tokens_v7 + | special_tokens_v9 + | special_tokens_v11 + | special_tokens_v13 + | special_tokens_v2_10 + ) + self._metrics = ['e2e_dialogue_score'] + + def utterance_field(self): + return 'context' + + def _make_example(self, turn, **kwargs): + dial_id, turn_id, input_text, output_text, train_target = ( + turn['dial_id'], + turn['turn_id'], + turn['input_text'], + turn['output_text'], + turn['train_target'], + ) + + if kwargs.get('train_target', False) and train_target != kwargs['train_target']: + return None + + example_id = '/'.join([dial_id, str(turn_id), train_target]) + + return Example.from_raw( + self.name + '/' + str(example_id), input_text, '', output_text, preprocess=self.preprocess_field, lower=False + ) + + def get_splits(self, root, **kwargs): + kwargs['e2e_evaluation'] = self.args.e2e_dialogue_evaluation + return BiTODDataset.return_splits(path=root, make_example=self._make_example, **kwargs) + + +@register_task('bitod_nlg') +class BiTODNLG(BiTOD): + def __init__(self, name, args): + super().__init__(name, args) + self._metrics = ['casedbleu'] + + def get_splits(self, root, **kwargs): + kwargs['train_target'] = 'rg' + kwargs['e2e_evaluation'] = self.args.e2e_dialogue_evaluation + return BiTODDataset.return_splits(path=root, make_example=self._make_example, **kwargs) + + +@register_task('bitod_dst') +class BiTODDST(BiTOD): + def __init__(self, name, args): + super().__init__(name, args) + self._metrics = ['jga', 'em'] + + def 
get_splits(self, root, **kwargs): + kwargs['train_target'] = 'dst' + kwargs['e2e_evaluation'] = self.args.e2e_dialogue_evaluation + return BiTODDataset.return_splits(path=root, make_example=self._make_example, **kwargs) diff --git a/genienlp/tasks/hf_task.py b/genienlp/tasks/hf_task.py index eb21a11a..7891e471 100644 --- a/genienlp/tasks/hf_task.py +++ b/genienlp/tasks/hf_task.py @@ -49,6 +49,7 @@ def utterance_field(self): class AmbigQA(HFTask): def __init__(self, name, args): super().__init__(name, args) + self.special_tokens = {' ', '', ''} @property def metrics(self): diff --git a/genienlp/train.py b/genienlp/train.py index 7b8f283c..3f23b767 100644 --- a/genienlp/train.py +++ b/genienlp/train.py @@ -96,6 +96,7 @@ def prepare_data(args, logger): for task in args.train_tasks: logger.info(f'Loading {task.name}') kwargs = {'test': None, 'validation': None} + kwargs['train'] = args.train_set_name kwargs.update(train_eval_shared_kwargs) kwargs['all_dirs'] = args.train_src_languages kwargs['cached_path'] = os.path.join(args.cache, task.name) diff --git a/genienlp/util.py b/genienlp/util.py index 544ec489..bb789738 100644 --- a/genienlp/util.py +++ b/genienlp/util.py @@ -232,7 +232,7 @@ def __repr__(self) -> str: ) -class GenerationOutput: +class GenerationOutput(object): """ Contains all the information that the generation function may need to output """ @@ -599,9 +599,16 @@ def make_data_loader(dataset, numericalizer, batch_size, device=None, train=Fals f'answer lengths (min, mean, max): {np.min(answer_lengths)}, {int(np.mean(answer_lengths))}, {np.max(answer_lengths)}' ) - if dataset.batch_size_fn == input_tokens_fn: + if train: + sort_key_fn = dataset.sort_key_fn + batch_size_fn = dataset.batch_size_fn + else: + sort_key_fn = getattr(dataset, 'eval_sort_key_fn', dataset.sort_key_fn) + batch_size_fn = getattr(dataset, 'eval_batch_size_fn', dataset.batch_size_fn) + + if batch_size_fn == input_tokens_fn: min_batch_length = np.min(context_lengths) - elif 
dataset.batch_size_fn == all_tokens_fn: + elif batch_size_fn == all_tokens_fn: min_batch_length = np.min(context_lengths) + np.min(answer_lengths) else: min_batch_length = 1 @@ -611,7 +618,7 @@ def make_data_loader(dataset, numericalizer, batch_size, device=None, train=Fals if min_batch_length > batch_size: raise ValueError( - f'The minimum example length in your dataset is {np.min(context_lengths) + np.min(answer_lengths)} but your batch size is {batch_size}.' + f'The minimum batch length in your dataset is {min_batch_length} but your batch size is {batch_size}.' f' Thus no examples will be processed. Consider increasing batch_size' ) if np.min(answer_lengths) < min_output_length: @@ -628,10 +635,10 @@ def make_data_loader(dataset, numericalizer, batch_size, device=None, train=Fals sampler = LengthSortedIterator( all_features, batch_size=batch_size, - sort=True, + sort=bool(sort_key_fn), shuffle_and_repeat=train, - sort_key_fn=dataset.sort_key_fn, - batch_size_fn=dataset.batch_size_fn, + sort_key_fn=sort_key_fn, + batch_size_fn=batch_size_fn, groups=dataset.groups, ) # get the sorted data_source @@ -861,7 +868,8 @@ def load_config_json(args): 'eval_tgt_languages', ] - # train and predict scripts have these arguments in common. We use the values from train only if they are not provided in predict + # train and predict scripts have these arguments in common. We use the values from train only if they are not provided in predict. 
+ # NOTE: do not set default values for these arguments in predict cause the defaults will always override training arguments overwrite = [ 'val_batch_size', 'num_beams', @@ -877,13 +885,23 @@ def load_config_json(args): 'min_output_length', 'reduce_metrics', 'database_dir', + 'e2e_dialogue_valid_subtasks', + 'e2e_dialogue_valid_submetrics', + 'e2e_dialogue_valid_subweights', ] - # these are true/ false arguments - overwrite_actions = ['do_alignment', 'align_preserve_input_quotation', 'align_remove_output_quotation'] for o in overwrite: if o not in args or getattr(args, o) is None: retrieve.append(o) + + # these are true/ false arguments + overwrite_actions = [ + 'do_alignment', + 'align_preserve_input_quotation', + 'align_remove_output_quotation', + 'e2e_dialogue_evaluation', + ] for o in overwrite_actions: + # if argument is True in predict overwrite train; if False retrieve from train if not getattr(args, o, False): retrieve.append(o) @@ -946,6 +964,13 @@ def load_config_json(args): # use default value setattr(args, r, None) + if args.e2e_dialogue_valid_subtasks is None: + setattr(args, 'e2e_dialogue_valid_subtasks', ['dst', 'api', 'da']) + if args.e2e_dialogue_valid_submetrics is None: + setattr(args, 'e2e_dialogue_valid_submetrics', ['jga', 'em', 'em']) + if args.e2e_dialogue_valid_subweights is None: + setattr(args, 'e2e_dialogue_valid_subweights', [1.0, 1.0, 1.0]) + # backward compatibility for models trained with genienlp before NED Refactoring (2) if args.max_features_size is None: if hasattr(args, 'ned_features_size'): diff --git a/genienlp/validate.py b/genienlp/validate.py index a84718ba..934a9f7d 100644 --- a/genienlp/validate.py +++ b/genienlp/validate.py @@ -27,18 +27,27 @@ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- +import copy +import logging +import os +import re import sys +from collections import defaultdict import torch +import ujson from dateparser.languages import default_loader +from dialogues import Bitod from transformers import MarianTokenizer +from .data_utils.example import NumericalizedExamples, SequentialField from .data_utils.progbar import progress_bar from .metrics import calculate_and_reduce_metrics from .models import TransformerForSequenceClassification, TransformerForTokenClassification from .util import GenerationOutput, merge_translated_sentences +logger = logging.getLogger(__name__) + def generate_with_model( model, @@ -51,9 +60,22 @@ def generate_with_model( original_order=None, confidence_estimators=None, disable_progbar=True, + eval_dir=None, ): + if args.e2e_dialogue_evaluation: + return generate_with_seq2seq_model_for_dialogue( + model, + data_iterator, + numericalizer, + task, + args, + eval_dir, + output_predictions_only=output_predictions_only, + original_order=original_order, + disable_progbar=disable_progbar, + ) - if isinstance(model, TransformerForTokenClassification) or isinstance(model, TransformerForSequenceClassification): + elif isinstance(model, (TransformerForTokenClassification, TransformerForSequenceClassification)): return generate_with_classification_model( model, data_iterator, numericalizer, task, original_order=original_order, disable_progbar=disable_progbar ) @@ -72,6 +94,259 @@ def generate_with_model( ) +def replace_capturing_group(input, re_pattern, replacement): + # replace first captured group in the input with replacement using regex re_pattern + if re_pattern.search(input): + whole_match = re_pattern.search(input).group(0).strip() + captured_match = re_pattern.search(input).group(1).strip() + new_whole_match = whole_match.replace(captured_match, replacement) + new_input = re.sub(re_pattern, new_whole_match, input) + else: + new_input = input + return new_input + + +def generate_with_seq2seq_model_for_dialogue( + 
def generate_with_seq2seq_model_for_dialogue(
    model,
    data_iterator,
    numericalizer,
    task,
    args,
    eval_dir,
    output_predictions_only=False,
    original_order=None,
    disable_progbar=True,
) -> GenerationOutput:
    """
    Generate predictions turn by turn for end-to-end dialogue evaluation.

    Examples are processed one at a time (batch size must be 1). Each example id is split on '/' into
    (task name, dialogue id, turn id, train target), where train target is one of 'dst', 'api', 'da' or 'rg'.
    Before generation, the state / knowledge / actions segment of the gold context is replaced with what the
    model itself predicted earlier in the same dialogue, so errors propagate across subtasks.
    The per-dialogue predictions are also dumped to `<eval_dir>/e2e_dialogue_preds.json`.

    Inputs:
        model: model exposing `device` and `generate`
        data_iterator: iterable of numericalized turns
        numericalizer: used to convert between ids and text
        task: its `postprocess_prediction` is applied to answers and predictions
        args: generation hyperparameters (only the first value of each per-run list is used)
        eval_dir: directory for the predictions dump; the dump is skipped (with a warning) if None
        output_predictions_only: if True gold answers are not decoded and only predictions are returned
        original_order: List of indices. If provided, we will sort the results according to this order
        disable_progbar: hide the progress bar
    Outputs: GenerationOutput with `loss` set to None (not computed yet) and
        predictions: a List of Lists of strings
        plus example_ids, answers and contexts when `output_predictions_only` is False
    """

    dataset = Bitod()
    e2e_dialogue_preds = dict()

    predictions = []
    example_ids = []
    answers = []
    contexts = []

    # TODO: handle multiple responses
    hyperparameter_idx = 0

    cur_dial_id = ''
    knowledge = None

    device = model.device

    # a set gives constant-time membership tests while trimming tokens below
    special_tokens = set(numericalizer._tokenizer.all_special_tokens)

    for turn in progress_bar(data_iterator, desc='Generating', disable=disable_progbar):
        batch_size = len(turn.example_id)
        # the dialogue loop feeds each prediction into the next input, so turns must come one by one
        assert batch_size == 1
        batch_prediction = []
        batch_example_ids = turn.example_id

        example_ids += batch_example_ids

        _task_name, dial_id, turn_id, train_target = example_ids[-1].split('/')
        turn_id = int(turn_id)

        if cur_dial_id != dial_id:
            # new dialogue: reset everything that is carried over between turns
            cur_dial_id = dial_id
            dialogue_state = {}
            knowledge = defaultdict(dict)
            new_knowledge_text = 'null'
            new_actions_text = 'null'
            active_api = None
            e2e_dialogue_preds[dial_id] = {"turns": defaultdict(dict), "API": defaultdict(dict)}

        batch_context = []
        batch_tokens = numericalizer.convert_ids_to_tokens(turn.context.value.data, skip_special_tokens=False)

        # remove only beginning and trailing special tokens
        # otherwise the sep_token added between context and question will be lost
        for text in batch_tokens:
            i = 0
            # bounds checks keep a sequence made only of special tokens from raising IndexError
            while i < len(text) and text[i] in special_tokens:
                i += 1
            j = len(text) - 1
            while j >= i and text[j] in special_tokens:
                j -= 1
            text = text[i : j + 1]

            batch_context.append(numericalizer._tokenizer.convert_tokens_to_string(text))

        contexts += batch_context

        if not output_predictions_only:
            batch_answer = numericalizer.reverse(turn.answer.value.data, 'answer')
            batch_answer = [
                task.postprocess_prediction(example_id, answer) for example_id, answer in zip(batch_example_ids, batch_answer)
            ]
            answers += batch_answer

        new_state_text = dataset.state2span(dialogue_state)

        if train_target == 'dst':
            # replace state
            ## we always use gold history following common practice
            input_text = replace_capturing_group(contexts[-1], dataset.state_re, new_state_text)

        elif train_target == 'api':
            # replace state
            input_text = replace_capturing_group(contexts[-1], dataset.state_re, new_state_text)

        elif train_target == 'da':
            # replace state
            input_text = replace_capturing_group(contexts[-1], dataset.state_re, new_state_text)

            # replace knowledge
            input_text = replace_capturing_group(input_text, dataset.knowledge_re, new_knowledge_text)

        elif train_target == 'rg':
            # replace actions
            input_text = replace_capturing_group(contexts[-1], dataset.actions_re, new_actions_text)

        else:
            raise ValueError(f'Invalid train_target: {train_target}')

        # replace old context with updated
        contexts[-1] = input_text

        tokenized_contexts = numericalizer.encode_batch([input_text], field_name='context', features=None)[0]

        numericalized_turn = NumericalizedExamples(
            example_id=[turn.example_id[0]],
            context=SequentialField(
                value=torch.tensor([tokenized_contexts.value], device=device),
                length=torch.tensor([tokenized_contexts.length], device=device),
                limited=torch.tensor([tokenized_contexts.limited], device=device),
                feature=None,
            ),
            answer=SequentialField(value=None, length=None, limited=None, feature=None),
        )

        generated = model.generate(
            numericalized_turn,
            max_output_length=args.max_output_length,
            min_output_length=args.min_output_length,
            num_outputs=args.num_outputs[hyperparameter_idx],
            temperature=args.temperature[hyperparameter_idx] if args.temperature[hyperparameter_idx] > 0 else 1.0,
            repetition_penalty=args.repetition_penalty[hyperparameter_idx],
            top_k=args.top_k[hyperparameter_idx],
            top_p=args.top_p[hyperparameter_idx],
            num_beams=args.num_beams[hyperparameter_idx],
            num_beam_groups=args.num_beam_groups[hyperparameter_idx],
            diversity_penalty=args.diversity_penalty[hyperparameter_idx],
            no_repeat_ngram_size=args.no_repeat_ngram_size[hyperparameter_idx],
            do_sample=args.temperature[hyperparameter_idx] != 0,
        )

        partial_batch_prediction_ids = generated.sequences

        partial_batch_prediction = numericalizer.reverse(partial_batch_prediction_ids, 'answer')[0]

        if train_target == 'da':
            partial_batch_prediction = dataset.postprocess_prediction(
                partial_batch_prediction, knowledge, lang=numericalizer._tokenizer.src_lang[:2]
            )

        partial_batch_prediction = task.postprocess_prediction(batch_example_ids[0], partial_batch_prediction)

        # put them into the right array
        batch_prediction.append([partial_batch_prediction])

        predictions += batch_prediction

        if train_target == 'dst':
            # update dialogue_state
            lev = predictions[-1][0].strip()
            state_update = dataset.span2state(lev)
            if state_update:
                # the last key of the update is treated as the currently active api
                active_api = list(state_update.keys())[-1]
            dataset.update_state(state_update, dialogue_state)

            #### save latest state
            state_to_record = copy.deepcopy(dialogue_state)
            state_to_record = {dataset.domain2api_name(k): v for k, v in state_to_record.items()}
            e2e_dialogue_preds[dial_id]["turns"][str(turn_id)]["state"] = state_to_record
            ####

        elif train_target == 'api':
            if dataset.do_knowledge_reset(active_api):
                new_knowledge_text = "null"
                knowledge = defaultdict(dict)

            do_api_call = predictions[-1][0].strip()

            if do_api_call == 'yes':
                # make api call
                api_name = active_api
                if api_name in dialogue_state:
                    constraints, new_knowledge_text = dataset.make_api_call(
                        dialogue_state, knowledge, api_name, numericalizer._tokenizer.src_lang, dial_id, turn_id
                    )
                    #### save latest api constraints
                    e2e_dialogue_preds[dial_id]["API"][dataset.domain2api_name(api_name)] = copy.deepcopy(constraints)
                    ####

            elif do_api_call == 'no':
                # do nothing
                pass
            else:
                logger.error(
                    f'API call should be either yes or no but got {do_api_call}. Seems model is not trained for enough steps. For now we assume it\'s a no'
                )

            #### save latest api results
            e2e_dialogue_preds[dial_id]["turns"][str(turn_id)]["api"] = new_knowledge_text
            ####

        elif train_target == 'da':
            new_actions_text = predictions[-1][0]
            #### save latest actions
            e2e_dialogue_preds[dial_id]["turns"][str(turn_id)]["actions"] = predictions[-1][0]
            ####

        elif train_target == 'rg':
            #### save latest response
            e2e_dialogue_preds[dial_id]["turns"][str(turn_id)]["response"] = predictions[-1]
            ####

    if eval_dir is None:
        # generate_with_model forwards eval_dir=None by default; do not throw away the finished generation for it
        logger.warning('eval_dir is not set; skipping writing e2e_dialogue_preds.json')
    else:
        # ensure_ascii=False emits raw non-ASCII characters, so the encoding must not depend on the locale
        with open(os.path.join(eval_dir, 'e2e_dialogue_preds.json'), 'w', encoding='utf-8') as fout:
            ujson.dump(e2e_dialogue_preds, fout, indent=2, ensure_ascii=False)

    if original_order is not None and example_ids:
        # sort back to the original order
        if output_predictions_only:
            # answers were never collected in this mode; zipping them in would truncate everything to nothing
            rows = sorted(zip(original_order, example_ids, predictions, contexts))
            original_order, example_ids, predictions, contexts = (list(column) for column in zip(*rows))
        else:
            rows = sorted(zip(original_order, example_ids, predictions, answers, contexts))
            original_order, example_ids, predictions, answers, contexts = (list(column) for column in zip(*rows))

    # TODO calculate and return loss
    loss = None
    output = GenerationOutput(loss=loss)

    if output_predictions_only:
        output.predictions = predictions
    else:
        output.example_ids, output.predictions, output.answers, output.contexts = example_ids, predictions, answers, contexts

    return output
task.metrics if metric != 'loss'] - metrics = calculate_and_reduce_metrics( - output.predictions, output.answers, metrics_to_return, args.reduce_metrics, model.tgt_lang - ) + metrics = calculate_and_reduce_metrics(output, metrics_to_return, args, model.tgt_lang) results = {'model prediction': output.predictions, 'gold answer': output.answers, 'context': output.contexts} diff --git a/setup.py b/setup.py index ab085dd2..025d1825 100644 --- a/setup.py +++ b/setup.py @@ -68,5 +68,7 @@ 'scikit-learn>=0.23,<2.0', 'dill~=0.3', 'xgboost~=1.3', + # dialogues + 'dialogues @ git+https://github.com/Mehrad0711/dialogues', ], ) diff --git a/tests/dataset/bitod/test.json b/tests/dataset/bitod/test.json new file mode 100644 index 00000000..ad9227b2 --- /dev/null +++ b/tests/dataset/bitod/test.json @@ -0,0 +1,256 @@ +{ + "data": [ + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 1, + "dialog_history": "USER: Hey, can you help me with hotel booking?", + "input_text": "DST:
null USER: Hey, can you help me with hotel booking? ", + "output_text": "( hotels search )", + "train_target": "dst" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 1, + "dialog_history": "USER: Hey, can you help me with hotel booking?", + "input_text": "API: null ( hotels search ) USER: Hey, can you help me with hotel booking? ", + "output_text": "no", + "train_target": "api" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 1, + "dialog_history": "USER: Hey, can you help me with hotel booking?", + "input_text": "DA: null ( hotels search ) USER: Hey, can you help me with hotel booking? ", + "output_text": "( hotels search ) request location , request rating", + "train_target": "da" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 1, + "dialog_history": "USER: Hey, can you help me with hotel booking?", + "input_text": "RG: ( hotels search ) request location , request rating USER: Hey, can you help me with hotel booking? ", + "output_text": "Sure! Do you have a destination in mind, and did you have a certain hotel rating in mind?", + "train_target": "rg" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 2, + "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.", + "input_text": "DST: ( hotels search ) AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. 
", + "output_text": "( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \"", + "train_target": "dst" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 2, + "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.", + "input_text": "API: null ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. ", + "output_text": "no", + "train_target": "api" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 2, + "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.", + "input_text": "DA: null ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" AGENT_DA: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. ", + "output_text": "( hotels search ) request stars", + "train_target": "da" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 2, + "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.", + "input_text": "RG: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. 
", + "output_text": "How many stars would be your ideal?", + "train_target": "rg" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 3, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.", + "input_text": "DST: ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. ", + "output_text": "( hotels search ) stars equal_to \" 5 \"", + "train_target": "dst" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 3, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.", + "input_text": "API: null ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. ", + "output_text": "no", + "train_target": "api" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 3, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.", + "input_text": "DA: null ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_DA: ( hotels search ) request stars USER: 5 stars. 
", + "output_text": "( hotels search ) request price_level", + "train_target": "da" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 3, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.", + "input_text": "RG: ( hotels search ) request price_level AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. ", + "output_text": "What's your budget?", + "train_target": "rg" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 4, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.", + "input_text": "DST: ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. ", + "output_text": "( hotels search ) price_level equal_to \" moderate \"", + "train_target": "dst" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 4, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.", + "input_text": "API: null ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. 
", + "output_text": "yes", + "train_target": "api" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 4, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.", + "input_text": "DA: ( hotels search ) available_options \" 26 \" , location \" Mong Kok | Kowloon | Yau Tsim Mong District \" , name \" Cordis, Hong Kong \" , price_level \" moderate \" , price_per_night \" 839 HKD \" , rating \" 10 \" , stars \" 5 \" ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_DA: ( hotels search ) request price_level USER: I'm looking for a moderate room price. ", + "output_text": "( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \"", + "train_target": "da" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 4, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.", + "input_text": "RG: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. 
", + "output_text": "We have 26 hotels with availability here, and my recommendation would be Cordis, Hong Kong which has a 10 star rating.", + "train_target": "rg" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 5, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.", + "input_text": "DST: ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. ", + "output_text": "( hotels booking ) name equal_to \" Cordis, Hong Kong \" , start_day equal_to \" 23 \" , start_month equal_to \" 11 \"", + "train_target": "dst" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 5, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. 
I need to check in on November 23rd.", + "input_text": "API: null ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name #unknown , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. ", + "output_text": "no", + "train_target": "api" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 5, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.", + "input_text": "DA: null ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name #unknown , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_DA: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. 
", + "output_text": "( hotels booking ) request number_of_nights", + "train_target": "da" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 5, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.", + "input_text": "RG: ( hotels booking ) request number_of_nights AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. ", + "output_text": "How many nights do you plan to stay?", + "train_target": "rg" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 6, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.", + "input_text": "DST: ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name #unknown , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. 
", + "output_text": "( hotels booking ) number_of_nights equal_to \" 9 \" , user_name equal_to \" David \"", + "train_target": "dst" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 6, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.", + "input_text": "API: null ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. ", + "output_text": "no", + "train_target": "api" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 6, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. 
My name is David.", + "input_text": "DA: null ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_DA: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. ", + "output_text": "( hotels booking ) request number_of_rooms", + "train_target": "da" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 6, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David.", + "input_text": "RG: ( hotels booking ) request number_of_rooms AGENT_ACTS_PREV: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS: ( hotels booking ) request number_of_nights USER: I need 9 nights booked. My name is David. ", + "output_text": "Of course, David. 
How many rooms?", + "train_target": "rg" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 7, + "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.", + "input_text": "DST: ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. ", + "output_text": "( hotels booking ) number_of_rooms equal_to \" eight \"", + "train_target": "dst" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 7, + "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.", + "input_text": "API: null ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms equal_to \" eight \" , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. 
", + "output_text": "no", + "train_target": "api" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 7, + "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.", + "input_text": "DA: null ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights equal_to \" 9 \" , number_of_rooms equal_to \" eight \" , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name equal_to \" David \" , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_DA: ( hotels booking ) request number_of_rooms USER: eight rooms please. ", + "output_text": "( hotels booking ) confirm name equal_to \" Cordis, Hong Kong \" , confirm number_of_nights equal_to \" 9 \" , confirm number_of_rooms equal_to \" eight \" , confirm start_date equal_to \" November 23 \"", + "train_target": "da" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 7, + "dialog_history": "AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please.", + "input_text": "RG: ( hotels booking ) confirm name equal_to \" Cordis, Hong Kong \" , confirm number_of_nights equal_to \" 9 \" , confirm number_of_rooms equal_to \" eight \" , confirm start_date equal_to \" November 23 \" AGENT_ACTS_PREV: ( hotels booking ) request number_of_nights AGENT_ACTS: ( hotels booking ) request number_of_rooms USER: eight rooms please. 
", + "output_text": "Okay, just to confirm: You would like to book eight rooms at the Cordis, Hong Kong hotel for 9 nights with a check-in date of November 23rd?", + "train_target": "rg" + } + ] +} \ No newline at end of file diff --git a/tests/dataset/bitod/train.json b/tests/dataset/bitod/train.json new file mode 100644 index 00000000..ad9227b2 --- /dev/null +++ b/tests/dataset/bitod/train.json @@ -0,0 +1,256 @@ +{ + "data": [ + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 1, + "dialog_history": "USER: Hey, can you help me with hotel booking?", + "input_text": "DST: null USER: Hey, can you help me with hotel booking? ", + "output_text": "( hotels search )", + "train_target": "dst" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 1, + "dialog_history": "USER: Hey, can you help me with hotel booking?", + "input_text": "API: null ( hotels search ) USER: Hey, can you help me with hotel booking? ", + "output_text": "no", + "train_target": "api" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 1, + "dialog_history": "USER: Hey, can you help me with hotel booking?", + "input_text": "DA: null ( hotels search ) USER: Hey, can you help me with hotel booking? ", + "output_text": "( hotels search ) request location , request rating", + "train_target": "da" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 1, + "dialog_history": "USER: Hey, can you help me with hotel booking?", + "input_text": "RG: ( hotels search ) request location , request rating USER: Hey, can you help me with hotel booking? ", + "output_text": "Sure! 
Do you have a destination in mind, and did you have a certain hotel rating in mind?", + "train_target": "rg" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 2, + "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.", + "input_text": "DST: ( hotels search ) AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. ", + "output_text": "( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \"", + "train_target": "dst" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 2, + "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.", + "input_text": "API: null ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. ", + "output_text": "no", + "train_target": "api" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 2, + "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.", + "input_text": "DA: null ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" AGENT_DA: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. 
", + "output_text": "( hotels search ) request stars", + "train_target": "da" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 2, + "dialog_history": "AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars.", + "input_text": "RG: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request location , request rating USER: No specific destination, but I do want a hotel that's at least 4 stars. ", + "output_text": "How many stars would be your ideal?", + "train_target": "rg" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 3, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.", + "input_text": "DST: ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. ", + "output_text": "( hotels search ) stars equal_to \" 5 \"", + "train_target": "dst" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 3, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.", + "input_text": "API: null ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. 
", + "output_text": "no", + "train_target": "api" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 3, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.", + "input_text": "DA: null ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_DA: ( hotels search ) request stars USER: 5 stars. ", + "output_text": "( hotels search ) request price_level", + "train_target": "da" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 3, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars.", + "input_text": "RG: ( hotels search ) request price_level AGENT_ACTS_PREV: ( hotels search ) request location , request rating AGENT_ACTS: ( hotels search ) request stars USER: 5 stars. ", + "output_text": "What's your budget?", + "train_target": "rg" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 4, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.", + "input_text": "DST: ( hotels search ) location equal_to \" don't care \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. 
", + "output_text": "( hotels search ) price_level equal_to \" moderate \"", + "train_target": "dst" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 4, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.", + "input_text": "API: null ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. ", + "output_text": "yes", + "train_target": "api" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 4, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.", + "input_text": "DA: ( hotels search ) available_options \" 26 \" , location \" Mong Kok | Kowloon | Yau Tsim Mong District \" , name \" Cordis, Hong Kong \" , price_level \" moderate \" , price_per_night \" 839 HKD \" , rating \" 10 \" , stars \" 5 \" ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_DA: ( hotels search ) request price_level USER: I'm looking for a moderate room price. 
", + "output_text": "( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \"", + "train_target": "da" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels search", + "turn_id": 4, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price.", + "input_text": "RG: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" AGENT_ACTS_PREV: ( hotels search ) request stars AGENT_ACTS: ( hotels search ) request price_level USER: I'm looking for a moderate room price. ", + "output_text": "We have 26 hotels with availability here, and my recommendation would be Cordis, Hong Kong which has a 10 star rating.", + "train_target": "rg" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 5, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.", + "input_text": "DST: ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. 
", + "output_text": "( hotels booking ) name equal_to \" Cordis, Hong Kong \" , start_day equal_to \" 23 \" , start_month equal_to \" 11 \"", + "train_target": "dst" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 5, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.", + "input_text": "API: null ( hotels booking ) name equal_to \" Cordis, Hong Kong \" , number_of_nights #unknown , number_of_rooms #unknown , start_day equal_to \" 23 \" , start_month equal_to \" 11 \" , user_name #unknown , ( hotels search ) location equal_to \" don't care \" , price_level equal_to \" moderate \" , rating at_least \" 4 \" , stars equal_to \" 5 \" AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd. ", + "output_text": "no", + "train_target": "api" + }, + { + "dial_id": "ec909032-d5fd-4dee-8dd2-53a895a98824", + "task": "hotels booking", + "turn_id": 5, + "dialog_history": "AGENT_ACTS_PREV: ( hotels search ) request price_level AGENT_ACTS: ( hotels search ) offer available_options equal_to \" 26 \" , offer name equal_to \" Cordis, Hong Kong \" , offer rating equal_to \" 10 \" USER: I'll take it. I need to check in on November 23rd.", + "input_text": "DA: