forked from wcmac/sippycup
-
Notifications
You must be signed in to change notification settings - Fork 0
/
travel_examples.py
375 lines (368 loc) · 23.8 KB
/
travel_examples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
"""
Defines 100 examples for the travel domain, divided into 75 training examples
and 25 test examples.
To construct the sample of 100 queries, we:
- Began from the set of unique queries in the AOL dataset (10M queries).
- Selected queries containing one of the ~600 locations named in Geobase
(1M queries).
- Selected queries containing "from" or "to" (23K queries).
- Selected queries containing a "travel term" (below), or both "from" and "to"
(6,600 queries).
- Selected queries that appeared at least twice in the AOL dataset.
- Selected queries uniformly at random, throwing out just a few that were
"too long".
The ~60 "travel terms" were: airfare, airfares, airline, airlines, amtrak, auto,
bicycle, bicycling, bike, biking, boat, bus, buses, cab, cabs, car, cheap,
cheapest, cruise, cruises, cycling, direction, directions, distance, drive,
driving, fare, fares, ferry, ferries, flight, flights, fly, how far, how long,
how much, miles, mileage, on foot, pedestrian, rail, route, shortest, shuttles,
subway, taxi, ticket, tickets, time, train, trains, transit, transportation,
travel, trip, walk, walking.
"""
# Module metadata: version, licensing, and authorship details.
__version__ = '0.9'
__license__ = 'GNU General Public License, version 2.0'
__copyright__ = 'Copyright 2015, Bill MacCartney'
# The author is also the maintainer, so both names share one value.
__author__ = __maintainer__ = 'Bill MacCartney'
__credits__ = []
__email__ = "See the author's website"
from example import Example
# Training split: 75 annotated queries for the travel domain.
#
# Each entry below is a (query, semantics) pair that is turned into an
# Example(input=query, semantics=semantics).  The semantics dict always has a
# 'domain' key ('travel' or 'other'); travel entries may additionally carry
# 'type', 'mode', 'origin' and 'destination', where a location is an
# {'id': ..., 'name': ...} dict (ids are presumably GeoNames ids -- the
# comments below refer to the GeoNames database).
#
# The query strings contain misspellings (e.g. 'shutle', 'seatle'); they look
# like verbatim user queries, so do not "correct" them.
#
# NOTE(review): id 5128581 is labelled both 'New York City, NY, US' and
# 'New York, US' in this list -- confirm which id/name pairing is intended.
# NOTE(review): 'Santiago, Chile' spells out the country, whereas other
# non-US cities here use a two-letter code ('Berlin, DE', 'Loreto, MX') --
# confirm the expected name format.
travel_train_examples = [
    Example(input=query, semantics=meaning)
    for query, meaning in (
        ('discount tickets to new york city ballet',
         {'domain': 'other'}),
        ('travel boston to fr. myers fla',
         {'domain': 'travel',
          'origin': {'id': 4930956, 'name': 'Boston, MA, US'},
          'destination': {'id': 4155995, 'name': 'Fort Myers, FL, US'}}),
        ('how do i get from tulsa oklahoma to atlantic city. new jersey by air',
         {'domain': 'travel', 'type': 'directions', 'mode': 'air',
          'origin': {'id': 4553433, 'name': 'Tulsa, OK, US'},
          'destination': {'id': 4500546, 'name': 'Atlantic City, NJ, US'}}),
        ('airbus from boston to europe',  # user's intent is unclear
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 4930956, 'name': 'Boston, MA, US'},
          'destination': {'id': 9408659, 'name': 'Western Europe'}}),
        ('george washington borrows 500 000 from pennsylvania farmer to finance war',
         {'domain': 'other'}),
        ('cheap tickets to south carolina',
         {'domain': 'travel',
          'destination': {'id': 4597040, 'name': 'South Carolina, US'}}),
        ('birmingham al distance from indianapolish in',
         {'domain': 'travel', 'type': 'distance',
          'origin': {'id': 4259418, 'name': 'Indianapolis, IN, US'},
          'destination': {'id': 4049979, 'name': 'Birmingham, AL, US'}}),
        ('transportation to the philadelphia airport',
         {'domain': 'travel',
          'destination': {'id': 4560342,
                          'name': 'Philadelphia International Airport, PA, US'}}),
        ('one day cruise from fort lauderdale florida',
         {'domain': 'travel', 'mode': 'boat',
          'origin': {'id': 4155966, 'name': 'Fort Lauderdale, FL, US'}}),
        ('directions from washington to canada',
         {'domain': 'travel', 'type': 'directions',
          'origin': {'id': 4140963, 'name': 'Washington, DC, US'},
          'destination': {'id': 6251999, 'name': 'Canada'}}),
        ('flights from portland or to seattle wa',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 5746545, 'name': 'Portland, OR, US'},
          'destination': {'id': 5809844, 'name': 'Seattle, WA, US'}}),
        ('honeymoon trip to hawaii',
         {'domain': 'travel',
          'destination': {'id': 5855797, 'name': 'Hawaii, US'}}),
        ('travel packages from hartford ct. to berlin germany',
         {'domain': 'travel',
          'origin': {'id': 4835797, 'name': 'Hartford, CT, US'},
          'destination': {'id': 2950159, 'name': 'Berlin, DE'}}),
        ('nyc flights to buffalo ny',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 5128581, 'name': 'New York City, NY, US'},
          'destination': {'id': 5110629, 'name': 'Buffalo, NY, US'}}),
        ('norwegian cruises lines to alaska combined with land tours to denali',
         # It's hard to fully capture the semantics of this input.
         {'domain': 'travel', 'mode': 'boat',
          'destination': {'id': 5879092, 'name': 'Alaska, US'}}),
        ('direct flights from california to loreto mexico',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 5332921, 'name': 'California, US'},
          'destination': {'id': 8581711, 'name': 'Loreto, MX'}}),
        ('cheap flights to hawaii',
         {'domain': 'travel', 'mode': 'air',
          'destination': {'id': 5855797, 'name': 'Hawaii, US'}}),
        ('santiago chile to atlanta ga. airline schedule 3 26 06',
         {'domain': 'travel', 'type': 'schedule', 'mode': 'air',
          'origin': {'id': 3871336, 'name': 'Santiago, Chile'},
          'destination': {'id': 4180439, 'name': 'Atlanta, GA, US'}}),
        ('directions one address to another in mobile al.',
         {'domain': 'other'}),
        ('university of washington transportation to seatac',
         {'domain': 'travel',
          'origin': {'id': 5814448, 'name': 'University of Washington, WA, US'},
          'destination': {'id': 5809805, 'name': 'SeaTac, WA, US'}}),
        ('buses to sacramento rivercats games',
         {'domain': 'travel', 'mode': 'bus',
          'destination': {'id': 5389489, 'name': 'Sacramento, CA, US'}}),
        ('cruises departing from norfolk va',
         {'domain': 'travel', 'mode': 'boat',
          'origin': {'id': 4776222, 'name': 'Norfolk, VA, US'}}),
        ('cruises from new york',
         {'domain': 'travel', 'mode': 'boat',
          'origin': {'id': 5128581, 'name': 'New York, US'}}),
        ('car service to newark airport',
         {'domain': 'travel', 'mode': 'taxi',
          'destination': {'id': 5101809,
                          'name': 'Newark Liberty International Airport, NJ, US'}}),
        ('transatlantic cruise southampton to tampa',
         {'domain': 'travel', 'mode': 'boat',
          'origin': {'id': 2637487, 'name': 'Southampton, GB'},
          'destination': {'id': 4174757, 'name': 'Tampa, FL, US'}}),
        ('rand mcnally direction to horse shoe casino hammond in',
         # Horseshoe Casino is not in the GeoNames database.
         {'domain': 'travel', 'type': 'directions',
          'destination': {'id': 4921100, 'name': 'Hammond, IN, US'}}),
        ('how to apply for uk old age pension from usa',
         {'domain': 'other'}),
        ('flight from atlanta to vermont',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 4180439, 'name': 'Atlanta, GA, US'},
          'destination': {'id': 5242283, 'name': 'Vermont, US'}}),
        ('directions to rupparena in lexington',
         # Rupp Arena is not in GeoNames database.
         {'domain': 'travel', 'type': 'directions',
          'destination': {'id': 4297983, 'name': 'Lexington, KY, US'}}),
        ('distance of oxnard ca. to rohnert park ca.',
         {'domain': 'travel', 'type': 'distance',
          'origin': {'id': 5380184, 'name': 'Oxnard, CA, US'},
          'destination': {'id': 5388564, 'name': 'Rohnert Park, CA, US'}}),
        ('last minute flights to dallas tx from memphis tn',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 4641239, 'name': 'Memphis, TN, US'},
          'destination': {'id': 4684888, 'name': 'Dallas, TX, US'}}),
        ('driving distance washington dc to niagara falls',
         {'domain': 'travel', 'type': 'distance', 'mode': 'car',
          'origin': {'id': 4140963, 'name': 'Washington, DC, US'},
          'destination': {'id': 5128723, 'name': 'Niagara Falls, NY, US'}}),
        ('2000 to 2006 time line of tennessee',
         {'domain': 'other'}),
        ('jet blue flights to new york',
         {'domain': 'travel', 'mode': 'air',
          'destination': {'id': 5128581, 'name': 'New York, US'}}),
        ('flights from jacksonville nc to clt',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 4473083, 'name': 'Jacksonville, NC, US'},
          'destination': {'id': 4460310,
                          'name': 'Charlotte Douglas International Airport, NC, US'}}),
        ('cost of gas from cedar rapids ia to las vegas',
         {'domain': 'travel', 'type': 'cost', 'mode': 'car',
          'origin': {'id': 4850751, 'name': 'Cedar Rapids, IA, US'},
          'destination': {'id': 5506956, 'name': 'Las Vegas, NV, US'}}),
        ('one way air fare to chicago',
         {'domain': 'travel', 'mode': 'air',
          'destination': {'id': 4887398, 'name': 'Chicago, IL, US'}}),
        ('distance cunmming georgia to chattanooga',
         {'domain': 'travel', 'type': 'distance',
          'origin': {'id': 4190396, 'name': 'Cumming, GA, US'},
          'destination': {'id': 4612862, 'name': 'Chattanooga, TN, US'}}),
        ('cheapest place to live in florida',
         {'domain': 'other'}),
        ('cheap flights from orlando to st maarten',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 4167147, 'name': 'Orlando, FL, US'},
          'destination': {'id': 7609695, 'name': 'Sint Maarten'}}),
        ('cheap flights to detroit michigan from new york',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 5128581, 'name': 'New York, US'},
          'destination': {'id': 4990729, 'name': 'Detroit, MI, US'}}),
        ('train from pontiac mi to chicago',
         {'domain': 'travel', 'mode': 'train',
          'origin': {'id': 5006166, 'name': 'Pontiac, MI, US'},
          'destination': {'id': 4887398, 'name': 'Chicago, IL, US'}}),
        ('travel by train from bakersfield ca',
         {'domain': 'travel', 'mode': 'train',
          'origin': {'id': 5325738, 'name': 'Bakersfield, CA, US'}}),
        ('how to get a oklahoma vehicle accident report from police agency',
         {'domain': 'other'}),
        ('bike riding-seattle to portland',
         {'domain': 'travel', 'mode': 'bike',
          'origin': {'id': 5809844, 'name': 'Seattle, WA, US'},
          'destination': {'id': 5746545, 'name': 'Portland, OR, US'}}),
        ('photos at las vegas nevada from april 1 to april 5 2006',
         {'domain': 'other'}),
        ('shoreline train to chicago',
         {'domain': 'travel', 'mode': 'train',
          'destination': {'id': 4887398, 'name': 'Chicago, IL, US'}}),
        ('all airlines from memphis to mcallen',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 4641239, 'name': 'Memphis, TN, US'},
          'destination': {'id': 4709796, 'name': 'McAllen, TX, US'}}),
        ('what is 1500 miles away from texas city',
         {'domain': 'other'}),
        ('delta flights to oakland',
         {'domain': 'travel', 'mode': 'air',
          'destination': {'id': 5378538, 'name': 'Oakland, CA, US'}}),
        ('public transportation to new jersey',
         {'domain': 'travel', 'mode': 'transit',
          'destination': {'id': 5101760, 'name': 'New Jersey, US'}}),
        ('shutle from seatle airport to vancouver airport',
         {'domain': 'travel', 'mode': 'bus',
          'origin': {'id': 5809876,
                     'name': 'Seattle-Tacoma International Airport, WA, US'},
          'destination': {'id': 6301485,
                          'name': 'Vancouver International Airport, CA'}}),
        ('hotels close to 10 manhattan square drive rochester ny 14607',
         {'domain': 'other'}),
        ('cruise from new york to canada',
         {'domain': 'travel', 'mode': 'boat',
          'origin': {'id': 5128581, 'name': 'New York, US'},
          'destination': {'id': 6251999, 'name': 'Canada'}}),
        ('discount travel flights to austin texas',
         {'domain': 'travel', 'mode': 'air',
          'destination': {'id': 4671654, 'name': 'Austin, TX, US'}}),
        ('book a trip to chicago',
         {'domain': 'travel',
          'destination': {'id': 4887398, 'name': 'Chicago, IL, US'}}),
        ('rochester ny bus tours to yankees stadium',
         # Yankees Stadium is not in GeoNames database.
         {'domain': 'travel', 'mode': 'bus',
          'origin': {'id': 5134086, 'name': 'Rochester, NY, US'},
          'destination': {'id': 5128581, 'name': 'New York City, NY, US'}}),
        ('airfares to honolulu hawaii',
         {'domain': 'travel', 'mode': 'air',
          'destination': {'id': 5856195, 'name': 'Honolulu, HI, US'}}),
        ('fly boston to myrtle beach spirit airlines',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 4930956, 'name': 'Boston, MA, US'},
          'destination': {'id': 4588718, 'name': 'Myrtle Beach, SC, US'}}),
        ('distance usa to peru',
         {'domain': 'travel', 'type': 'distance',
          'origin': {'id': 6252001, 'name': 'United States'},
          'destination': {'id': 3932488, 'name': 'Peru'}}),
        ('mileage from different airports in florida to st. augustine',
         {'domain': 'travel', 'type': 'distance',
          'destination': {'id': 4170894, 'name': 'St. Augustine, FL, US'}}),
        ('train from moscow to st. petersburg',
         {'domain': 'travel', 'mode': 'train',
          'origin': {'id': 524901, 'name': 'Moscow, RU'},
          'destination': {'id': 498817, 'name': 'Saint-Petersburg, RU'}}),
        ('flight from ft.lauderdale florida to lexington kentucky',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 4155966, 'name': 'Fort Lauderdale, FL, US'},
          'destination': {'id': 4297983, 'name': 'Lexington, KY, US'}}),
        ('ithaca to scranton trains',
         {'domain': 'travel', 'mode': 'train',
          'origin': {'id': 5122432, 'name': 'Ithaca, NY, US'},
          'destination': {'id': 5211303, 'name': 'Scranton, PA, US'}}),
        ('time shares florida to rent',
         {'domain': 'other'}),
        ('what will it cost to drive from kemerovo to st. petersburg russia',
         {'domain': 'travel', 'type': 'cost', 'mode': 'car',
          'origin': {'id': 1503901, 'name': 'Kemerovo, RU'},
          'destination': {'id': 498817, 'name': 'Saint-Petersburg, RU'}}),
        ('balloons and flowers from israel to usa',
         {'domain': 'other'}),
        ('transportation to ft lauderdale from orlando',
         {'domain': 'travel',
          'origin': {'id': 4167147, 'name': 'Orlando, FL, US'},
          'destination': {'id': 4155966, 'name': 'Fort Lauderdale, FL, US'}}),
        ('lawton va to orlando fl amtrak',
         {'domain': 'travel', 'mode': 'train',
          'origin': {'id': 4770714, 'name': 'Lorton, VA, US'},
          'destination': {'id': 4167147, 'name': 'Orlando, FL, US'}}),
        ('ride this train to roseburg oregon now ther\'s a town for ya',
         {'domain': 'other'}),
        ('cheapest airline flight from charlotte to nashville',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 4460243, 'name': 'Charlotte, NC, US'},
          'destination': {'id': 4644585, 'name': 'Nashville, TN, US'}}),
        ('flights newark to raleigh nc',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 5101798, 'name': 'Newark, NJ, US'},
          'destination': {'id': 4487042, 'name': 'Raleigh, NC, US'}}),
        ('driving from los angeles to seattle',
         {'domain': 'travel', 'mode': 'car',
          'origin': {'id': 5368361, 'name': 'Los Angeles, CA, US'},
          'destination': {'id': 5809844, 'name': 'Seattle, WA, US'}}),
        ('cheap airfare from pittsburgh pa to las vegas nv',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 5206379, 'name': 'Pittsburgh, PA, US'},
          'destination': {'id': 5506956, 'name': 'Las Vegas, NV, US'}}),
        ('scenic drive to duluth mn',
         {'domain': 'travel', 'mode': 'car',
          'destination': {'id': 5024719, 'name': 'Duluth, MN, US'}}),
    )
]
# Test split: 25 annotated queries for the travel domain.
#
# Each entry below is a (query, semantics) pair that is turned into an
# Example(input=query, semantics=semantics).  The semantics dict always has a
# 'domain' key ('travel' or 'other'); travel entries may additionally carry
# 'type', 'mode', 'origin' and 'destination', where a location is an
# {'id': ..., 'name': ...} dict.
#
# The query strings contain misspellings (e.g. 'portlad', 'fromn'); they look
# like verbatim user queries, so do not "correct" them.
travel_test_examples = [
    Example(input=query, semantics=meaning)
    for query, meaning in (
        ('how to fight a disorderly conduct ticket in fairborn ohio',
         {'domain': 'other'}),
        ('csaa discount ticket to marine world usa in vallejo',
         {'domain': 'other'}),
        ('bus from nyc to boston',
         {'domain': 'travel', 'mode': 'bus',
          'origin': {'id': 5128581, 'name': 'New York City, NY, US'},
          'destination': {'id': 4930956, 'name': 'Boston, MA, US'}}),
        ('distance from hawaii to california',
         {'domain': 'travel', 'type': 'distance',
          'origin': {'id': 5855797, 'name': 'Hawaii, US'},
          'destination': {'id': 5332921, 'name': 'California, US'}}),
        ('cheap tickets to hawaii',
         {'domain': 'travel',
          'destination': {'id': 5855797, 'name': 'Hawaii, US'}}),
        ('california coach tours and trains tours from san francisco',
         {'domain': 'travel', 'mode': 'train',
          'origin': {'id': 5391959, 'name': 'San Francisco, CA, US'}}),
        ('personal rights to selling a used car in pennsylvania',
         {'domain': 'other'}),
        ('travel to sioux falls',
         {'domain': 'travel',
          'destination': {'id': 5231851, 'name': 'Sioux Falls, SD, US'}}),
        ('road trip fun on i10 alabama to phoenix',
         {'domain': 'other'}),
        ('cheap air fares to florida',
         {'domain': 'travel', 'mode': 'air',
          'destination': {'id': 4155751, 'name': 'Florida, US'}}),
        ('travel orlando fl to las vegas nv by auto',
         {'domain': 'travel', 'mode': 'car',
          'origin': {'id': 4167147, 'name': 'Orlando, FL, US'},
          'destination': {'id': 5506956, 'name': 'Las Vegas, NV, US'}}),
        ('bus service from south bend in to ft. lauderdale',
         {'domain': 'travel', 'mode': 'bus',
          'origin': {'id': 4926563, 'name': 'South Bend, IN, US'},
          'destination': {'id': 4155966, 'name': 'Fort Lauderdale, FL, US'}}),
        ('2006 airfare to san juan puerto rico',
         {'domain': 'travel', 'mode': 'air',
          'destination': {'id': 4568127, 'name': 'San Juan, PR'}}),
        ('flight to tampa florida name your own price',
         {'domain': 'travel', 'mode': 'air',
          'destination': {'id': 4174757, 'name': 'Tampa, FL, US'}}),
        ('train schedule from new hampshire to nova scotia',
         {'domain': 'travel', 'type': 'schedule', 'mode': 'train',
          'origin': {'id': 5090174, 'name': 'New Hampshire, US'},
          'destination': {'id': 6091530, 'name': 'Nova Scotia, CA'}}),
        ('flights from san juan puerto rico to jfk ny',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 4568127, 'name': 'San Juan, PR'},
          'destination': {'id': 5122732,
                          'name': 'John F. Kennedy International Airport, NY, US'}}),
        ('seattle to portlad bike ride',
         {'domain': 'travel', 'mode': 'bike',
          'origin': {'id': 5809844, 'name': 'Seattle, WA, US'},
          'destination': {'id': 5746545, 'name': 'Portland, OR, US'}}),
        ('cheap flight to charlotte',
         {'domain': 'travel', 'mode': 'air',
          'destination': {'id': 4460243, 'name': 'Charlotte, NC, US'}}),
        ('boats from jacksonville fl to the bahamas',
         {'domain': 'travel', 'mode': 'boat',
          'origin': {'id': 4160021, 'name': 'Jacksonville, FL, US'},
          'destination': {'id': 3572887, 'name': 'Bahamas'}}),
        ('is it legal to drive in florida using a cellular phone',
         {'domain': 'other'}),
        ('taking drivers license away from senior citizen texas driving',
         {'domain': 'other'}),
        ('best time to visit niagara falls',
         {'domain': 'other'}),
        ('airline tickers to monteray california fromn dc area',
         {'domain': 'travel', 'mode': 'air',
          'origin': {'id': 4140963, 'name': 'Washington, DC, US'},
          'destination': {'id': 5374361, 'name': 'Monterey, CA, US'}}),
        ('bus service -detroit to chicago',
         {'domain': 'travel', 'mode': 'bus',
          'origin': {'id': 4990729, 'name': 'Detroit, MI, US'},
          'destination': {'id': 4887398, 'name': 'Chicago, IL, US'}}),
        ('transportation from winston salem to raleigh durham',
         {'domain': 'travel',
          'origin': {'id': 4499612, 'name': 'Winston-Salem, NC, US'},
          'destination': {'id': 4487056,
                          'name': 'Raleigh-Durham International Airport, NC, US'}}),
    )
]