-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqueryExperiment1.R
63 lines (51 loc) · 1.92 KB
/
queryExperiment1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Goal: use a query's own op models to predict its latency for a certain
# quantile.
#
# Argument:
#   basePath  Directory holding the experiment's inputs. Log files should
#             already be deinterleaved & parsed; i.e., the following files are
#             expected to be found in "basePath":
#               training-logs/Thread-*.csv
#               validation+-logs/Thread-*.csv
#               paramFile.RData
#             where
#               *: startingThread to endingThread
#               +: 1 to numValidationRuns
#
# Values that must appear in paramFile.RData:
#   startingThread
#   endingThread
#   numValidationRuns
#   latencyQuantile: \in (0,1)
#   queryType: string rep of query (eg, "thoughtstream")
#   numSampleSets
#
# Returns:
#   The value of getPredictionError(basePath), or the string
#   "Unrecognized query type." when queryType is not one of "thoughtstream",
#   "userByEmail", "userByName" or "thoughtsByHashTag".
#
# Stops with an error if paramFile.RData lacks any of the values listed above.
#
# NOTE(review): the helper functions called below are presumably defined in
# experiment-functions.R, which is sourced relative to the current working
# directory -- confirm the caller's working directory.
queryExperiment1 <- function(basePath) {
  source("experiment-functions.R")

  print("Loading params...")
  # Load into a dedicated environment instead of this function's frame, so that
  # a stray variable saved in the file (e.g. one named `basePath`) cannot
  # overwrite the argument, and so that a missing parameter is reported here
  # rather than being silently picked up from the global environment or
  # failing only after the expensive phases below have already run.
  params <- new.env(parent = emptyenv())
  load(file = paste0(basePath, "/paramFile.RData"), envir = params)

  requiredParams <- c(
    "startingThread", "endingThread", "numValidationRuns",
    "latencyQuantile", "queryType", "numSampleSets"
  )
  missingParams <- setdiff(requiredParams, ls(params, all.names = TRUE))
  if (length(missingParams) > 0) {
    stop(
      "paramFile.RData is missing required value(s): ",
      paste(missingParams, collapse = ", "),
      call. = FALSE
    )
  }

  startingThread <- params[["startingThread"]]
  endingThread <- params[["endingThread"]]
  numValidationRuns <- params[["numValidationRuns"]]
  latencyQuantile <- params[["latencyQuantile"]]
  queryType <- params[["queryType"]]
  numSampleSets <- params[["numSampleSets"]]

  ## Training Phase
  print("TRAINING PHASE:")

  print("Loading training data...")
  trainingData <- getTrainingData(startingThread, endingThread, basePath)

  print("Creating & saving histograms...")
  # Each supported query type has its own histogram builder.
  if (queryType == "thoughtstream") {
    createAndSaveThoughtstreamOpHistograms(basePath)
  } else if (queryType == "userByEmail") {
    createAndSaveUserByEmailOpHistograms(basePath)
  } else if (queryType == "userByName") {
    createAndSaveUserByNameOpHistograms(basePath)
  } else if (queryType == "thoughtsByHashTag") {
    createAndSaveThoughtsByHashTagOpHistograms(basePath)
  } else {
    # Kept as a returned string (not an error) for backward compatibility.
    return("Unrecognized query type.")
  }

  # Sanity Check
  print("Sanity check the training data's dim:")
  print(dim(trainingData))
  print("Sanity check the # queries in the training data:")
  # Counts the rows whose opLevel equals 3 (NA comparisons are not counted).
  print(sum(trainingData$opLevel == 3, na.rm = TRUE))

  ## Validation Phase
  print("VALIDATION PHASE:")

  print("Getting validation stats...")
  getValidationStats(
    startingThread, endingThread, basePath, numValidationRuns, latencyQuantile
  )

  print("Getting predicted latency...")
  getPredictedQueryLatencyQuantiles(
    queryType, numSampleSets, basePath, latencyQuantile
  )

  predictionError <- getPredictionError(basePath)
  print(paste("Error:", predictionError))

  predictionError
}