forked from ccelio/riscv-boom-doc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bibliography.bib
executable file
·368 lines (333 loc) · 13 KB
/
bibliography.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
% Gharachorloo et al. (1990): memory consistency and event ordering in shared-memory multiprocessors.
@article{Gharachorloo:1990:MCE:325096.325102,
  author     = {Gharachorloo, Kourosh and Lenoski, Daniel and Laudon, James and Gibbons, Phillip and Gupta, Anoop and Hennessy, John},
  title      = {Memory Consistency and Event Ordering in Scalable Shared-memory Multiprocessors},
  journal    = {SIGARCH Comput. Archit. News},
  volume     = {18},
  number     = {2SI},
  pages      = {15--26},
  numpages   = {12},
  month      = may,
  year       = {1990},
  issue_date = {June 1990},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0163-5964},
  doi        = {10.1145/325096.325102},
  url        = {http://doi.acm.org/10.1145/325096.325102},
  acmid      = {325102},
}
% Seznec et al. (2002): design tradeoffs for the Alpha EV8 conditional branch predictor.
% "Alpha" and "EV8" are braced so sentence-casing styles keep their capitals.
@inproceedings{seznec2002design,
  author       = {Seznec, Andr{\'e} and Felix, Stephen and Krishnan, Venkata and Sazeides, Yiannakis},
  title        = {Design tradeoffs for the {Alpha} {EV8} conditional branch predictor},
  booktitle    = {Computer Architecture, 2002. Proceedings. 29th Annual International Symposium on},
  pages        = {295--306},
  year         = {2002},
  organization = {IEEE},
}
% Seznec and Michaud (2006): the TAGE (TAgged GEometric history length) predictor.
% The mixed-case words are braced so styles do not lowercase them.
@article{seznec2006case,
  author  = {Seznec, Andr{\'e} and Michaud, Pierre},
  title   = {A case for (partially) {TAgged} {GEometric} history length branch prediction},
  journal = {Journal of Instruction Level Parallelism},
  volume  = {8},
  pages   = {1--23},
  year    = {2006},
}
% Seznec (2011): a new case for the TAGE branch predictor.
% The acronym is braced so sentence-casing styles keep it upper-case.
@inproceedings{seznec2011new,
  author       = {Seznec, Andr{\'e}},
  title        = {A new case for the {TAGE} branch predictor},
  booktitle    = {Proceedings of the 44th Annual IEEE/ACM International Symposium on Microarchitecture},
  pages        = {117--127},
  year         = {2011},
  organization = {ACM},
}
% Hwacha project web page. There is no personal author, so `key` supplies
% the sorting and label fallback that classic styles otherwise warn about.
@misc{hwacha,
  key   = {Hwacha},
  title = {The {Hwacha} Project},
  year  = {2015},
  note  = {http://hwacha.org},
}
% Gem5 visualization web page. There is no personal author, so `key` supplies
% the sorting and label fallback that classic styles otherwise warn about.
@misc{gem5,
  key   = {Gem5},
  title = {{Gem5} Visualization},
  year  = {2014},
  note  = {http://www.m5sim.org/Visualization},
}
% Rocket core source repository. There is no personal author, so `key` supplies
% the sorting and label fallback that classic styles otherwise warn about.
@misc{rocket,
  key   = {Rocket},
  title = {{Rocket} Microarchitectural Implementation of {RISC-V} {ISA}},
  year  = {2016},
  note  = {https://github.com/ucb-bar/rocket},
}
% Sun et al. (2015), Nature: single-chip microprocessor communicating with light.
@article{riscv_nature,
  author    = {Sun, Chen and Wade, Mark T and Lee, Yunsup and Orcutt, Jason S and Alloatti, Luca and Georgas, Michael S and Waterman, Andrew S and Shainline, Jeffrey M and Avizienis, Rimas R and Lin, Sen and others},
  title     = {Single-chip microprocessor that communicates directly using light},
  journal   = {Nature},
  volume    = {528},
  number    = {7583},
  pages     = {534--538},
  year      = {2015},
  publisher = {Nature Publishing Group},
}
% Yeager (1996), IEEE Micro: the MIPS R10000 superscalar microprocessor.
% The IEEE Xplore link is an abstract-page URL, not a DOI, so it is stored in `url`.
@article{mipsr10k,
  author    = {Yeager, K. C.},
  title     = {The {MIPS} {R10000} Superscalar Microprocessor},
  journal   = {IEEE Micro},
  volume    = {16},
  number    = {2},
  pages     = {28--41},
  year      = {1996},
  issn      = {0272-1732},
  url       = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=491460},
  publisher = {IEEE Computer Society},
}
% Kessler (1999), IEEE Micro: the Alpha 21264 microprocessor.
% The IEEE Xplore link is an abstract-page URL, not a DOI, so it is stored in `url`.
@article{alpha21264,
  author    = {Kessler, R. E.},
  title     = {The {Alpha} 21264 Microprocessor},
  journal   = {IEEE Micro},
  volume    = {19},
  number    = {2},
  pages     = {24--36},
  year      = {1999},
  issn      = {0272-1732},
  url       = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=755465},
  publisher = {IEEE Computer Society},
}
% S. Williams, PhD thesis (2008). The thesis entry type lets the style print
% the "PhD thesis" label, so that label is no longer part of the title.
@phdthesis{sam_thesis,
  author = {Williams, S.},
  title  = {Autotuning Performance on Multicore Computers},
  school = {U.C. Berkeley},
  year   = {2008},
}
% K. Asanovic, PhD thesis (1998). The thesis entry type lets the style print
% the "PhD thesis" label, so that label is no longer part of the title.
@phdthesis{asanovic_thesis,
  author = {Asanovic, K.},
  title  = {Vector Microprocessors},
  school = {U.C. Berkeley},
  year   = {1998},
}
% Krashinsky et al. (2004), IEEE Micro: the vector-thread architecture.
% `doi` holds the bare identifier; styles add the resolver prefix themselves.
@article{ieee.vectorthread.2004,
  author    = {Krashinsky, Ronny and Batten, Christopher and Hampton, Mark and Gerding, Steve and Pharris, Brian and Casper, Jared and Asanovic, Krste},
  title     = {The Vector-Thread Architecture},
  journal   = {IEEE Micro},
  volume    = {24},
  number    = {6},
  pages     = {84--90},
  year      = {2004},
  issn      = {0272-1732},
  doi       = {10.1109/MM.2004.90},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
}
% Williams, Waterman and Patterson (2009), Commun. ACM: the Roofline performance model.
% Names use a single comma (Last, First); `doi` holds the bare identifier.
@article{roofline_cacm,
  author    = {Williams, Samuel and Waterman, Andrew and Patterson, David},
  title     = {Roofline: an insightful visual performance model for multicore architectures},
  journal   = {Commun. ACM},
  volume    = {52},
  number    = {4},
  pages     = {65--76},
  year      = {2009},
  issn      = {0001-0782},
  doi       = {10.1145/1498765.1498785},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
% Asanovic et al. (2006): "A View from Berkeley" technical report UCB/EECS-2006-183.
% `month` uses the unquoted three-letter macro so styles can localise it.
@techreport{berkeley_view,
  author      = {Asanovic, Krste and Bodik, Ras and Catanzaro, Bryan Christopher and Gebis, Joseph James and Husbands, Parry and Keutzer, Kurt and Patterson, David A. and Plishker, William Lester and Shalf, John and Williams, Samuel Webb and Yelick, Katherine A.},
  title       = {The Landscape of Parallel Computing Research: A View from {Berkeley}},
  institution = {EECS Department, University of California, Berkeley},
  number      = {UCB/EECS-2006-183},
  month       = dec,
  year        = {2006},
  url         = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2006/EECS-2006-183.html},
}
% Austin, Larson and Ernst (2002), Computer: the SimpleScalar simulation infrastructure.
% Names use a single comma (Last, First); `doi` holds the bare identifier.
@article{simplescalar,
  author    = {Austin, Todd and Larson, Eric and Ernst, Dan},
  title     = {{SimpleScalar}: An Infrastructure for Computer System Modeling},
  journal   = {Computer},
  volume    = {35},
  number    = {2},
  pages     = {59--67},
  year      = {2002},
  issn      = {0018-9162},
  doi       = {10.1109/2.982917},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
}
% Ortego and Sack (2004): the SESC simulator.
% Typed as misc because the unpublished type requires a `note`, which this record lacks.
@misc{sesc,
  author               = {Ortego, P. M. and Sack, P.},
  title                = {{SESC}: {SuperESCalar} Simulator},
  month                = dec,
  year                 = {2004},
  keywords             = {bibtex-import},
  citeulike-article-id = {232792},
  posted-at            = {2005-06-20 21:13:08},
  priority             = {2},
}
% Cadence Palladium accelerator/emulator product page (corporate author, no year given).
@misc{palladium,
  author = {{Cadence Design Systems}},
  title  = {{Palladium Accelerator/Emulator}},
  note   = {http://www.cadence.com/products/functional\_ver/palladium/},
}
% Emer et al. (2002), Computer: the Asim performance model framework.
% `doi` holds the bare identifier; styles add the resolver prefix themselves.
@article{asim,
  author    = {Emer, Joel and Ahuja, Pritpal and Borch, Eric and Klauser, Artur and Luk, Chi-Keung and Manne, Srilatha and Mukherjee, Shubhendu S. and Patil, Harish and Wallace, Steven and Binkert, Nathan and Espasa, Roger and Juan, Toni},
  title     = {{Asim}: A Performance Model Framework},
  journal   = {Computer},
  volume    = {35},
  number    = {2},
  pages     = {68--76},
  year      = {2002},
  issn      = {0018-9162},
  doi       = {10.1109/2.982918},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
}
% Pellauer, Emer and Arvind (2007): HAsim, a partitioned performance model on an FPGA.
@misc{hasim,
  author = {Michael Pellauer and Joel Emer and Arvind},
  title  = {{HAsim: Implementing a Partitioned Performance Model on an FPGA}},
  year   = {2007},
  note   = {http://publications.csail.mit.edu/abstracts/abstracts07/pellauer-abstract/hasim.html},
}
% Pai, Ranganathan and Adve (1997): the RSIM execution-driven simulator.
% `booktitle` carries no leading "In"; bibliography styles insert that word themselves.
@inproceedings{rsim,
  author    = {Pai, Vijay S. and Ranganathan, Parthasarathy and Adve, Sarita V.},
  title     = {{RSIM}: An Execution-Driven Simulator for {ILP}-Based Shared-Memory Multiprocessors and Uniprocessors},
  booktitle = {Proceedings of the Third Workshop on Computer Architecture Education},
  year      = {1997},
}
% Magnusson et al. (2002): the Simics full-system simulation platform.
% The journal is spelled "Computer", matching the other entries for this journal in this file.
@article{simics,
  author               = {Magnusson, P. S. and Christensson, M. and Eskilson, J. and Forsgren, D. and Hallberg, G. and Hogberg, J. and Larsson, F. and Moestedt, A. and Werner, B.},
  title                = {Simics: A full system simulation platform},
  journal              = {Computer},
  volume               = {35},
  year                 = {2002},
  keywords             = {simulator},
  citeulike-article-id = {1474011},
  posted-at            = {2007-07-23 02:09:37},
  priority             = {2},
}
% Krasnov et al. (2007): RAMP Blue, a message-passing manycore system in FPGAs.
% `month` uses the unquoted three-letter macro so styles can localise it.
@inproceedings{ramp_blue,
  author    = {Krasnov, Alex and Schultz, Andrew and Wawrzynek, John and Gibeling, Greg and Droz, Pierre-Yves},
  title     = {{RAMP Blue}: A Message-Passing Manycore System in {FPGAs}},
  booktitle = {International Conference on Field Programmable Logic and Applications},
  month     = aug,
  year      = {2007},
  url       = {http://www.gigascale.org/pubs/1033.html},
}
% Njoroge et al. (2006): building and using the ATLAS transactional memory system.
% NOTE(review): the original first author read "Njuguna Njoroge Sewook", with the next
% author's given name spilled into it; restored here as "Njoroge, Njuguna". Confirm against the paper.
@inproceedings{ramp_red,
  author    = {Njoroge, Njuguna and Wee, Sewook and Casper, Jared and Burdick, Justin and Teslyar, Yuriy and Kozyrakis, Christos and Olukotun, Kunle},
  title     = {Building and Using the {ATLAS} Transactional Memory System},
  booktitle = {Proceedings of the Workshop on Architecture Research using {FPGA} Platforms, held at {HPCA12}},
  year      = {2006},
}
% Chang, Wawrzynek and Brodersen (2005): the BEE2 reconfigurable computing system.
% The record's modification date is kept in `timestamp` so that date-aware
% back ends do not read it as the publication date in place of `year`.
@article{bee2,
  author    = {Chang, Chen and Wawrzynek, John and Brodersen, Robert W.},
  title     = {{BEE2}: A High-End Reconfigurable Computing System},
  journal   = {IEEE Design \& Test of Computers},
  volume    = {22},
  number    = {2},
  pages     = {114--125},
  year      = {2005},
  doi       = {10.1109/MDT.2005.30},
  url       = {http://dblp.uni-trier.de/db/journals/dt/dt22.html#ChangWB05},
  timestamp = {2006-04-27},
  interHash = {93fdd4142452750074cecb2b7f2be032},
  intraHash = {b751d16d1b4e578f1fdea08baf7b3920},
}
% Microsoft Research page for the Berkeley Emulation Engine 3 (corporate author, no year given).
@misc{BEE3,
  author = {{Microsoft Research}},
  title  = {{Berkeley Emulation Engine 3}},
  note   = {http://research.microsoft.com/en-us/projects/BEE3/},
}
% Pan, Hindman and Asanovic (2009): Lithe, composing parallel libraries.
% `month` uses the unquoted three-letter macro so styles can localise it.
@inproceedings{lithe,
  author       = {Pan, Heidi and Hindman, Benjamin and Asanovic, Krste},
  title        = {{Lithe}: Enabling Efficient Composition of Parallel Libraries},
  booktitle    = {Workshop on Hot Topics in Parallelism ({HotPar-09})},
  organization = {{USENIX}},
  month        = mar,
  year         = {2009},
  url          = {http://www.gigascale.org/pubs/1870.html},
  abstract     = {For the software industry to take advantage of
                  multicore processors, we must allow programmers to
                  arbitrarily compose parallel libraries without
                  sacrificing performance. We argue that high-level
                  task or thread abstractions and a common global
                  scheduler cannot provide effective library
                  composition. Instead, the operating system should
                  expose unvirtualized processing resources that can
                  be shared cooperatively between parallel libraries
                  within an application. In this paper, we describe
                  a system that standardizes and facilitates the
                  exchange of these unvirtualized processing
                  resources between libraries.},
}
% Jae W. Lee's QoS paper: globally-synchronized frames for on-chip networks (2008).
% `doi` holds the bare identifier; styles add the resolver prefix themselves.
@inproceedings{lee08memqos,
  author    = {Lee, Jae W. and Ng, Man Cheuk and Asanovic, Krste},
  title     = {Globally-Synchronized Frames for Guaranteed Quality-of-Service in On-Chip Networks},
  booktitle = {ISCA '08: Proceedings of the 35th International Symposium on Computer Architecture},
  pages     = {89--100},
  year      = {2008},
  isbn      = {978-0-7695-3174-8},
  doi       = {10.1109/ISCA.2008.31},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
}
% Memory QoS: Nesbit et al. (2006), fair queuing memory systems.
% `doi` holds the bare identifier; styles add the resolver prefix themselves.
@inproceedings{nesbit06queuing,
  author    = {Nesbit, Kyle J. and Aggarwal, Nidhi and Laudon, James and Smith, James E.},
  title     = {Fair Queuing Memory Systems},
  booktitle = {MICRO 39: Proceedings of the 39th Annual IEEE/ACM International Symposium on Microarchitecture},
  pages     = {208--222},
  year      = {2006},
  isbn      = {0-7695-2732-9},
  doi       = {10.1109/MICRO.2006.24},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
}
% Wunderlich et al. (2006): statistical sampling of microarchitecture simulation (SMARTS).
% Percent signs and symbols in the abstract are written as LaTeX escapes so the
% field stays safe if a style typesets it and the file stays ASCII-only.
@article{statistical_sampling,
  author      = {Wunderlich, Roland E. and Wenisch, Thomas F. and
                 Falsafi, Babak and Hoe, James C.},
  title       = {Statistical sampling of microarchitecture simulation},
  journal     = {{ACM} Transactions on Modeling and Computer Simulation},
  volume      = {16},
  number      = {3},
  pages       = {197--224},
  year        = {2006},
  issn        = {1049-3301},
  doi         = {10.1145/1147224.1147225},
  abstract    = {Current software-based micro architecture simulators are
                 many orders of magnitude slower than the hardware they
                 simulate. Hence, most microarchitecture design studies
                 draw their conclusions from drastically truncated
                 benchmark simulations that are often inaccurate and
                 misleading. This article presents the Sampling
                 Microarchitecture Simulation (SMARTS) framework as an
                 approach to enable fast and accurate performance
                 measurements of full-length benchmarks. SMARTS
                 accelerates simulation by selectively measuring in detail
                 only an appropriate benchmark subset. SMARTS prescribes a
                 statistically sound procedure for configuring a
                 systematic sampling simulation run to achieve a desired
                 quantifiable confidence in estimates. Analysis of the
                 SPEC CPU2000 benchmark suite shows that CPI and energy
                 per instruction (EPI) can be estimated to within
                 $\pm$3\% with 99.7\% confidence by measuring fewer than
                 50 million instructions per benchmark. In practice,
                 inaccuracy in microarchitectural state initialization
                 introduces an additional uncertainty which we empirically
                 bound to $\sim$2\% for the tested benchmarks. Our
                 implementation of SMARTS achieves an actual average error
                 of only 0.64\% on CPI and 0.59\% on EPI for the tested
                 benchmarks, running with average speedups of 35 and 60
                 over detailed simulation of 8-way and 16-way out-of-order
                 processors, respectively. {\copyright} 2006 ACM.},
  affiliation = {OTHER},
  details     = {http://infoscience.epfl.ch/record/135593},
  oai-id      = {oai:infoscience.epfl.ch:135593},
  oai-set     = {article},
  review      = {REVIEWED},
  status      = {PUBLISHED},
  unit        = {PARSA},
}