<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" class="gr__"><style id="stylish-7" class="stylish" type="text/css">
</style><head>
<title>Plagiarism Checking Result for your Document</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<link href="Plagiarism%20Checking%20Result%20for%20Btech%20Project%20-%20RLCaR_files/jatt.css" rel="stylesheet">
</head><body onload="document.getElementById('div_wait').style.display='none';" data-gr-c-s-loaded="true"><div id="div_wait" style="display: none; font-size: 14px;">Please wait, Your report is being rendered.</div>
<script type="text/javascript" src="Plagiarism%20Checking%20Result%20for%20Btech%20Project%20-%20RLCaR_files/jquery-1.js"></script>
<script src="Plagiarism%20Checking%20Result%20for%20Btech%20Project%20-%20RLCaR_files/jquery.js"></script>
<!--oncontextmenu="return false"-->
<div id="wrap">
<div class="header_report">
<div class="content_box">
<h1 style="padding-left: 40px;font-size:28px;">Plagiarism Checker X Originality Report</h1>
<!--Trial_Info
<p style="padding-left: 70px">This report is generated by the Unregistered PlagiarismCheckerX <b>Demo version!</b></p>
Trial_Info-->
<!--Trial_Info
<p style="padding-left:120px;padding-top: 10px;"><a href="http://plagiarismcheckerx.com" target="_blank">Register the software</a> and get the complete functionality!</p>
Trial_Info-->
<table border="0">
<tbody><tr>
<td style="padding-left: 70px; padding-top: 15px;"><a href="http://plagiarismcheckerx.com/" target="_blank"><img src="Plagiarism%20Checking%20Result%20for%20Btech%20Project%20-%20RLCaR_files/PlagiarismCheckerX-Icon.png" width="128px" height="93px"></a></td>
<td align="center"><h4> Plagiarism Quantity: 9% Duplicate</h4></td>
</tr>
</tbody></table>
<div class="primary">
<div id="content">
<div class="textarea-wrapper">
<table id="newspaper-c" width="100%" border="1">
<tbody><tr>
<td width="90px"> Date </td>
<td width="650px"> Saturday, December 14, 2019</td>
</tr>
<tr>
<td> Words </td>
<td> 924 Plagiarized Words / Total 10185 Words</td>
</tr>
<tr>
<td> Sources </td>
<td> More than 140 Sources Identified.</td>
</tr>
<tr>
<td> Remarks </td>
<td> Low Plagiarism Detected - Your Document needs Optional Improvement. </td>
</tr>
</tbody></table>
<div class="txt_check_plag">
<p>
<!--Header_End-->
<input type="hidden" id="version" value="1.0">
<span id="n0" class="tooltip null_selection" title=" "> An Approach Based on Reinforcement Learning for Efficient Process Cache Management in Operating Systems B.Tech</span><span id="n1" class="tooltip red_selection" title=" ">
Project Report Submitted in partial fulfillment of the requirements for
the award of the degree of Bachelors of Technology in Department of
Computer Science and</span><span id="n2" class="tooltip null_selection" title=" "> Engineering by Sumanyu Muku, Vaibhav Kumar, Vikas Tomar (2K16/CO/322, 2K16/CO/346, 2K16/CO/352) under the guidance of Dr.</span><span id="n3" class="tooltip red_selection" title=" "> Rajni Jindal Head of department Department of Computer Science and Engineering DEPTT.<br><br></span><span id="n4" class="tooltip null_selection" title=" ">
OF COMPUTER SCIENCE AND ENGINEERING DELHI TECHNOLOGICAL
UNIVERSITY,DELHI December, 2019 CERTIFICATE This is to certify that
Project Report entitled An Approach Based</span><span id="n5" class="tooltip null_selection" title="">
on Reinforcement Learning for Efficient Process Cache Management in
Operating Systems submitted by Sumanyu Muku (2K16/CO/322), Vaibhav Kumar
(2K16/CO/346), and Vikas</span><span id="n6" class="tooltip null_selection" title="">
Tomar (2K16/CO/352) for partial fulfillment of the requirement for the
award of degree Bachelors Of Technology (Computer Science and
Engineering) is a record of</span><span id="n7" class="tooltip red_selection" title=""> the candidate work carried out by them under my supervision. Dr.<br><br></span><span id="n8" class="tooltip red_selection" title=" ">
Rajni Jindal Head of Department, Project Guide, Department of Computer
Science and Engineering Delhi Technological University 1 DECLARATION We
hereby declare that</span><span id="n9" class="tooltip null_selection" title=" ">
the project work entitled An Approach Based on Reinforcement Learning
for Efficient Process Cache Management in Operating Systems which is
being submitted to Delhi</span><span id="n10" class="tooltip red_selection" title=" ">
Technological University, in partial fulfillment of requirements for
the award of degree of Bachelor Of Technology (Computer Science and
Engineering) is a bonafide</span><span id="n11" class="tooltip red_selection" title=" ">
report of B.Tech Thesis carried out by us. The material contained in
the report has not been submitted to any university or institution for
the award of any degree.<br><br></span><span id="n12" class="tooltip null_selection" title=" ">
Sumanyu Muku (2K16/CO/322), Vaibhav Kumar (2K16/CO/346), Vikas Tomar
(2K16/CO/352) 2 ACKNOWLEDGEMENT We would like to express our gratitude
and appreciation to all</span><span id="n13" class="tooltip null_selection" title=" "> those who gave us the support to complete this project. A special thanks to our mentor and project guide, Dr.</span><span id="n14" class="tooltip null_selection" title=" ">
Rajni Jindal , Head of Department, Computer Science and Engineering,
Delhi Technological University for her inspiring guidance, constructive
criticism and valuable</span><span id="n15" class="tooltip red_selection" title=" ">
suggestion throughout this project work. We also extend our sincere
thanks to all our friends who have patiently helped us in accomplishing
this undertaking.<br><br></span><span id="n16" class="tooltip null_selection" title=" ">
Sumanyu Muku (2K16/CO/322), Vaibhav Kumar (2K16/CO/346), Vikas Tomar
(2K16/CO/352)</span><span id="n17" class="tooltip null_selection" title=" ">
Contents: 1 Abstract; 2 Introduction; 3 Related work; 4 Preliminaries (4.1 Page cache management; 4.2 Caching Strategy: 4.2.1 Write Through, 4.2.2 Write Back, 4.2.3 Write on Read; 4.3 TTL Strategy; 4.4 Eviction Strategy: 4.4.1 First In First Out (FIFO), 4.4.2 Least Recently Used (LRU), 4.4.3 Least Frequently Used (LFU), 4.4.4 CLOCK, 4.4.5 Most Recently Used (MRU), 4.4.6 Random Replacement (RR), 4.4.7 Low Inter-reference Recency Set (LIRS), 4.4.8 2Q, 4.4.9 Adaptive Replacement Cache, 4.4.10 Clock with Adaptive Replacement); 5 Reinforcement Learning (5.1 Reinforcement Learning Problem: 5.1.1 Environment, 5.1.2 Policy, 5.1.3 Rewards/Goal, 5.1.4 Reward Function, 5.1.5 Discounted Rewards, 5.1.6 Value Function, 5.1.7 Model, 5.1.8 The Bellman Equation; 5.2 Markov Decision Process: 5.2.1 Temporal Learning (TD), 5.2.2 Q-learning, 5.2.3 SARSA, 5.2.4 Expected SARSA; 5.3 Classes of Algorithms: 5.3.1 Model Based Learning, 5.3.2 Model Free Learning, 5.3.3 Policy Search); 6 Our approach (6.1 Reinforcement Learning formulation: 6.1.1 State space, 6.1.2 Action space, 6.1.3 Reward, 6.1.4 Environment); 7 Result (7.1 Reward Maximization; 7.2 Cache hits: 7.2.1 LFU, 7.2.2 LRU, 7.2.3 RLCaR; 7.3 Comparative Analysis); 8 Code (8.1 OS page request module; 8.2 CacheEnv; 8.3 Qlearning); 9 Future work; 10 Conclusion.<br><br>
List of Figures: 1 General Caching Mechanism; 2 Cache Management Flowchart; 4 Demonstration of LRU for a sequence of data items; 5 Demonstration of Clock Algorithm; 6 Demonstration of MRU algorithm for a sequence of data items; 7 Demonstration of RR algorithm for a sequence of data items; 8 Demonstration of LIRS Mechanism; 9 Demonstration of 2Q algorithm; 10 Demonstration of the CAR algorithm; 11 The reinforcement learning framework; 12 Effect of a delayed reward signal; 13 Backup diagram for Q-Learning; 14 Backup diagram for SARSA; 15 Backup diagram for Expected SARSA; 16 Reward Curve; 17 LFU Cache hits; 18 LRU Cache hits; 19 RLCaR Cache hits; 20 Comparative analysis of LFU, LRU and RLCaR.<br><br></span><span id="n50" class="tooltip null_selection" title=" ">
1 Abstract Adaptive Cache replacement strategies have shown
superior performance in comparison to classical strategies like LRU and
LFU.</span><span id="n51" class="tooltip null_selection" title=" "> Some
of these strategies like Adaptive Replacement Cache (ARC), Clock with
Adaptive Replacement (CAR) are quite effective for day to day
applications but they do</span><span id="n52" class="tooltip null_selection" title=" ">
not encode access history or truly learn from cache misses. We propose a
reinforcement learning framework, RLCaR which seeks to tackle these
limitations.<br><br></span><span id="n53" class="tooltip null_selection" title=" ">
We use TD(0) model-free algorithms like Q-Learning, Expected SARSA and
SARSA to train our agent to efficiently replace pages in cache in order
to maximize the cache</span><span id="n54" class="tooltip null_selection" title=" "> hit ratio. We also developed a memory cache simulator in order to test our approach and compare it with LRU and LFU policies.</span><span id="n55" class="tooltip null_selection" title=" "> 2 Introduction Operating systems like Linux, Unix and Windows implement caching techniques for quick access of memory pages.</span><span id="n56" class="tooltip null_selection" title=" ">
Cache is a special portion allocated in the memory or disk for quick
access of data. Pages stored in the cache need to be replaced for
allocation of new pages.<br><br></span><span id="n57" class="tooltip null_selection" title=" ">
This process of evicting existing pages from cache to make space for
new pages is done by a page replacement policy like LRU (Least recently
used page) or LFU (Least</span><span id="n58" class="tooltip null_selection" title=" "> frequently used page). If the page requested by operating system is currently allocated in cache, it is said to be a cache hit.</span><span id="n59" class="tooltip null_selection" title=" "> If the page is not allocated in cache, it needs to be allocated. Such a scenario is called a cache miss.</span><span id="n60" class="tooltip null_selection" title=" "> In order to maximize performance, cache replacement is done in a way to maximize the cache hit ratio.<br><br></span><span id="n61" class="tooltip null_selection" title=" ">
In this project we present a Reinforcement Learning based cache
replacement technique. We call it RLCaR - Reinforcement Learning Cache
Replacement.</span><span id="n62" class="tooltip null_selection" title=" ">
We train an agent with the goal of optimizing the cache hit ratio. The agent
learns an optimal policy π* which helps it maximize this
objective (the cache hit ratio).</span><span id="n63" class="tooltip null_selection" title=" ">
We use reinforcement learning to train our system rather than
conventional machine or deep learning because this way, we can create a
generalized agent that can</span><span id="n64" class="tooltip null_selection" title=" "> work in various scenarios without relying on a plethora of data to train and evaluate.<br><br></span><span id="n65" class="tooltip null_selection" title=" ">
We describe our contributions as follows: • A novel reinforcement
learning formulation for the task of page replacement • A cache simulation model to
train and test our approach</span><span id="n66" class="tooltip null_selection" title=" ">
• A probabilistic simulation of the operating system's page request
routine • Comparative analysis of LRU, LFU and RLCaR The structure of
this project report is as</span><span id="n67" class="tooltip null_selection" title=" "> follows: We first present all the preliminaries required to understand our work, followed by our approach, results and code.</span><span id="n68" class="tooltip null_selection" title=" "> Finally, we mention the future work and conclude our report.<br><br></span><span id="n69" class="tooltip null_selection" title=" ">
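To make these contributions concrete before the background sections, here is a minimal, purely illustrative sketch of a probabilistic page-request stream and hit-ratio measurement; the function names and the FIFO stand-in policy are assumptions made for exposition and are not the project's actual simulator (CacheEnv) code.
<pre>
# Hypothetical sketch of a page-request simulator and hit-ratio measurement.
# A simple FIFO cache stands in for any replacement policy.
import random
from collections import deque

def zipf_requests(n_requests, n_pages, skew=1.2, seed=0):
    """Generate a skewed (roughly Zipf-like) stream of page ids."""
    rng = random.Random(seed)
    weights = [1.0 / (rank ** skew) for rank in range(1, n_pages + 1)]
    pages = list(range(n_pages))
    return [rng.choices(pages, weights=weights)[0] for _ in range(n_requests)]

def simulate(requests, capacity):
    """Replay a request stream against a FIFO cache and report the hit ratio."""
    cache, order, hits = set(), deque(), 0
    for page in requests:
        if page in cache:
            hits += 1                      # cache hit
        else:
            if len(cache) >= capacity:     # cache full: evict the oldest entry
                cache.discard(order.popleft())
            cache.add(page)
            order.append(page)
    return hits / len(requests)

stream = zipf_requests(n_requests=10_000, n_pages=200)
print("hit ratio:", round(simulate(stream, capacity=32), 3))
</pre>
Any replacement policy (LRU, LFU or an RLCaR-style agent) could be dropped in place of the FIFO stand-in to compare hit ratios on the same stream.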
3 Related work There have not been many works which utilize learning
the changing patterns in cache replacement mechanism. [Vietri et al.,</span><span id="n70" class="tooltip null_selection" title=" ">
2018] is one of the few works which try to learn the eviction strategy.
It tries to achieve a balance between recency and frequency.</span><span id="n71" class="tooltip null_selection" title=" "> It uses both LRU and LFU policies for its functioning. LeCaR involves regret minimization using reinforcement learning.</span><span id="n72" class="tooltip null_selection" title=" "> This algorithm maintains and learns a probability distribution for both LRU and LFU.<br><br></span><span id="n73" class="tooltip null_selection" title=" "> Weights W_LRU and W_LFU are associated to denote the regret incurred when a page is evicted using the particular mechanism, e.g.</span><span id="n74" class="tooltip null_selection" title=" ">
if a miss occurred due to the eviction of a page by LFU then the
weights are updated in such a way that next time more importance is
given to eviction of the page</span><span id="n75" class="tooltip null_selection" title=" "> by LRU. [Surbhi, 2018] is another work which tries to employ UCB1 algorithm for page cache replacement.</span><span id="n76" class="tooltip null_selection" title=" "> It tries to tackle the problem of loss of past access information and caching of large data with infrequent access patterns.<br><br></span><span id="n77" class="tooltip null_selection" title=" ">
This algorithm treats page cache replacement as a multi-armed bandit
problem. It tries to achieve a balance between exploration and
exploitation.</span><span id="n78" class="tooltip null_selection" title=" "> It can both greedily or randomly remove a page based on reward obtained.</span><span id="n79" class="tooltip null_selection" title=" ">
The reward depends on two parameters: 1) n_j, which is the number of
times a page j has been evicted and 2) T which is the number of times an
action is taken and</span><span id="n80" class="tooltip null_selection" title=" ">
a reward is received. The eviction strategy of [Alabed, 2019] treats
each cached object as a state of the Reinforcement Learning Algorithm.<br><br></span><span id="n81" class="tooltip null_selection" title=" "> When the cache is full and a page eviction is required this algorithm stops cache execution and analyzes all the states.</span><span id="n82" class="tooltip null_selection" title=" "> It then chooses the action of evicting an object or not based on past access patterns.</span><span id="n83" class="tooltip null_selection" title=" ">
If a page eviction later results in a cache miss then the agent is
penalized with a high negative reward otherwise it is given a positive
reward.</span><span id="n84" class="tooltip null_selection" title=" "> 4
Preliminaries In this section we'll discuss the
prerequisites required to understand and go through this report, in
order to maximise the take-away.<br><br></span><span id="n85" class="tooltip null_selection" title=" ">
(Figure 1: General Caching Mechanism) Reinforcement Learning for
efficient cache management requires one to have knowledge of certain
basic concepts such as: Page</span><span id="n86" class="tooltip null_selection" title=" "> Cache Management, Eviction/Page Replacement, and the Least Recently Used and Least Frequently Used page replacement algorithms.</span><span id="n87" class="tooltip null_selection" title=" ">
In the following section, we'll cover Reinforcement learning, Markov
Decision Process, Model free and Model Dependent learning, Q-learning,
SARSA and Expected SARSA.</span><span id="n88" class="tooltip null_selection" title=" ">
We've tried to cover each topic in sufficient depth so that the
application of our project becomes as clear as possible. 4.1<br><br></span><span id="n89" class="tooltip null_selection" title=" ">
Page cache management Cache Memory is a type of memory which is used to
speed up the data access process. Caching tries to achieve a balance
between speed and size.</span><span id="n90" class="tooltip null_selection" title=" "> Cache memory is small but the latency produced while accessing data is the least of all storage devices.</span><span id="n91" class="tooltip null_selection" title=" ">
Page cache is a type of disk cache which is responsible for caching
pages which are derived from accessing files in the secondary storage.</span><span id="n92" class="tooltip null_selection" title=" ">
The operating system is responsible for managing the page cache using
page memory management. The kernel keeps the page cache in the unused
section of the main memory.<br><br></span><span id="n93" class="tooltip null_selection" title=" "> Cache Management task is performed by a Cache Manager (CM).</span><span id="n94" class="tooltip null_selection" title=" ">
The cache manager performs the following tasks: 1) deciding whether to cache an
object or not 2) time for which an object should be cached 3) which
object to remove when</span><span id="n95" class="tooltip null_selection" title=" ">
the cache is full. 4.2 Caching Strategy Caching strategy is used to
determine whether an object is supposed to be cached or not.</span><span id="n96" class="tooltip null_selection" title=" "> If the requested page is found in the cache then there is a cache hit otherwise it is a cache miss.<br><br></span><span id="n97" class="tooltip null_selection" title=" ">
When a cache miss occurs, the missed page is retrieved
from the slower secondary storage. These are also known as cache
coherence strategies.</span><span id="n98" class="tooltip null_selection" title=" ">
There are 3 kinds of these strategies: 1) Write Through 2) Write Back
3) Write on Read. (Figure 2: Cache Management Flowchart) 4.2.1</span><span id="n99" class="tooltip null_selection" title=" ">
Write Through In write through the data is written to both cache and
the backend slower storage. It is good for processes that have read
after write access patterns.</span><span id="n100" class="tooltip null_selection" title=" "> However, writes are quite slow as the write operation occurs simultaneously at two places. 4.2.2<br><br></span><span id="n101" class="tooltip null_selection" title=" "> Write Back Write back is a more complex strategy which doesn't write to the back-end immediately.</span><span id="n102" class="tooltip null_selection" title=" ">
It keeps track of locations which were written over and marks them as
dirty. When the pages are evicted from the cache, they are written to
the back-end store.</span><span id="n103" class="tooltip null_selection" title=" ">
Read operations on average tend to be slower: if an altered page is
to be read, it is first written to the backend store and then
read from there.</span><span id="n104" class="tooltip null_selection" title=" ">
It is also known as lazy write. 4.2.3 Write on Read In this strategy
the data is brought into the cache when a read operation is invoked.<br><br></span><span id="n105" class="tooltip null_selection" title=" "> When there is a cache miss then the data is read from the slower backend and is also brought into the cache at the same time.</span><span id="n106" class="tooltip null_selection" title=" ">
The data object remains in the cache until its TTL expires. There is always a
chance of a cache miss in this mechanism when a read operation is called
after a write operation.</span><span id="n107" class="tooltip null_selection" title=" "> (Figure: (a) Write Through Mechanism; (b) Write Back Mechanism)</span><span id="n108" class="tooltip null_selection" title=" ">
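To contrast the write-through and write-back strategies described above, here is a minimal illustrative sketch; the backing_store dictionary and the method names are assumptions made purely for exposition.
<pre>
# Illustrative-only contrast of write-through vs write-back caching.
class WriteThroughCache:
    def __init__(self, backing_store):
        self.cache, self.store = {}, backing_store

    def write(self, key, value):
        self.cache[key] = value
        self.store[key] = value            # write goes to cache AND backend at once

class WriteBackCache:
    def __init__(self, backing_store):
        self.cache, self.dirty, self.store = {}, set(), backing_store

    def write(self, key, value):
        self.cache[key] = value
        self.dirty.add(key)                # only mark dirty; backend is updated lazily

    def evict(self, key):
        if key in self.dirty:              # flush the dirty page on eviction
            self.store[key] = self.cache[key]
            self.dirty.discard(key)
        self.cache.pop(key, None)
</pre>
4.3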
TTL Strategy Time To Live (TTL) strategy is used to ensure data
freshness and make sure not to clutter the cache with unwanted objects.<br><br></span><span id="n109" class="tooltip null_selection" title=" "> TTL is an integer which denotes the time for which an object is kept in the cache, after which it is evicted.</span><span id="n110" class="tooltip null_selection" title=" "> This strategy is used in order to tackle the problem of data staleness.</span><span id="n111" class="tooltip null_selection" title=" ">
Data staleness occurs when an object is updated in the backend storage
but not in the cache or when the write operation somehow fails in cache
but succeeds in the</span><span id="n112" class="tooltip null_selection" title=" ">
backend store. Underestimating the value of TTL can lead to various
cache misses, whereas overestimating it can clutter the cache with unwanted
objects. 4.4<br><br></span><span id="n113" class="tooltip null_selection" title=" "> Eviction Strategy A cache eviction strategy is used when the cache is full.</span><span id="n114" class="tooltip null_selection" title=" ">
It is used to determine which data object to remove to make space for
another data object so that the cache hit ratio remains maximum.</span><span id="n115" class="tooltip null_selection" title=" "> There are various cache eviction strategies out there. Some of them are explained below: 4.4.1</span><span id="n116" class="tooltip null_selection" title=" ">
First In First Out (FIFO) FIFO algorithm is used for evicting objects
which were first inserted in the cache irrespective of the access
patterns.<br><br></span><span id="n117" class="tooltip null_selection" title=" "> It is useful for events which are linear in occurrence e.g.</span><span id="n118" class="tooltip null_selection" title=" ">
it is common to go back to a page which was visited recently but it is
uncommon to go to a page which was accessed quite earlier. 4.4.2</span><span id="n119" class="tooltip null_selection" title=" ">
Least Recently Used (LRU) LRU evicts the data item which was least
recently used. It keeps track of the access history of each data item.</span><span id="n120" class="tooltip null_selection" title=" "> It is a popular algorithm due to its simplicity and applicability in real world scenarios.<br><br></span><span id="n121" class="tooltip null_selection" title=" "> Generally "age bits" are tracked and the LRU algorithm makes decisions on the basis of these "age bits".</span><span id="n122" class="tooltip null_selection" title=" ">
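As a concrete illustration, a minimal LRU sketch in Python using an ordered dictionary rather than explicit age bits; this is an illustrative assumption, not the project's implementation.
<pre>
# Minimal LRU cache sketch: the most recently used keys live at the right end.
from collections import OrderedDict

class LRUCache:
    def __init__(self, capacity):
        self.capacity, self.data = capacity, OrderedDict()

    def access(self, key, value=None):
        if key in self.data:
            self.data.move_to_end(key)     # refresh recency on a hit
            return self.data[key]
        if len(self.data) >= self.capacity:
            self.data.popitem(last=False)  # evict the least recently used key
        self.data[key] = value
        return None
</pre>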
(Figure 4: Demonstration of LRU for a sequence of data items) 4.4.3
Least Frequently Used (LFU) LFU keeps track of the frequency of the data
items.</span><span id="n123" class="tooltip null_selection" title=" "> It evicts those items which have been least frequently used.</span><span id="n124" class="tooltip null_selection" title=" ">
It is similar to LRU, but instead of keeping track of the access
history of a data item it keeps track of how many times the item was
accessed.<br><br></span><span id="n125" class="tooltip null_selection" title=" ">
It is useful in situations where a popular data item is accessed
throughout the application life cycle, for example the icon of a website.
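Analogously, a minimal LFU sketch that tracks an access counter per item; again only an illustrative assumption, not the project's code.
<pre>
# Minimal LFU cache sketch: evict the key with the smallest access count.
class LFUCache:
    def __init__(self, capacity):
        self.capacity, self.data, self.freq = capacity, {}, {}

    def access(self, key, value=None):
        if key in self.data:
            self.freq[key] += 1            # count every access
            return self.data[key]
        if len(self.data) >= self.capacity:
            victim = min(self.freq, key=self.freq.get)  # least frequently used key
            self.data.pop(victim)
            self.freq.pop(victim)
        self.data[key] = value
        self.freq[key] = 1
        return None
</pre>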
4.4.4</span><span id="n126" class="tooltip null_selection" title=" ">
CLOCK The CLOCK algorithm is similar to the second chance algorithm but,
instead of moving the data items again and again to the end of the list,
it implements the cache</span><span id="n127" class="tooltip null_selection" title=" "> as a circular list. Each data item has an R bit. The hand, or the iterator of the circular list, points towards the oldest page.</span><span id="n128" class="tooltip null_selection" title=" ">
When a page fault occurs, the hand checks the R bit of the data
item it is referencing. If the R bit is 0 then the page is evicted.<br><br></span><span id="n129" class="tooltip null_selection" title=" "> If the R bit is 1 then the bit is cleared and the hand is moved to the next data item.</span><span id="n130" class="tooltip null_selection" title=" "> This continues until it finds a data item whose bit has been cleared and it evicts that page.</span><span id="n131" class="tooltip null_selection" title=" ">
This algorithm tries to find a page which is both old and unused in the
clock interval. (Figure 5: Demonstration of Clock Algorithm)</span><span id="n119" class="tooltip red_selection" title=" ">
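A hedged sketch of this second-chance mechanism, assuming a simple list of (page, R-bit) pairs; it is illustrative only.
<pre>
# Minimal CLOCK sketch: a circular buffer of (page, R-bit) pairs with a hand.
class ClockCache:
    def __init__(self, capacity):
        self.capacity, self.frames, self.hand = capacity, [], 0

    def access(self, page):
        for i, (p, _) in enumerate(self.frames):
            if p == page:
                self.frames[i] = (p, 1)    # hit: set the R bit
                return True
        if len(self.frames) < self.capacity:
            self.frames.append((page, 1))  # a free frame is still available
            return False
        while True:                        # advance the hand until an R bit is 0
            p, r = self.frames[self.hand]
            if r == 0:
                self.frames[self.hand] = (page, 1)       # evict and replace
                self.hand = (self.hand + 1) % self.capacity
                return False
            self.frames[self.hand] = (p, 0)              # give a second chance
            self.hand = (self.hand + 1) % self.capacity
</pre>
4.4.5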
Most Recently Used (MRU) MRU algorithm discards the most recently used
item in contrast to LRU. It is useful in applications which are
processed sequentially once.<br><br></span><span id="n133" class="tooltip null_selection" title=" ">
(Figure 6: Demonstration of MRU algorithm for a sequence of data
items.) 4.4.6 Random Replacement (RR) The RR algorithm randomly selects a
data item for eviction.</span><span id="n134" class="tooltip null_selection" title=" ">
It doesn't keep any information regarding the access history of the
data item. (Figure 7: Demonstration of RR algorithm for a sequence of
data items.) 4.4.7</span><span id="n135" class="tooltip null_selection" title=" ">
Low Inter-reference Recency Set (LIRS) LIRS is an improvement over LRU
algorithm. It uses reuse distance as its metric for choosing the page
for eviction.</span><span id="n136" class="tooltip red_selection" title=" "> Reuse distance is the number of distinct pages accessed between two references of a page.<br><br></span><span id="n137" class="tooltip red_selection" title=" "> For a page which was accessed for the first time, the reuse distance is infinite.</span><span id="n138" class="tooltip null_selection" title=" ">
In order to take recency into consideration, the LIRS algorithm uses the
larger value between the reuse distance and the recency of a page. This metric
is known as RD-R.</span><span id="n139" class="tooltip null_selection" title=" "> So pages are ranked according to RD-R and the worst one is then evicted. (Figure 8: Demonstration of LIRS Mechanism) 4.4.8</span><span id="n140" class="tooltip null_selection" title=" ">
2Q [Johnson and Shasha, 1994] proposed the 2Q page replacement
algorithm. 2Q uses two buffers: 1) a secondary buffer (As) and 2) a main
buffer (Am).<br><br></span><span id="n141" class="tooltip null_selection" title=" "> When a page is referenced for the first time it is put at the head of the As.</span><span id="n142" class="tooltip null_selection" title=" "> When a page present in As is referenced then it is put at the head of Am queue which is itself a LRU queue.</span><span id="n143" class="tooltip null_selection" title=" ">
When a page is to be removed from Am the least recently used one is
removed and put in the A1 queue. From the As queue, the page present at
the tail is removed.</span><span id="n144" class="tooltip null_selection" title=" "> 4.4.9 Adaptive Replacement Cache The ARC algorithm was proposed by [Megiddo and Modha, 2003]. The</span><span id="n145" class="tooltip null_selection" title=" "> ARC algorithm employs multiple queues for tracking both recency and frequency.<br><br></span><span id="n146" class="tooltip null_selection" title=" ">
The queues used in this algorithm are as follows: • A T1 queue for
caching recent entries. • A T2 queue for caching entries on the basis of
frequency.</span><span id="n147" class="tooltip null_selection" title=" "> • A B1 queue which is a ghost queue for keeping the metadata of the pages evicted from T1.</span><span id="n148" class="tooltip null_selection" title=" ">
• A B2 queue which is similar to the B1 queue but is used for storing the
metadata for T2. (Figure 9: Demonstration of 2Q algorithm.) 4.4.10</span><span id="n149" class="tooltip null_selection" title=" "> Clock with Adaptive Replacement The CAR algorithm was proposed by [Bansal and Modha, 2004].<br><br></span><span id="n150" class="tooltip null_selection" title=" ">
CAR, like the ARC algorithm, uses multiple lists for tracking recency and
frequency. It also ensures that there is scan resistance and that lock
contention is avoided.</span><span id="n151" class="tooltip null_selection" title=" "> The T1 and T2 queues used in this case are circular lists.</span><span id="n152" class="tooltip red_selection" title=" ">
5 Reinforcement Learning Machine Learning is broadly divided into three
categories: Supervised Learning, Unsupervised Learning and
Reinforcement Learning.</span><span id="n153" class="tooltip null_selection" title=" ">
(Figure 10: Demonstration of the CAR algorithm.)<br><br>Supervised learning involves a function that maps an input value to an
output target value based on</span><span id="n154" class="tooltip red_selection" title=" "> example input-output pairs of values. In other words, we are provided with labeled training data.</span><span id="n155" class="tooltip null_selection" title=" ">
In contrast to this, unsupervised learning methods aim to extract
relevant features from unlabelled data, encapsulating their useful
properties with minimum errors.</span><span id="n156" class="tooltip null_selection" title=" ">
The third category, which we'll be discussing in detail further in this report,
is called Reinforcement Learning, which has an agent that
learns about the constraints</span><span id="n157" class="tooltip null_selection" title=" "> and decisions in real-time while interacting with the environment.<br><br></span><span id="n158" class="tooltip null_selection" title=" ">
In the past few years, Reinforcement Learning (RL) or adaptive (or
approximate) dynamic programming (ADP), came up as an effective tool for
evaluating complicated</span><span id="n159" class="tooltip red_selection" title=" "> sequential decision-making problems.</span><span id="n160" class="tooltip null_selection" title=" ">
Although many influential studies in this area were conducted
within the artificial intelligence (AI) community, more recently it has
been a point of attraction</span><span id="n161" class="tooltip null_selection" title=" "> for optimization researchers because of many notable success stories from operations research.<br><br></span><span id="n162" class="tooltip null_selection" title=" "> The success of Reinforcement Learning is due to its sturdy mathematical foundations.</span><span id="n163" class="tooltip null_selection" title=" ">
A recurrent problem in Machine Learning involves making the agent learn
when and what action to perform in order to achieve the desired goal or
complete a certain</span><span id="n164" class="tooltip null_selection" title=" "> task in a given environment. During the learning phase the agent constantly interacts with the environment. 5.1</span><span id="n165" class="tooltip null_selection" title=" ">
Reinforcement Learning Problem According to [Andrew, 1999],
Reinforcement Learning is defined as a learning method where the learner
or the agent constantly interacts</span><span id="n166" class="tooltip null_selection" title=" "> with the environment in a hit and trial manner and aims at minimizing the errors encountered.<br><br></span><span id="n167" class="tooltip null_selection" title=" "> Unlike other machine learning methods, the agent isn't told beforehand what actions to perform.</span><span id="n168" class="tooltip null_selection" title=" ">
Instead, it is the responsibility of the agent itself to explore and
exploit its environment and figure out the suitable action.</span><span id="n169" class="tooltip null_selection" title=" ">
This process is driven by the rewards obtained by the agent, and the
goal of the agent at every step is to maximise the future rewards (or,
statistically speaking,</span><span id="n170" class="tooltip null_selection" title=" ">
the highest sum of expected rewards), usually in search of an objective
or a target space which is represented numerically by a large reward.<br><br></span><span id="n171" class="tooltip null_selection" title=" ">
Every reinforcement learning problem possesses similar modules: an agent
or learner, a reward function or teacher, a performance measurement
depicting how well the</span><span id="n172" class="tooltip null_selection" title=" "> agent is doing based upon tests represented numerically by rewards, and a few more variables.</span><span id="n173" class="tooltip null_selection" title=" ">
(Figure 11: The reinforcement learning framework) Figure 11 depicts the
fundamental concept of reinforcement learning. At every individual time
step t = 1,2,3,...</span><span id="n174" class="tooltip null_selection" title=" "> the agent, being in a certain state s_t ∈ S, takes one of the possible available actions a_t ∈ A(s_t).<br><br></span><span id="n175" class="tooltip null_selection" title=" "> The action will cause a change in the environment and takes it to a new state s_{t+1}.</span><span id="n176" class="tooltip null_selection" title=" ">
In addition to this, a reward R_{t+1} is received from the environment
which serves as feedback about the goodness of the taken action a_t in
state s_t.</span><span id="n177" class="tooltip red_selection" title=" ">
Along with the agent and environment, there are four additional
sub-components of a reinforcement learning system: a policy, a reward
function, a value function, and</span><span id="n178" class="tooltip null_selection" title=" ">
optionally, a model of the environment. 5.1.1 Environment An
environment comprises a world for the agent or the learner to act and
learn.<br><br></span><span id="n179" class="tooltip null_selection" title=" ">
For example, in any arcade game, our player would be an agent and its
surroundings (say a maze) in the particular level or episode would be
considered as its environment.</span><span id="n180" class="tooltip null_selection" title=" "> In order to define an event within the environment, we've considered the state to be a vector. 5.1.2</span><span id="n181" class="tooltip null_selection" title=" "> Policy A policy specifies the way of behaving for the learning agent at any given time.</span><span id="n182" class="tooltip null_selection" title=" ">
Roughly speaking, a policy maps the various states of the environment
to possible actions that can be taken when the agent is in those states.<br><br></span><span id="n183" class="tooltip red_selection" title=" ">
At times the policy may be a basic function or simply a lookup table,
whereas in some cases it may involve extensive computation such as a
proper search process.</span><span id="n184" class="tooltip null_selection" title=" "> The policy is the heart and soul of a reinforcement learning agent as it alone is enough to determine behaviour of the agent.</span><span id="n185" class="tooltip red_selection" title=" "> Policies can be either deterministic or stochastic.</span><span id="n186" class="tooltip null_selection" title=" ">
A deterministic policy is formulated as: π(s_t) = a_t , s_t ∈ S and a_t ∈ A
(1) A stochastic policy is formulated as: π(a_t|s_t) = P[A = a_t | S = s_t] , s_t
∈ S and a_t ∈ A (2)</span><span id="n187" class="tooltip null_selection" title=" ">
where P[A = a_t | S = s_t] is the probability of taking a particular action
conditioned on being in some state and lies between 0 and 1.<br><br></span><span id="n188" class="tooltip null_selection" title=" "> S and A are the sets of states and possible actions respectively. 5.1.3</span><span id="n189" class="tooltip null_selection" title=" "> Rewards/Goal In Reinforcement Learning methods, an agent will learn by reinforcement, or hit and trial.</span><span id="n190" class="tooltip null_selection" title=" ">
A negative reward or a penalty will lead to unwanted reactions.
Conversely, a series of positive rewards leads towards a good policy.</span><span id="n191" class="tooltip null_selection" title=" "> According to [Andrew, 1999], the aim of any agent must be to maximise the accumulated sum of rewards: R_t = r_{t+1} + r_{t+2} + r_{t+3} + ...<br><br></span><span id="n192" class="tooltip null_selection" title=" "> + r_T (3) where R_t denotes the return and t a particular time step up to T.</span><span id="n193" class="tooltip red_selection" title=" "> For a stochastic environment, the goal is to maximise the expected value of the return within the task.</span><span id="n194" class="tooltip null_selection" title=" "> The above equation can be rewritten as follows: R_t = Σ_{k=0}^{T} r_{t+k+1} (4). However, a future reward may have a different present value.</span><span id="n195" class="tooltip null_selection" title=" "> This rate is referred to as the discount factor. The discount factor γ can be defined as a learning value varying over 0 ≤ γ ≤ 1.<br><br></span><span id="n196" class="tooltip null_selection" title=" "> Each future reward k steps ahead will be discounted by γ^{k-1}.</span><span id="n197" class="tooltip null_selection" title=" "> If γ approaches zero, rewards in the near future of the present state are considered more valuable by the agent.</span><span id="n198" class="tooltip null_selection" title=" "> Conversely, if γ is close to one, then the agent treats future rewards with (almost) equal importance and behaves far-sightedly.</span><span id="n199" class="tooltip null_selection" title=" "> The return for a specific policy can be formulated as: R_t = Σ_{k=0}^{T} γ^k r_{t+k+1} (5) 5.1.4<br><br></span><span id="n200" class="tooltip null_selection" title=" "> Reward Function The reward function defines the goal of any reinforcement learning problem.</span><span id="n201" class="tooltip null_selection" title=" ">
It maps each input state(or state-action pair) in an environment to a
single numerical value, called a reward, depicting the innate preference
of that state.</span><span id="n202" class="tooltip red_selection" title=" "> The ultimate goal of the reinforcement learning agent is to maximise the rewards it receives in the long run.</span><span id="n203" class="tooltip null_selection" title=" "> The reward function defines how good or bad certain actions can be for our agent.<br><br></span><span id="n204" class="tooltip null_selection" title=" "> The utmost important characteristic of the reward function is that it cannot be changed by the agent.</span><span id="n205" class="tooltip null_selection" title=" "> It may, however, serve the basis of altering the policy of our system.</span><span id="n206" class="tooltip null_selection" title=" ">
For example, an action chosen by policy might yield a low reward which
will lead to change in policy so that some other action is chosen in
that situation in the</span><span id="n207" class="tooltip null_selection" title=" "> future. 5.1.5<br><br></span><span id="n208" class="tooltip null_selection" title=" ">
Discounted Rewards In order to train an effective agent, the goal
should be to maximise the reward in the long run instead of just caring
about the immediate returns.</span><span id="n209" class="tooltip null_selection" title=" "> The discounted expected return is defined as the cumulative sum of future returns and can be found in (6).</span><span id="n210" class="tooltip null_selection" title=" "> The discount factor γ ∈ [0, 1] tells how to weight the distant rewards. R_t = R_{t+1} + γR_{t+2} + γ²R_{t+3} + ...</span><span id="n211" class="tooltip null_selection" title=" ">
= Σ_{k=0}^{∞} γ^k R_{t+k+1} (6) It can be divided into two categories, which
are as follows: • Episodic: We can divide the training process into
episodes.<br><br></span><span id="n212" class="tooltip null_selection" title=" ">
An episode is considered to be over when the agent reaches a terminal
state, which resets the scene and the agent has to start again in the
next episode.</span><span id="n213" class="tooltip null_selection" title=" "> The immediate reward for the terminal state is 0, and it can be reached in T finite time steps.</span><span id="n214" class="tooltip null_selection" title=" "> • Continuous: We cannot form episodes as it is a continuous ongoing process such that T = ∞.</span><span id="n182" class="tooltip null_selection" title=" ">
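As a small numeric illustration of the discounted return in (6), assuming an arbitrary, made-up reward list:
<pre>
# Discounted return of equation (6): R_t = sum_k gamma**k * R_{t+k+1}.
def discounted_return(rewards, gamma=0.9):
    g = 0.0
    for k, r in enumerate(rewards):        # rewards[k] plays the role of R_{t+k+1}
        g += (gamma ** k) * r
    return g

print(discounted_return([1, 0, 0, 5], gamma=0.9))   # 1 + 0 + 0 + 0.729*5 = 4.645
</pre>
5.1.6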
Value Function While the reward function defines what's good in an
immediate sense, the value function defines what would be good for the
agent in the far sight.<br><br></span><span id="n216" class="tooltip red_selection" title=" ">
Roughly speaking, the value of the state is the total amount of reward
an agent can expect to collect over the steps taken in future, starting
from the present state.</span><span id="n217" class="tooltip null_selection" title=" ">
Whereas rewards determine the immediate, inherent preference for the
states, values determine the long term favourability of a state after
considering the states that</span><span id="n218" class="tooltip null_selection" title=" "> are likely to follow, and the rewards that will be received from those states.</span><span id="n182" class="tooltip null_selection" title=" ">
For example, a state yielding a low immediate reward might have high
value because of it being regularly followed by states of high rewards
and vice-versa.<br><br></span><span id="n220" class="tooltip null_selection" title=" "> In a system following a policy π, the value V_π is given by the expected rewards from that state.</span><span id="n221" class="tooltip null_selection" title=" "> Mathematically, it is expressed as: V_π(s) = E_π[R_t + γR_{t+1} + γ²R_{t+2} + ... | S_t = s] (7) 5.1.7</span><span id="n222" class="tooltip null_selection" title=" "> Model A model of the environment imitates the behaviour of the environment itself.</span><span id="n223" class="tooltip red_selection" title=" ">
For instance, given a state and action, the model is capable of
predicting the resultant state and reward. Generally, models are used
for planning, i.e.,<br><br></span><span id="n224" class="tooltip null_selection" title=" ">
the ways of deciding on a course of action by taking into account
possible future situations before they actually happen. Earlier</span><span id="n225" class="tooltip null_selection" title=" ">
reinforcement learning systems were explicitly trial-and-error
learners, where they did something that was almost the opposite of planning.
5.1.8</span><span id="n226" class="tooltip null_selection" title=" ">
The Bellman Equation Mathematician Richard Bellman derived the very
famous Bellman Equation which allows us to start solving Markov
Decision Processes (MDPs).</span><span id="n227" class="tooltip null_selection" title=" ">
The Bellman equation is omnipresent in Reinforcement Learning and allows
us to understand how various Reinforcement Learning algorithms work.<br><br></span><span id="n228" class="tooltip null_selection" title=" "> The importance of the Bellman Equation is that it lets us express the value of a state in terms of the values of other states.</span><span id="n229" class="tooltip null_selection" title=" ">
It provides a recursive relation between current states and successive
states, i.e., if we know the value of s_{t+1}, we can calculate the value
of s_t.</span><span id="n230" class="tooltip null_selection" title=" "> Before we quote the Bellman Equations, we must understand how P and R are defined.</span><span id="n231" class="tooltip null_selection" title=" "> We define P and R as follows: P^a_{ss'} = Pr(s_{t+1} = s' | s_t = s, a_t = a) (8) P is the transition probability, i.e.,<br><br></span><span id="n232" class="tooltip null_selection" title=" "> if we start at state s and take action a we end up in state s' with probability P^a_{ss'}.</span><span id="n233" class="tooltip null_selection" title=" ">
R^a_{ss'} = E[r_{t+1} | s_t = s, s_{t+1} = s', a_t = a] (9) R^a_{ss'} is another
way of writing the expected (or mean) reward that can be received when
starting from state s,</span><span id="n234" class="tooltip red_selection" title=" "> taking action a, and moving into state s'. Finally, with these in hand we can formulate the Bellman Equation.</span><span id="n235" class="tooltip null_selection" title=" ">
The Bellman Equation for the state value function is denoted as follows: v_π(s) =
Σ_a π(a|s) Σ_{s',r} P^a_{ss'} [R^a_{ss'} + γ v_π(s')] (10) For the action value
function it is denoted</span><span id="n236" class="tooltip null_selection" title=" "> as follows: Q^π(s, a) = Σ_{s'} P^a_{ss'} [R^a_{ss'} + γ Σ_{a'} π(s', a') Q^π(s', a')] (11)<br><br></span><span id="n237" class="tooltip null_selection" title=" ">
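As an illustration of equation (10), here is a single sweep of the Bellman backup on a tiny made-up two-state MDP; the transition table, policy and γ = 0.9 are invented purely for exposition.
<pre>
# One Bellman backup for v_pi (equation 10) on a toy two-state MDP.
# P[s][a] is a list of (prob, next_state, reward) triples -- illustrative data only.
P = {
    "s0": {"stay": [(1.0, "s0", 0.0)], "go": [(1.0, "s1", 1.0)]},
    "s1": {"stay": [(1.0, "s1", 0.5)], "go": [(1.0, "s0", 0.0)]},
}
policy = {"s0": {"stay": 0.5, "go": 0.5}, "s1": {"stay": 1.0, "go": 0.0}}
gamma, v = 0.9, {"s0": 0.0, "s1": 0.0}

def bellman_backup(s):
    """v_pi(s) = sum_a pi(a|s) sum_{s'} P^a_{ss'} [R^a_{ss'} + gamma * v_pi(s')]"""
    return sum(
        pi_a * sum(p * (r + gamma * v[s2]) for p, s2, r in P[s][a])
        for a, pi_a in policy[s].items()
    )

v = {s: bellman_backup(s) for s in P}      # one sweep of iterative policy evaluation
print(v)
</pre>
5.2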
Markov Decision Process In order to compare real world tasks and
theoretical problems, a common framework is required and hence, we need
to mathematically formalize</span><span id="n238" class="tooltip null_selection" title=" "> this decision making process.</span><span id="n239" class="tooltip null_selection" title=" ">
According to the Markov Assumption or Property, an independence of past
and future states means that the state and the behaviour of the
environment</span><span id="n240" class="tooltip null_selection" title=" "> at time step t are not affected by the past agent-environment interactions a_1, ..., a_{t-1}: P(s_{t+1} | s_t, a_t, s_{t-1}, a_{t-1}, ...)<br><br></span><span id="n241" class="tooltip null_selection" title=" "> = P(s_{t+1} | s_t, a_t) (12) In short, the future must be independent of the past given the present.</span><span id="n242" class="tooltip null_selection" title=" ">
If the reinforcement learning task can fulfill the Markov Assumption,
it can be formulated as a five-tuple Markov Decision Process (S, A, P^a_{ss'},
R^a_{ss'}, γ).</span><span id="n243" class="tooltip null_selection" title=" ">
• Set of states S. • Set of Actions A. A(s) is the set of available
actions in the state s. • Transition probabilities P^a_{ss'}: (S × A
× S) → [0, 1].</span><span id="n244" class="tooltip null_selection" title=" ">
It is the probability of the transition from s to s' when taking action
a in state s at time step t. • Reward probabilities R^a_{ss'}: (S × A ×
S) → IR.<br><br></span><span id="n245" class="tooltip null_selection" title=" ">
It defines the immediate reward the agent will receive after the
transition from s to s'. • Discount factor γ ∈ [0, 1] for computing
the discounted expected return.</span><span id="n246" class="tooltip null_selection" title=" "> A Markov Decision Process is finite if the state set S and action set A are finite.</span><span id="n247" class="tooltip red_selection" title=" "> An MDP formally describes a given environment for reinforcement learning given that the environment is fully observable, i.e.,</span><span id="n248" class="tooltip null_selection" title=" ">
the current state completely characterises the process.<br><br></span><span id="n251" class="tooltip null_selection" title=" ">
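To make the five-tuple concrete, a tiny illustrative container follows; the toy values are invented and are not part of the project's code.
<pre>
# The MDP five-tuple (S, A, P, R, gamma) as a plain data container -- illustrative only.
from dataclasses import dataclass

@dataclass
class MDP:
    states: list          # S
    actions: list         # A
    P: dict               # P[(s, a, s_next)] -> transition probability
    R: dict               # R[(s, a, s_next)] -> expected immediate reward
    gamma: float          # discount factor in [0, 1]

toy = MDP(states=["s0", "s1"], actions=["stay", "go"],
          P={("s0", "go", "s1"): 1.0}, R={("s0", "go", "s1"): 1.0}, gamma=0.9)
</pre>
5.2.1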
Temporal Learning (TD) This method is very analogous to a natural learning
agent, in which the farther in the past a decision was made, the less
its value V_{t-k} is reinforced</span><span id="n252" class="tooltip red_selection" title=" "> by the current reward signal r_t, as shown in the figure below.<br><br></span><span id="n253" class="tooltip null_selection" title=" ">
Temporal learning algorithms(Q-learning, SARSA) can learn directly from
the fresh experiences without the requirement of specific model of the
environment.</span><span id="n254" class="tooltip null_selection" title=" ">
(Figure 12: Effect of a delayed reward signal) The cost of being able to
regularly vary the value function is paid by the necessity of
updating the estimated values</span><span id="n255" class="tooltip null_selection" title=" ">
based on other learnt estimations. There is no need to disregard any
episodes as it can learn from every transition (state, reward, action,
next state, next reward).</span><span id="n256" class="tooltip null_selection" title=" ">
5.2.2 Q-learning Q-Learning is a groundbreaking solution to Markov
Decision Process. It is a Temporal Difference (TD) learning algorithm.<br><br></span><span id="n257" class="tooltip red_selection" title=" "> It is an off-policy algorithm which uses the greedy policy to update the Q-Table.</span><span id="n258" class="tooltip null_selection" title=" ">
Compared to Monte-Carlo methods where the Q-Table is updated at the end of every
episode, here the Q-Table is continuously updated at every timestep.</span><span id="n259" class="tooltip null_selection" title=" ">
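A minimal tabular sketch of this off-policy update with an ε-greedy behaviour policy follows; the α, γ and ε values are arbitrary assumptions, and this is not the project's Qlearning module.
<pre>
# Tabular Q-Learning update: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)).
import random
from collections import defaultdict

Q = defaultdict(float)                     # Q[(state, action)] -> value
alpha, gamma, epsilon = 0.1, 0.9, 0.1

def choose_action(state, actions):
    """Epsilon-greedy behaviour policy."""
    if random.random() < epsilon:
        return random.choice(actions)
    return max(actions, key=lambda a: Q[(state, a)])

def q_learning_update(s, a, r, s_next, actions):
    best_next = max(Q[(s_next, a2)] for a2 in actions)   # greedy (off-policy) target
    Q[(s, a)] += alpha * (r + gamma * best_next - Q[(s, a)])
</pre>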
Figure 13: Backup diagram for Q-Learning The figure 13 represents the
backup diagram for Q-Learning and its update rule beneath it. Source:
Sutton and Barto.</span><span id="n260" class="tooltip red_selection" title=" "> 24 5.2.3 SARSA SARSA � an acronym for State-Action-Reward-State-Action is an on-policy learning algorithm.<br><br></span><span id="n261" class="tooltip null_selection" title=" "> It learns by taking actions in accordance with the current policy rather than the greedy policy.</span><span id="n262" class="tooltip null_selection" title=" ">
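Since the update rule shown in Figure 13 does not survive in this text, the standard textbook form of the Q-Learning update (after Sutton and Barto) is reproduced here for reference, where α is the learning rate and γ the discount factor:

Q(s_t, a_t) ← Q(s_t, a_t) + α [ r_{t+1} + γ max_a Q(s_{t+1}, a) - Q(s_t, a_t) ]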
5.2.3 SARSA
SARSA, an acronym for State-Action-Reward-State-Action, is an on-policy learning algorithm. It learns by taking actions in accordance with the current policy rather than the greedy policy. SARSA updates and stores the Q-Values for every state-action pair in a Q-Table as follows:

Figure 14: Backup diagram for SARSA

Figure 14 represents the backup diagram for SARSA with its update rule beneath it. Source: Sutton and Barto.
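The SARSA update rule beneath Figure 14 likewise does not survive extraction; its standard textbook form is given below, where a_{t+1} is the action actually chosen by the current (e.g. ε-greedy) policy in s_{t+1}:

Q(s_t, a_t) ← Q(s_t, a_t) + α [ r_{t+1} + γ Q(s_{t+1}, a_{t+1}) - Q(s_t, a_t) ]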
5.2.4 Expected SARSA
Expected SARSA is similar to SARSA, with the difference that it takes the expected value of the Q-Values over every possible action in a particular state to update the Q-Table.
The update equation of Expected SARSA appears beneath its backup diagram in Figure 15.

Figure 15: Backup diagram for Expected SARSA
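For reference, the standard form of the Expected SARSA update referred to above is:

Q(s_t, a_t) ← Q(s_t, a_t) + α [ r_{t+1} + γ Σ_a π(a | s_{t+1}) Q(s_{t+1}, a) - Q(s_t, a_t) ]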
5.3 Classes of Algorithms
Given all the inputs, we need to decide how the agent will learn the optimal policy. There are three main categories of reinforcement learning algorithms, briefly discussed below.

5.3.1
Model-Based Learning
A model-based reinforcement learning algorithm takes the state, action, reward sequence as an input and then learns the transition probabilities P^a_{ss'}. Once learnt, the transitions can be used to solve the Markov Decision Process, which gives the optimal value function Q*, which in turn gives the optimal policy π*.

5.3.2 Model-Free Learning
In some problems, it is not required to learn the transition probabilities P^a_{ss'} at all. In such cases, the agent instead learns the state values or the state-action values that correspond to the optimal policy of the problem, without having to learn the state transitions.

5.3.3 Policy Search
This is the last class of reinforcement learning algorithms; it takes in a policy and feeds it to a policy update equation. The update equation then modifies the original policy based on the states, actions and rewards observed during the learning process.
6 Our approach
In this section, we propose a reinforcement learning based optimization of the cache replacement policy - RLCaR (Reinforcement Learning Cache Replacement). In particular, we want to train an agent to learn an optimal policy (π*) to efficiently replace frames in the memory cache in order to maximize a certain optimization goal metric. In our approach, we aim to maximize the cache hit ratio.

6.1 Reinforcement Learning formulation

6.1.1
State space
Intuitively, a state in our environment represents the operating system's cache allocation structure and related statistics. In order to inform the agent, we use two heuristics: Least Recently Used (LRU) based and Least Frequently Used (LFU) based statistics. More formally, a state in our system is an n-dimensional tuple:

S = (φ_1, φ_2, ..., φ_n)   (13)

where n is the size of the cache; it denotes the maximum number of pages that can be allocated at once in the memory cache. Each φ_i, ∀i ∈ [1, n], is again a 2-dimensional tuple:

φ_i = (ν_i, τ_i)   (14)

where ν_i is a counter for the number of times page i was accessed, and τ_i is a counter for the number of timesteps since page i was last accessed. The purpose of adding these counters is that ν_i helps the agent make decisions using the LFU heuristic, while τ_i helps it make cache replacement decisions using the LRU heuristic.
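As an illustration of this state layout, the sketch below derives the per-page (ν_i, τ_i) counters from an access log, in the same spirit as the pages dictionary of Section 8 (which stores [lu, nt]); the function and variable names here are illustrative only, not part of the RLCaR code.

from collections import defaultdict

def build_state(access_log, cache_ids):
    """Illustrative state: (times accessed, steps since last access) per cached page."""
    freq = defaultdict(int)   # nu_i ~ LFU counter
    last_seen = {}            # helper used to derive tau_i ~ LRU counter
    for t, page in enumerate(access_log):
        freq[page] += 1
        last_seen[page] = t
    now = len(access_log)
    return tuple((freq[p], now - 1 - last_seen.get(p, -1)) for p in cache_ids)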
6.1.2 Action space
The task of the agent is to learn an optimal policy that maximizes the cumulative reward function. For this, the agent has to make decisions regarding the eviction (cache replacement) policy. Hence, if the number of unique pages is k, the agent has to choose any one of these k pages to evict as an action. Therefore, the action space is the set:

A = {a_i | ∀i ∈ [0, k - 1]}   (15)

On choosing action a_i, the i-th page is evicted from the cache to make space for a new page.
6.1.3 Reward
The reward function has paramount significance in guiding the agent to learn an optimal policy π*. We want to maximize the hit ratio of our cache replacement system, so we reward the agent with a positive reward for taking an action that causes a cache hit, while a negative reward is given for a cache miss. It is possible that the agent takes an action to replace a page that is not even allocated in the cache; in this case, the agent is given a heavily negative reward. Mathematically, the reward function is formalized as:

R(s_i, a_i) =
    +α,  cache hit
    -α,  cache miss, i ∈ C(s_i)
    -β,  i ∉ C(s_i)   (16)

where s_i and a_i are the current state and action, and C(s_i) represents the cache set of state s_i, or simply the list of all the pages currently allocated in the cache in state s_i. α represents the magnitude of the reward: a positive reward of +α is given for a cache hit, while a negative reward of -α is given for a cache miss. A heavily negative reward of -β is given in case the agent tries to evict a page that does not exist in the cache; this is to heavily penalize the agent for making such grave mistakes. In our experiments, we have used the following values for these constants:

α = 1   (17)
β = 10   (18)

The agent should learn an optimal policy (π*) by getting positive rewards for actions that cause cache hits and negative rewards for actions that cause cache misses.
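The reward logic of Eq. (16) can be written directly as a small helper; this sketch uses illustrative names and simply mirrors the equation (the actual environment computes it inside allocate_cache in Section 8.2).

ALPHA = 1    # reward magnitude, Eq. (17)
BETA = 10    # heavy penalty magnitude, Eq. (18)

def reward(evicted_page_in_cache, hit, alpha=ALPHA, beta=BETA):
    """+alpha on a cache hit, -alpha on a miss, -beta if the chosen page is not cached."""
    if not evicted_page_in_cache:
        return -beta
    return alpha if hit else -alpha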
6.1.4 Environment
In reinforcement learning, the agent learns through active interactions with an environment. For our problem statement, the ideal environment would be the actual operating system's cache handling routine.
However, training an agent in such a practical setting is imprudent. We therefore create a simple probability-based simulator for cache management. Our simulator consists of two modules: the environment and the OS. We have created the environment by extending the OpenAI Gym environment class. Using OpenAI Gym's base class for our own custom environment makes our approach open source and more robust. For the operating system module, we have created a simple simulation of the operating system's system call that randomly requests access to a certain page:

a_i ~ P   (19)

where a_i is the requested page id, sampled from a custom distribution P over the pages, represented as:

P = (p_1, p_2, ..., p_k)   (20)

where k is the number of pages and p_i is the probability of the OS requesting page i. Since the p_i represent probabilities:

Σ_{i=1}^{k} p_i = 1   (21)

and p_i ∈ [0, 1] ∀i ∈ [1, k]   (22)

It is reasonable to sample the page requests from a probability distribution because operating systems make calls to certain pages more often than others. Following this principle, it is plausible to assume that the page requests made by the operating system follow a certain distribution.
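A minimal sketch of this sampling step (essentially what OS.get_id() in Section 8.1 does) is shown below; the function name is illustrative, while the probability values match the P used in Section 8.1.

import numpy as np

P = [0.3, 0.3, 0.2, 0.15, 0.05]   # distribution over k = 5 pages

def request_page(rng=np.random.default_rng()):
    """Sample a page id a_i ~ P, as in Eq. (19)."""
    return int(rng.choice(len(P), p=P))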
7 Results
This section outlines the results of our work on Reinforcement Learning Cache Replacement (RLCaR). We first present the reward optimization using three common model-free TD(0) RL algorithms: Q-Learning, SARSA and Expected SARSA. Following this, we do a comparative analysis between RLCaR, LRU and LFU based on the number of cache hits per episode.

7.1
Reward Maximization
We train an agent to maximize our reward function as described in Section 6.1.3, using three TD(0) algorithms:
1. Q-Learning - depicted in blue in Figure 16
2. SARSA - depicted in orange in Figure 16
3. Expected SARSA - depicted in green in Figure 16
Figure 16 is a plot of episodic reward gains. We ran the agent training for a total of 1,000 episodes and plot each episode's cumulative discounted reward (as used in the Bellman Equation) on the Y-axis. The expected cumulative reward to be maximized is:

R = E[ Σ_{i=1}^{T} γ^i r_i ]   (23)

where γ is the discount factor, r_i is the reward gained at the i-th timestep, and T is the episode length in number of timesteps. It should be noted that an episode terminates after running for T timesteps in our system.
For our experiments, we initialized these hyperparameters as follows:

T = 4, T ∈ [0, 8)   (24)

and for the discount factor γ,

γ = 0.9, γ ∈ [0, 1]   (25)

We justify the choice of discount factor γ = 0.9 by noting that the task of the agent is to replace cache entries in a way that maximizes the hit ratio: future rewards (future cache hits) are as important as the current reward (the current cache hit). Therefore, to promote exploration and give globally oriented importance to cache hits at every timestep, we set the value of γ close to 1. However, we are yet to do an experimental selection of the discount factor by varying it from 0 to 1.

Figure 16: Reward Curve

7.2
Cache hits
This section outlines the percentage of cache hits for each of the three cache replacement policies: LRU, RLCaR and LFU. To account for the stochasticity of operating system page requests, we perform each of our experiments 100 times and plot the mean, high and low of the cache hit percentage as an error-bound line curve. The quantity on the y-axis of the following curves is the cache hit ratio, defined as:

η = 100 × |Cache hits| / L   (26)

where |Cache hits| is the number of cache hits and L is the episode length. A higher value of η entails a better cache replacement strategy.

7.2.1
LFU

Figure 17: LFU Cache hits

As observed in Figure 17, the percentage of cache hits goes down as the episode length increases. This is explained by the incremental stochasticity of the environment: for a longer episode, the operating system makes more requests to pages in memory. Since each of these requests is stochastic, they add up to increase the overall stochasticity of the system, thereby reducing performance.

7.2.2 LRU

Figure 18: LRU Cache hits

Figure 18 shows the cache hit percentage with varying episode length of the system. It follows a trend similar to the LFU cache replacement policy.

7.2.3
RLCaR

Figure 19: RLCaR Cache hits

Figure 19 shows the cache hit percentage with varying episode length of the system for our reinforcement learning based cache replacement policy. A general downward trend in the percentage of cache hits can be observed with increasing episode length; again, the stochasticity of the environment explains this trend. However, drawing inferences from TD(0) RL based algorithms is notoriously difficult and requires a careful mathematical analysis, which we leave for future work.

7.3 Comparative Analysis

Figure 20: Comparative analysis of LFU, LRU and RLCaR

Figure 20 shows a comparison between the three cache replacement policies: LRU, LFU and RLCaR. It can be observed that our algorithm performs only a little worse than the LRU and LFU page replacement policies. Also, for certain episode lengths, RLCaR seems to perform better than both LRU and LFU. The state space of such a cache replacement environment is huge and continuous.
We expect to get much better results by approximating the policies using an Artificial Neural Network (ANN); we leave that for future work.

8 Code

8.1
OS page request module

import gym
import numpy as np
import pandas as pd
import pickle
from collections import defaultdict

P = [0.3, 0.3, 0.2, 0.15, 0.05]

class OS():
    """
    Simulate a simple OS cache handler
    """
    def __init__(self, limit, n_pages):
        super(OS, self).__init__()
        self.limit = limit
        self.n_pages = n_pages
        if len(P) != self.n_pages:
            raise Exception("Size mismatch for P and n_pages")

    def init_pages(self):
        pages = {}
        NT = defaultdict(int)
        for i in range(self.limit):
            # page_id = self.get_id()
            page_id = i  # For now let it be sequential
            lu = 0  # No. of timesteps ago this page was accessed ~LRU
            nt = 1  # No. of times this page was accessed ~LFU
            page = [lu, nt]
            NT[page_id] += 1  # Update NT dict
            pages[page_id] = page
        return pages, NT

    def get_id(self):
        return int(np.random.choice(np.arange(self.n_pages), p=P))
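A quick usage sketch of this module (assuming it is saved as os_sim.py, the filename implied by the import in Section 8.2); the variable names are illustrative only:

from os_sim import OS

os_handler = OS(limit=3, n_pages=5)
pages, NT = os_handler.init_pages()   # e.g. {0: [0, 1], 1: [0, 1], 2: [0, 1]}
requested_page = os_handler.get_id()  # page id sampled from P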
8.2 CacheEnv

import gym
import numpy as np
import pandas as pd
import pickle
from collections import defaultdict
from os_sim import OS

LIMIT = 3
N_PAGES = 5
EPS_LEN = 3
POS_REW = 1
NEG_REW = -1
HEAVY_NEG_R = -10

class CacheEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, limit=LIMIT, n_pages=N_PAGES, eps_len=EPS_LEN):
        super(CacheEnv, self).__init__()
        self.limit = limit
        self.n_pages = n_pages
        self.eps_len = eps_len
        self.os = OS(limit, n_pages)
        self.pages, self.NT = self.os.init_pages()
        self.total_hits = 0
        self.timestep = 0  # counter; if this reaches eps_len, return done=True
        self.done = False
        self.new_page_id = -1

    def step(self, action, test=False):
        """
        First OS will send a page id (randomly from distribution P);
        based on the action, choose to evict a page and
        allocate this page id in the cache in place of the 'action' id
        """
        self.timestep += 1
        if self.timestep >= self.eps_len:
            self.done = True  # Episode reached its end
        new_page_id = self.os.get_id()  # This is the page requested by the OS
        self.new_page_id = new_page_id  # Store for debugging
        reward, hit = self.allocate_cache(action, new_page_id)
        if hit:
            observation = f"This was a hit, OS asked for: {new_page_id}"
            self.total_hits += 1
        else:
            observation = f"This was not a hit, OS asked for: {new_page_id}"
        return self.pages, reward, self.done, observation

    def reset(self):
        self.pages, self.NT = self.os.init_pages()
        self.total_hits = 0
        self.timestep = 0  # reset the step counter so a new episode can run
        self.done = False
        return self.pages

    def render(self, mode='human'):
        pass

    def close(self):
        pass

    def if_allocated(self, id):
        """
        returns True if 'id' is currently allocated a cache slot
        """
        if id in self.pages.keys():
            return True
        return False

    def allocate_cache(self, action, id):
        """
        remove page 'action', add page 'id'
        """
        action = int(action)
        id = int(id)
        hit = False  # Page hit or not?
        self.NT[id] += 1
        # For all the pages except id, increment their lu counter
        for page_id in self.pages.keys():
            if page_id == id:
                continue
            else:
                self.pages[page_id][0] += 1
        if action not in self.pages.keys():
            # Agent asked to remove a page that wasn't even allocated
            return HEAVY_NEG_R, hit
        if self.if_allocated(id):
            hit = True  # HIT!
            page = self.pages[id]
            page[0] = 0
            page[1] += 1
            self.pages[id] = page
            reward = POS_REW  # positive reward for a hit
        else:
            self.pages.pop(action)  # Remove page 'action'
            self.pages[id] = [0, self.NT[id]]  # Add page 'id'
            reward = NEG_REW  # negative reward for no hit
        return reward, hit

if __name__ == "__main__":
    env = CacheEnv()
    env.reset()
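As a sanity check, the environment can be driven by a random agent; this short sketch is illustrative and not part of the report's experiments:

import numpy as np

env = CacheEnv()   # limit=3, n_pages=5, eps_len=3 by default
pages = env.reset()
done = False
while not done:
    action = np.random.randint(0, env.n_pages)   # evict a random page id
    pages, reward, done, obs = env.step(action)
    print(obs, "reward:", reward)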
8.3 Q-Learning

import gym
import numpy as np
import time

from cache_env import CacheEnv  # assumed filename for the CacheEnv module of Section 8.2

def init_q(s, a, type="ones"):
    """
    initialize the Q-Table
    """
    if type == "ones":
        return np.ones((s, a))
    elif type == "random":
        return np.random.random((s, a))
    elif type == "zeros":
        return np.zeros((s, a))

def epsilon_greedy(Q, epsilon, n_actions, s, train=False):
    """
    Epsilon greedy policy for exploration-exploitation tradeoff
    """
    if train or np.random.rand() >= epsilon:
        action = np.argmax(Q[s, :])
    else:
        action = np.random.randint(0, n_actions)
    return action

def qlearning(alpha, gamma, epsilon, episodes, max_steps, n_tests, render=False, test=False):
    """
    A simple Q-Learning algorithm
    More info: https://github.com/TimeTraveller-San/RL_from_scratch
    """
    env = CacheEnv()  # Init using default args
    n_states, n_actions = env.observation_space.n, env.action_space.n
    Q = init_q(n_states, n_actions, type="ones")
    timestep_reward = []
    for episode in range(episodes):
        print(f"Episode: {episode}")
        s = env.reset()
        a = epsilon_greedy(Q, epsilon, n_actions, s)
        t = 0
        total_reward = 0
        done = False
        while t < max_steps:
            if render:
                env.render()
            t += 1
            s_, reward, done, info = env.step(a)
            total_reward += reward
            a_ = np.argmax(Q[s_, :])
            if done:
                Q[s, a] += alpha * (reward - Q[s, a])
            else:
                Q[s, a] += alpha * (reward + (gamma * Q[s_, a_]) - Q[s, a])
            s, a = s_, a_
            if done:
                if render:
                    print(f"This episode took {t} timesteps and reward: {total_reward}")
                timestep_reward.append(total_reward)
                break
    if render:
        print(f"Here are the Q values:\n{Q}\nTesting now:")
    if test:
        test_agent(Q, env, n_tests, n_actions)  # test_agent is defined elsewhere in the project
    return timestep_reward
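A hypothetical training call is sketched below; gamma = 0.9, episodes = 1000 and max_steps = 4 follow Section 7.1, while alpha, epsilon and n_tests are illustrative values not specified in the report:

rewards = qlearning(alpha=0.1, gamma=0.9, epsilon=0.1,
                    episodes=1000, max_steps=4, n_tests=10)
print("Mean episodic reward:", np.mean(rewards))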
9 Future work
Eventually, we want to use the experience gathered from this system to train a deep reinforcement learning function approximator that will decide which page to evict. The experience we collect over a period of time turns this unsupervised learning problem into a supervised learning problem. The current Linux cache behaves mostly like LRU; however, it is sometimes beneficial to behave like LFU. The current eviction algorithm is somewhere in between, but certainly not the best, and function approximators could hopefully give better results. There has not been much research in applying AI techniques to caching. Because this is very much an empirical problem rather than a theoretical one, function approximators might give us a better solution than currently exists. One obvious problem in using neural network based reinforcement learning for kernel-level caching decisions is that all frameworks run in user space, whereas caching decisions are taken in kernel space. Alternatively, we could apply this approach to user-space caches and apply simpler kernel-level functions for policy gradient based reinforcement learning to the problem of caching. We could also use techniques to evaluate which policies work better and switch between these policies according to the observed access patterns.
10 Conclusion
Our work is a novel attempt to learn the variations in data traffic during file read and write operations and to model these variations for effective page caching. Our method encapsulates the caching problem as an MDP (Markov Decision Process) and makes use of RL (Reinforcement Learning) to optimally choose its actions depending on the fluctuations encountered in the environment, in order to maximize the cache hit ratio.

References

[Alabed, 2019] Alabed, S. (2019).
RLCache: Automated cache management using reinforcement learning.

[Andrew, 1999] Andrew, A. M. (1999). Reinforcement Learning: An Introduction by Richard S. Sutton and Andrew G. Barto, Adaptive Computation and Machine Learning series, MIT Press (Bradford Book), Cambridge, Mass., 1998, xviii + 322 pp, ISBN 0-262-19398-1. Robotica, 17(2):229-235.

[Bansal and Modha, 2004] Bansal, S. and Modha, D. S. (2004). CAR: Clock with Adaptive Replacement. In Proceedings of the 3rd USENIX Conference on File and Storage Technologies, FAST '04, pages 187-200, Berkeley, CA, USA. USENIX Association.

[Johnson and Shasha, 1994] Johnson, T. and Shasha, D. (1994). 2Q: A low overhead high performance buffer management replacement algorithm. In VLDB.

[Megiddo and Modha, 2003] Megiddo, N. and Modha, D. S. (2003). ARC: A self-tuning, low overhead replacement cache. In Proceedings of the 2nd USENIX Conference on File and Storage Technologies, FAST '03, pages 115-130, Berkeley, CA, USA. USENIX Association.

[Surbhi, 2018] Surbhi, P. (2018). Better caching using reinforcement learning. https://github.com/csurbhi/UCB1-algorithm-for-better-caching.

[Vietri et al., 2018] Vietri, G., Rodriguez, L. V., Martinez, W. A., Lyons, S., Liu, J., Rangaswami, R., Zhao, M., and Narasimhan, G. (2018). Driving cache replacement with ML-based LeCaR. In Proceedings of the 10th USENIX Conference on Hot Topics in Storage and File Systems, HotStorage '18, pages 3-3, Berkeley, CA, USA. USENIX Association.
<span id="class#" class="tooltip red_selection" title="tooltip#"> </span>
<!--Content_End-->
<!--Body_Start-->
</t+k+1></span></t></span></p>
</div>
</div>
<div class="upload-content content_info">
<div class="command-rcol">
<div class="title_right_col">Sources found:</div>
<div class="italic_hint_txt sources_hint">Click on the highlighted sentence to see sources.</div>
<ul class="list_link">
<h3 style="">Internet Pages</h3>
<!--Body_End-->
<li class="n1" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://ethesis.nitrkl.ac.in/1709/2/das.pdf" title="Click on the link to visit the source.">http://ethesis.nitrkl.ac.in/1709/2/das.p</a></li><li class="n3" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://www.cse.iitm.ac.in/index.php" title="Click on the link to visit the source.">http://www.cse.iitm.ac.in/index.php</a></li><li class="n4" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.slideshare.net/zbhavyai/implementation-of-automatic-railway-gate-controller" title="Click on the link to visit the source.">https://www.slideshare.net/zbhavyai/impl</a></li><li class="n6" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.iitk.ac.in/dord/data/Annual_Report/AnnualReport2007-08finalfiles.pdf" title="Click on the link to visit the source.">https://www.iitk.ac.in/dord/data/Annual_</a></li><li class="n7" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/profile/Mohd_Faraz3/publication/329196488_Study_of_vehicle_dynamics_of_a_formula_student_car/links/5bfc4279458515b41d106262/Study-of-vehicle-dynamics-of-a-formula-student-car.pdf" title="Click on the link to visit the source.">https://www.researchgate.net/profile/Moh</a></li><li class="n8" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://innovaiv.com/data/Major_Thesis.pdf" title="Click on the link to visit the source.">http://innovaiv.com/data/Major_Thesis.pd</a></li><li class="n10" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://www.kscst.iisc.ernet.in/spp/41_series/40S_awarded_&_selected_projs_further_devpt/40S_BE_2311.pdf" title="Click on the link to visit the source.">http://www.kscst.iisc.ernet.in/spp/41_se</a></li><li class="n11" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/profile/Nagendra_Kempegowda/publication/284240507_MODELING_SIMULATION_AND_PERFORMANCE_STUDY_OF_GRID-CONNECTED_WIND_AND_PHOTOVOLTAIC_HYBRID_ENERGY_SYSTEM/links/5650130908aefe619b122d48.pdf?origin=publication_list" title="Click on the link to visit the source.">https://www.researchgate.net/profile/Nag</a></li><li class="n14" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://www-scf.usc.edu/~ppremkum/portfolio/pdf/report.pdf" title="Click on the link to visit the 
source.">http://www-scf.usc.edu/~ppremkum/portfol</a></li><li class="n15" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://ethesis.nitrkl.ac.in/7658/1/2015_BT_Development_Abhishek_Kumar_Gupta.pdf" title="Click on the link to visit the source.">http://ethesis.nitrkl.ac.in/7658/1/2015_</a></li><li class="n20" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.cnblogs.com/ctrltab/p/10013815.html" title="Click on the link to visit the source.">https://www.cnblogs.com/ctrltab/p/100138</a></li><li class="n21" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://codingnerves.000webhostapp.com/2018/02/c-program-lfu-least-frequently-used-page-replacement-algorithm" title="Click on the link to visit the source.">https://codingnerves.000webhostapp.com/2</a></li><li class="n23" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://dl.acm.org/citation.cfm?id=511340" title="Click on the link to visit the source.">https://dl.acm.org/citation.cfm?id=51134</a></li><li class="n51" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://blogs.oracle.com/roch/general-54/rss" title="Click on the link to visit the source.">https://blogs.oracle.com/roch/general-54</a></li><li class="n53" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/325215559_Deep_Reinforcement_Learning_for_Network_Slicing" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n57" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.ijert.org/clru-a-page-replacemnet-algorithm-for-nand-flash-based-electronic-devices" title="Click on the link to visit the source.">https://www.ijert.org/clru-a-page-replac</a></li><li class="n58" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.allinterview.com/showanswers/3289/what-is-hit-ratio.html" title="Click on the link to visit the source.">https://www.allinterview.com/showanswers</a></li><li class="n60" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://aircconline.com/ijcsit/V10N2/10218ijcsit08.pdf" title="Click on the link to visit the source.">http://aircconline.com/ijcsit/V10N2/1021</a></li><li class="n61" style="display: list-item;"><span 
style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/221082511_Learning_based_address_mapping_for_improving_the_performance_of_memory_subsystems" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n63" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/308981137_Multi-Objective_Deep_Reinforcement_Learning" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n64" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://arxiv.org/pdf/1711.03936.pdf" title="Click on the link to visit the source.">https://arxiv.org/pdf/1711.03936.pdf</a></li><li class="n65" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.sciencedirect.com/science/article/pii/S1389128618311216" title="Click on the link to visit the source.">https://www.sciencedirect.com/science/ar</a></li><li class="n70" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://dl.acm.org/citation.cfm?id=2339762" title="Click on the link to visit the source.">https://dl.acm.org/citation.cfm?id=23397</a></li><li class="n72" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://cpb-us-e1.wpmucdn.com/sites.usc.edu/dist/b/364/files/2019/05/webmodel.pdf" title="Click on the link to visit the source.">https://cpb-us-e1.wpmucdn.com/sites.usc.</a></li><li class="n76" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://medium.com/@saisandeepmopuri/system-design-online-judge-with-data-modelling-40cb2b53bfeb" title="Click on the link to visit the source.">https://medium.com/@saisandeepmopuri/sys</a></li><li class="n77" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/317420360_Learning_Based_Content_Caching_and_Sharing_for_Wireless_Networks" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n79" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://arxiv.org/pdf/1909.13158" title="Click on the link to visit the source.">https://arxiv.org/pdf/1909.13158</a></li><li class="n84" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" 
href="https://www.oreilly.com/library/view/rtf-pocket-guide/9781449302047/ch01.html" title="Click on the link to visit the source.">https://www.oreilly.com/library/view/rtf</a></li><li class="n86" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.youtube.com/watch?v=ECAGOfeKU5I" title="Click on the link to visit the source.">https://www.youtube.com/watch?v=ECAGOfeK</a></li><li class="n87" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.academia.edu/630063/Generalized_markov_decision_processes_Dynamic-programming_and_reinforcement-learning_algorithms" title="Click on the link to visit the source.">https://www.academia.edu/630063/Generali</a></li><li class="n88" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://blog.close.com/follow-up-emails" title="Click on the link to visit the source.">https://blog.close.com/follow-up-emails</a></li><li class="n92" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://www.ic.unicamp.br/~celio/mc404-2013/arm-manuals/Paging%20Systems.pdf" title="Click on the link to visit the source.">http://www.ic.unicamp.br/~celio/mc404-20</a></li><li class="n95" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.thefreelibrary.com/Cache-filter%3a+a+cache+permission+policy+for+information-centric...-a0442779733" title="Click on the link to visit the source.">https://www.thefreelibrary.com/Cache-fil</a></li><li class="n96" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://medium.com/system-design-blog/what-is-caching-1492abb92143" title="Click on the link to visit the source.">https://medium.com/system-design-blog/wh</a></li><li class="n99" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://en.wikipedia.org/wiki/Instruction_cache" title="Click on the link to visit the source.">https://en.wikipedia.org/wiki/Instructio</a></li><li class="n104" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://girdhargopalbansal.blogspot.com/2013/05/operating-system-gate-questions.html" title="Click on the link to visit the source.">https://girdhargopalbansal.blogspot.com/</a></li><li class="n105" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://stackoverflow.com/a/37826297" title="Click on the link to visit the source.">https://stackoverflow.com/a/37826297</a></li><li class="n108" 
style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://dl.acm.org/citation.cfm?id=2982114" title="Click on the link to visit the source.">https://dl.acm.org/citation.cfm?id=29821</a></li><li class="n109" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://docs.spring.io/spring-data-gemfire/docs/1.3.0.M1/reference/htmlsingle/" title="Click on the link to visit the source.">https://docs.spring.io/spring-data-gemfi</a></li><li class="n110" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.slideshare.net/BernardMarr/3-steps-to-tackle-the-problem-of-bias-in-artificial-intelligence" title="Click on the link to visit the source.">https://www.slideshare.net/BernardMarr/3</a></li><li class="n115" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.cse.iitk.ac.in/users/biswap/BITP.pdf" title="Click on the link to visit the source.">https://www.cse.iitk.ac.in/users/biswap/</a></li><li class="n116" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.9tut.com/?s=IOS" title="Click on the link to visit the source.">https://www.9tut.com/?s=IOS</a></li><li class="n119" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://wiki2.org/en/Cache_replacement_policies" title="Click on the link to visit the source.">https://wiki2.org/en/Cache_replacement_p</a></li><li class="n120" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://faculty.mccombs.utexas.edu/deepayan.chakrabarti/mywww/papers/cikm18-labelprop.pdf" title="Click on the link to visit the source.">https://faculty.mccombs.utexas.edu/deepa</a></li><li class="n122" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/220788195_LFU-K_An_Effective_Buffer_Management_Replacement_Algorithm" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n124" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://en.wikipedia.org/wiki/Least_Recently_Used" title="Click on the link to visit the source.">https://en.wikipedia.org/wiki/Least_Rece</a></li><li class="n126" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" 
href="http://www2.cs.uregina.ca/~hamilton/courses/330/notes/memory/page_replacement.html" title="Click on the link to visit the source.">http://www2.cs.uregina.ca/~hamilton/cour</a></li><li class="n128" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://robust.cs.utep.edu/~freudent/tannenbaum-4e-notes/ch3-mem.html" title="Click on the link to visit the source.">http://robust.cs.utep.edu/~freudent/tann</a></li><li class="n129" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://en.wikipedia.org/wiki/Page_replacement_algorithm" title="Click on the link to visit the source.">https://en.wikipedia.org/wiki/Page_repla</a></li><li class="n130" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://sites.google.com/site/indusitfactory/cobol-interview-questions" title="Click on the link to visit the source.">https://sites.google.com/site/indusitfac</a></li><li class="n135" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/221595675_LIRS_An_Efficient_Low_Inter-reference_Recency_Set_Replacement_to_Improve_Buffer_Cache_Performance" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n136" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/220773479_Reuse-distance-based_Miss-rate_Prediction_on_a_Per_Instruction_Basis" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n137" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://en.wikipedia.org/wiki/LIRS_caching_algorithm" title="Click on the link to visit the source.">https://en.wikipedia.org/wiki/LIRS_cachi</a></li><li class="n141" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.youtube.com/watch?v=5lqdsCWN6Cg" title="Click on the link to visit the source.">https://www.youtube.com/watch?v=5lqdsCWN</a></li><li class="n142" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.cs.uic.edu/~jbell/CourseNotes/OperatingSystems/9_VirtualMemory.html" title="Click on the link to visit the source.">https://www.cs.uic.edu/~jbell/CourseNote</a></li><li class="n143" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://u.cs.biu.ac.il/~wiseman/2os/2os/os2.pdf" title="Click on the link to visit the 
source.">http://u.cs.biu.ac.il/~wiseman/2os/2os/o</a></li><li class="n145" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.sciencedirect.com/science/article/pii/S1383762111001354" title="Click on the link to visit the source.">https://www.sciencedirect.com/science/ar</a></li><li class="n149" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.usenix.org/events/fast04/tech/bansal.html" title="Click on the link to visit the source.">https://www.usenix.org/events/fast04/tec</a></li><li class="n152" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.geeksforgeeks.org/ml-semi-supervised-learning/" title="Click on the link to visit the source.">https://www.geeksforgeeks.org/ml-semi-su</a></li><li class="n153" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3858647/" title="Click on the link to visit the source.">https://www.ncbi.nlm.nih.gov/pmc/article</a></li><li class="n154" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.cs.ubc.ca/~murphyk/Teaching/CS340-Fall07/introHandout.pdf" title="Click on the link to visit the source.">https://www.cs.ubc.ca/~murphyk/Teaching/</a></li><li class="n155" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://export.arxiv.org/rss/cs" title="Click on the link to visit the source.">http://export.arxiv.org/rss/cs</a></li><li class="n158" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://link.springer.com/article/10.1007%2Fs13748-019-00185-z" title="Click on the link to visit the source.">https://link.springer.com/article/10.100</a></li><li class="n159" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://arxiv.org/pdf/1402.0590v1" title="Click on the link to visit the source.">https://arxiv.org/pdf/1402.0590v1</a></li><li class="n164" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.hindawi.com/journals/misy/2016/4041291/" title="Click on the link to visit the source.">https://www.hindawi.com/journals/misy/20</a></li><li class="n167" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" 
href="https://www.datasciencecentral.com/profiles/blogs/reinforcement-learning-explained-overview-comparisons-and" title="Click on the link to visit the source.">https://www.datasciencecentral.com/profi</a></li><li class="n169" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://patents.google.com/patent/US9679258B2/en" title="Click on the link to visit the source.">https://patents.google.com/patent/US9679</a></li><li class="n177" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://webpages.iust.ac.ir/mozayani/Papers-pdf/v6-2943-2950.pdf" title="Click on the link to visit the source.">http://webpages.iust.ac.ir/mozayani/Pape</a></li><li class="n180" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/261974909_A_Geologic_and_Geomorphologic_Analysis_of_the_Zacatecas_and_Guadalupe_Quadrangles_in_Order_to_Define_Hazardous_Zones_Associated_with_the_Erosion_Processes" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n181" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://towardsdatascience.com/modeling-with-reinforcement-learning-47593623c7d6" title="Click on the link to visit the source.">https://towardsdatascience.com/modeling-</a></li><li class="n182" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.lesswrong.com/posts/hN2aRnu798yas5b2k/a-crash-course-in-the-neuroscience-of-human-motivation" title="Click on the link to visit the source.">https://www.lesswrong.com/posts/hN2aRnu7</a></li><li class="n183" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.sciencedirect.com/topics/computer-science/markov-decision-process" title="Click on the link to visit the source.">https://www.sciencedirect.com/topics/com</a></li><li class="n185" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://spinningup.openai.com/en/latest/spinningup/rl_intro.html" title="Click on the link to visit the source.">https://spinningup.openai.com/en/latest/</a></li><li class="n187" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://crumplab.github.io/statistics/probability-sampling-and-estimation.html" title="Click on the link to visit the source.">https://crumplab.github.io/statistics/pr</a></li><li class="n189" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" 
href="https://www.freecodecamp.org/news/an-introduction-to-reinforcement-learning-4339519de419/" title="Click on the link to visit the source.">https://www.freecodecamp.org/news/an-int</a></li><li class="n193" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://dzone.com/articles/reinforcement-learning-overview" title="Click on the link to visit the source.">https://dzone.com/articles/reinforcement</a></li><li class="n194" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://varunkamboj.typepad.com/files/engineering-fluid-mechanics-1.pdf" title="Click on the link to visit the source.">https://varunkamboj.typepad.com/files/en</a></li><li class="n196" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://arxiv.org/pdf/1710.11277.pdf" title="Click on the link to visit the source.">https://arxiv.org/pdf/1710.11277.pdf</a></li><li class="n197" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://immunityageing.biomedcentral.com/articles/10.1186/s12979-017-0111-6" title="Click on the link to visit the source.">https://immunityageing.biomedcentral.com</a></li><li class="n200" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://arxiv.org/pdf/1009.2566" title="Click on the link to visit the source.">https://arxiv.org/pdf/1009.2566</a></li><li class="n202" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://www.cs.rochester.edu/~brown/242/assts/termprojs/reinf.pdf" title="Click on the link to visit the source.">http://www.cs.rochester.edu/~brown/242/a</a></li><li class="n203" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://courses.ece.ubc.ca/592/PDFfiles/Reinforcement_Learning_c.pdf" title="Click on the link to visit the source.">http://courses.ece.ubc.ca/592/PDFfiles/R</a></li><li class="n206" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.analyticsvidhya.com/blog/2019/11/game-theory-ai/" title="Click on the link to visit the source.">https://www.analyticsvidhya.com/blog/201</a></li><li class="n211" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://en.m.wikipedia.org/wiki/Explicit_memory" title="Click on the link to visit the source.">https://en.m.wikipedia.org/wiki/Explicit</a></li><li class="n216" style="display: list-item;"><span 
style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://nichtverfuegbar.wordpress.com/2017/07/17/reinforcement-learning/" title="Click on the link to visit the source.">https://nichtverfuegbar.wordpress.com/20</a></li><li class="n223" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://pdfs.semanticscholar.org/8a86/1120542b2b6b0dcf38d7a87bdf15f4dee38e.pdf" title="Click on the link to visit the source.">https://pdfs.semanticscholar.org/8a86/11</a></li><li class="n228" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://joshgreaves.com/reinforcement-learning/" title="Click on the link to visit the source.">https://joshgreaves.com/reinforcement-le</a></li><li class="n229" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5924785/" title="Click on the link to visit the source.">https://www.ncbi.nlm.nih.gov/pmc/article</a></li><li class="n234" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://joshgreaves.com/" title="Click on the link to visit the source.">https://joshgreaves.com/</a></li><li class="n239" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://www.math.uchicago.edu/~may/VIGRE/VIGRE2007/REUPapers/FINALFULL/Volfovsky.pdf" title="Click on the link to visit the source.">http://www.math.uchicago.edu/~may/VIGRE/</a></li><li class="n240" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://medium.com/deep-math-machine-learning-ai/ch-12-reinforcement-learning-complete-guide-towardsagi-ceea325c5d53" title="Click on the link to visit the source.">https://medium.com/deep-math-machine-lea</a></li><li class="n243" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://gphu.public.iastate.edu/lecture.pdf" title="Click on the link to visit the source.">https://gphu.public.iastate.edu/lecture.</a></li><li class="n244" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://cs.brown.edu/courses/csci2951-k/papers/kaelbling98.pdf" title="Click on the link to visit the source.">http://cs.brown.edu/courses/csci2951-k/p</a></li><li class="n245" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" 
href="https://towardsdatascience.com/self-learning-ai-agents-part-i-markov-decision-processes-baf6b8fc4c5f?source=---------9------------------" title="Click on the link to visit the source.">https://towardsdatascience.com/self-lear</a></li><li class="n246" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://en.wikipedia.org/wiki/Policy_iteration" title="Click on the link to visit the source.">https://en.wikipedia.org/wiki/Policy_ite</a></li><li class="n247" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://github.com/mebusy/notes/blob/master/dev_notes/RL_DavidSilver.md" title="Click on the link to visit the source.">https://github.com/mebusy/notes/blob/mas</a></li><li class="n248" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://www0.cs.ucl.ac.uk/staff/D.Silver/web/Teaching_files/MDP.pdf" title="Click on the link to visit the source.">http://www0.cs.ucl.ac.uk/staff/D.Silver/</a></li><li class="n252" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://grey.colorado.edu/CompCogNeuro/index.php/CCNBook/Motor" title="Click on the link to visit the source.">https://grey.colorado.edu/CompCogNeuro/i</a></li><li class="n253" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/262305263_Oppositional_extension_of_reinforcement_learning_techniques" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n256" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://en.wikipedia.org/wiki/Q_learning" title="Click on the link to visit the source.">https://en.wikipedia.org/wiki/Q_learning</a></li><li class="n257" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://ai.stackexchange.com/questions/9024/why-are-q-values-updated-according-to-the-greedy-policy" title="Click on the link to visit the source.">https://ai.stackexchange.com/questions/9</a></li><li class="n260" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://towardsdatascience.com/introduction-to-various-reinforcement-learning-algorithms-i-q-learning-sarsa-dqn-ddpg-72a5e0cb6287" title="Click on the link to visit the source.">https://towardsdatascience.com/introduct</a></li><li class="n262" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://europepmc.org/articles/PMC5797660" title="Click on the 
link to visit the source.">http://europepmc.org/articles/PMC5797660</a></li><li class="n264" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.ai.rug.nl/~mwiering/GROUP/ARTICLES/CACLA_Discrete_Problems.pdf" title="Click on the link to visit the source.">https://www.ai.rug.nl/~mwiering/GROUP/AR</a></li><li class="n265" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.freecodecamp.org/news/an-introduction-to-q-learning-reinforcement-learning-14ac0b4493cc/" title="Click on the link to visit the source.">https://www.freecodecamp.org/news/an-int</a></li><li class="n270" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://faculty.ucmerced.edu/mcarreira-perpinan/teaching/CSE176/lecturenotes.pdf" title="Click on the link to visit the source.">http://faculty.ucmerced.edu/mcarreira-pe</a></li><li class="n271" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.sciencedirect.com/topics/neuroscience/reinforcement-learning" title="Click on the link to visit the source.">https://www.sciencedirect.com/topics/neu</a></li><li class="n272" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://arxiv.org/pdf/1802.03006.pdf" title="Click on the link to visit the source.">https://arxiv.org/pdf/1802.03006.pdf</a></li><li class="n274" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/221619239_Double_Q-learning" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n276" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.oreilly.com/radar/reinforcement-learning-explained/" title="Click on the link to visit the source.">https://www.oreilly.com/radar/reinforcem</a></li><li class="n278" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://danieltakeshi.github.io/2017/03/28/going-deeper-into-reinforcement-learning-fundamentals-of-policy-gradients/" title="Click on the link to visit the source.">https://danieltakeshi.github.io/2017/03/</a></li><li class="n280" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.sciencedirect.com/science/article/pii/S1874490718303033" title="Click on the link to visit the source.">https://www.sciencedirect.com/science/ar</a></li><li class="n285" style="display: list-item;"><span 
style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/320792384_Coded_Caching_Under_Arbitrary_Popularity_Distributions" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n291" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://arxiv.org/pdf/1702.07475.pdf" title="Click on the link to visit the source.">https://arxiv.org/pdf/1702.07475.pdf</a></li><li class="n294" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://practice.geeksforgeeks.org/answers/Amit+Khandelwal+1/" title="Click on the link to visit the source.">https://practice.geeksforgeeks.org/answe</a></li><li class="n295" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.sciencedirect.com/science/article/pii/S0020025519309107" title="Click on the link to visit the source.">https://www.sciencedirect.com/science/ar</a></li><li class="n296" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/328327880_Caching_at_Base_Stations_With_Heterogeneous_User_Demands_and_Spatial_Locality" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n301" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://en.wikipedia.org/wiki/Wikipedia:Reference_desk/Archives/Science/December_2005" title="Click on the link to visit the source.">https://en.wikipedia.org/wiki/Wikipedia:</a></li><li class="n305" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://scienceblogs.com/startswithabang/2013/05/29/the-fundamental-constants-behind-our-universe" title="Click on the link to visit the source.">https://scienceblogs.com/startswithabang</a></li><li class="n307" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://mil.ufl.edu/5666/handouts/ERL_tae.PDF" title="Click on the link to visit the source.">https://mil.ufl.edu/5666/handouts/ERL_ta</a></li><li class="n310" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/328189466_ns3-gym_Extending_OpenAI_Gym_for_Networking_Research" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n321" style="display: list-item;"><span 
style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://arxiv.org/pdf/1906.02312" title="Click on the link to visit the source.">https://arxiv.org/pdf/1906.02312</a></li><li class="n325" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/322583137_Pareto_Optimal_Solutions_for_Network_Defense_Strategy_Selection_Simulator_in_Multi-Objective_Reinforcement_Learning" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n332" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://jwcn-eurasipjournals.springeropen.com/articles/10.1186/1687-1499-2012-63" title="Click on the link to visit the source.">https://jwcn-eurasipjournals.springerope</a></li><li class="n335" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://docs.microsoft.com/en-us/azure/sql-data-warehouse/sql-data-warehouse-concept-resource-utilization-query-activity" title="Click on the link to visit the source.">https://docs.microsoft.com/en-us/azure/s</a></li><li class="n336" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/228722841_A_quantitative_study_of_recency_and_frequency_based_web_cache_replacement_strategies" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n343" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.hindawi.com/journals/bmri/2018/1049257/" title="Click on the link to visit the source.">https://www.hindawi.com/journals/bmri/20</a></li><li class="n344" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://quizlet.com/41473236/exam-4-final-exam-flash-cards/" title="Click on the link to visit the source.">https://quizlet.com/41473236/exam-4-fina</a></li><li class="n350" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://dl.acm.org/citation.cfm?id=2999122" title="Click on the link to visit the source.">https://dl.acm.org/citation.cfm?id=29991</a></li><li class="n401" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://news.ycombinator.com/item?id=14790251" title="Click on the link to visit the source.">https://news.ycombinator.com/item?id=147</a></li><li class="n406" style="display: list-item;"><span 
style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.researchgate.net/publication/268623993_Planning_with_Markov_Decision_Processes_An_AI_Perspective" title="Click on the link to visit the source.">https://www.researchgate.net/publication</a></li><li class="n407" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.sciencedirect.com/science/article/pii/S0743731518305422" title="Click on the link to visit the source.">https://www.sciencedirect.com/science/ar</a></li><li class="n409" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://www.cambridge.org/core/journals/robotica/article/reinforcement-learning-an-introduction-by-richard-s-sutton-and-andrew-g-barto-adaptive-computation-and-machine-learning-series-mit-press-bradford-book-cambridge-mass-1998-xviii-322-pp-isbn-0262193981-hardback-3195/176DB49A1247A53B75B81EFCF32CA157" title="Click on the link to visit the source.">https://www.cambridge.org/core/journals/</a></li><li class="n411" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://journal.hep.com.cn/fcs/EN/10.1007/s11704-017-6500-3" title="Click on the link to visit the source.">http://journal.hep.com.cn/fcs/EN/10.1007</a></li><li class="n412" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://dl.acm.org/citation.cfm?id=3050755" title="Click on the link to visit the source.">https://dl.acm.org/citation.cfm?id=30507</a></li><li class="n413" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://theory.stanford.edu/~megiddo/bio.html" title="Click on the link to visit the source.">http://theory.stanford.edu/~megiddo/bio.</a></li><li class="n414" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://github.com/facebook/rocksdb/commit/6e78fe3c8d35fa1c0836af4501e0f272bc363bab" title="Click on the link to visit the source.">https://github.com/facebook/rocksdb/comm</a></li><li class="n417" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="http://people.cis.fiu.edu/liux/category/research/papers/" title="Click on the link to visit the source.">http://people.cis.fiu.edu/liux/category/</a></li><li class="n418" style="display: list-item;"><span style="color:#ffffff;background-color:#95a5a6;text-align:center;position:relative;display:inline-block;width:35px"><1%</span> <a class="show_texts" href="https://dl.acm.org/citation.cfm?id=2387914" title="Click on the link to visit the source.">https://dl.acm.org/citation.cfm?id=23879</a></li>
<li class="class#" style="display: list-item;"> <a class="show_texts" href="#" title="Click on the link to visit the source."></a> </li>
<!--Links_End-->
<!--Footer_Start-->
</ul>
<a href="#" title="Click on the link to see all sources" class="all_sources" style="display: none;">View all sources</a> </div>
<div class="clear"> </div>
</div>
</div>
</div>
</div>
</div>
<script type="text/javascript">
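// Interactive behaviour for the plagiarism source list:
//  - clicking a highlighted (red/green) span in the report filters the list to that span's sources,
//  - the "View all sources" link restores the full, unfiltered list,
//  - clicking a source entry opens its URL in a new tab.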
$('.show_texts').attr("title",'Click on the link to visit the source.');
// jQuery's .live() was removed in 1.9; delegated .on() (available since jQuery 1.7) is the equivalent binding.
$(document).on('click', '.red_selection, .green_selection', function () {
unselect_selection();
var link_class = $(this).attr('id');
$(this).removeClass('red_selection').addClass('green_selection');
$('.list_link li').hide();
$('.list_link li.' + link_class).show();
$('.sources_hint').text('Click on the link below to visit the source.');
$('.all_sources').show();
checkListHeader();
});
//Show all sources: reset any filter when the "plag_sources" or "all_sources" link is clicked
$('a.plag_sources, a.all_sources').click(function () {
unselect_selection();
$('.list_link li').show();
$('.sources_hint').text('Click on the link below to visit the source.');
$('.all_sources').hide();
checkListHeader();
return false;
});
//Open the clicked source in a new tab (entries whose stored URL is 'Empty' are ignored)
$('.list_link li a.show_texts').click(function () {
unselect_selection();
var classList = $(this).parent().attr('class').split(/\s+/); // parent <li> class names (not used below; kept from the report generator)
var url=$(this).attr('href');
if(url!='Empty'){ window.open(url, '_blank'); }
return false;
});
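// Revert any active (green) highlight back to red so at most one span stays selected.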
function unselect_selection() {
$('.green_selection').removeClass('green_selection').addClass('red_selection');
}
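// Show a source-list heading only while that list still has visible entries.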
function checkListHeader() {
$('.list_link').each(function () {
if ($(this).find('li:visible').length) {
$(this).find('h3').show();
} else {
$(this).find('h3').hide();
}
})
}
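// Page initialisation: run $.jatt() (presumably the tooltip plugin that consumes the
// title attributes set above), clear any selection and display the full source list.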
$(function () {
$.jatt();
//Reset Selection & Show Links
unselect_selection();
$('.all_sources').hide();
$('.list_link li').show();
checkListHeader();
});
</script>
</div>
<!--Footer_End-->
</body></html>