forked from thuxugang/doudizhu
-
Notifications
You must be signed in to change notification settings - Fork 0
/
log.txt
2092 lines (2081 loc) · 194 KB
/
log.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
learning_rate = 0.001
episode: 0, epsilon: 0.5, loss: 0, win_rate: [ 0. 0. 1.]
episode: 2000, epsilon: 0.5250899999998858, loss: 0.2945, win_rate: [ 0.09445277 0.46676662 0.43878061]
episode: 4000, epsilon: 0.555159999999749, loss: 0.0483606, win_rate: [ 0.11897026 0.45188703 0.42914271]
episode: 6000, epsilon: 0.5846699999996147, loss: 0.0617657, win_rate: [ 0.12631228 0.44009332 0.4335944 ]
episode: 8000, epsilon: 0.6143199999994797, loss: 0.0887938, win_rate: [ 0.13773278 0.43232096 0.42994626]
episode: 10000, epsilon: 0.6440799999993443, loss: 0.0697613, win_rate: [ 0.1469853 0.4309569 0.42205779]
episode: 12000, epsilon: 0.6736599999992097, loss: 0.0504912, win_rate: [ 0.15507041 0.42479793 0.42013166]
episode: 14000, epsilon: 0.703459999999074, loss: 0.0890487, win_rate: [ 0.16213128 0.41975573 0.41811299]
episode: 16000, epsilon: 0.7327599999989407, loss: 0.0360363, win_rate: [ 0.16648959 0.41816136 0.41534904]
episode: 18000, epsilon: 0.7621099999988071, loss: 0.0938439, win_rate: [ 0.17132382 0.41331037 0.41536581]
episode: 20000, epsilon: 0.7914399999986736, loss: 0.0530558, win_rate: [ 0.17579121 0.41072946 0.41347933]
episode: 22000, epsilon: 0.8209299999985394, loss: 0.162449, win_rate: [ 0.18085542 0.40843598 0.4107086 ]
episode: 24000, epsilon: 0.8503999999984053, loss: 0.0342156, win_rate: [ 0.18653389 0.4054831 0.407983 ]
episode: 26000, epsilon: 0.879479999998273, loss: 0.104467, win_rate: [ 0.19176186 0.40225376 0.40598439]
episode: 28000, epsilon: 0.9, loss: 0.0211573, win_rate: [ 0.19745723 0.39859291 0.40394986]
episode: 30000, epsilon: 0.9, loss: 0.0851524, win_rate: [ 0.20195993 0.39672011 0.40131996]
episode: 32000, epsilon: 0.9, loss: 0.106228, win_rate: [ 0.20621231 0.39548764 0.39830005]
episode: 34000, epsilon: 0.9, loss: 0.0472208, win_rate: [ 0.20940561 0.39425311 0.39634128]
episode: 36000, epsilon: 0.9, loss: 0.0424313, win_rate: [ 0.21191078 0.39298908 0.39510014]
episode: 38000, epsilon: 0.9, loss: 0.137321, win_rate: [ 0.21486277 0.39088445 0.39425278]
episode: 40000, epsilon: 0.9, loss: 0.086276, win_rate: [ 0.21736957 0.38914027 0.39349016]
episode: 42000, epsilon: 0.9, loss: 0.0411554, win_rate: [ 0.21997095 0.38806219 0.39196686]
episode: 44000, epsilon: 0.9, loss: 0.0811864, win_rate: [ 0.2226313 0.38635486 0.39101384]
episode: 46000, epsilon: 0.9, loss: 0.0357934, win_rate: [ 0.22523423 0.38520902 0.38955675]
episode: 48000, epsilon: 0.9, loss: 0.0882131, win_rate: [ 0.22745359 0.38330451 0.38924189]
episode: 50000, epsilon: 0.9, loss: 0.0540792, win_rate: [ 0.22907542 0.38265235 0.38827223]
episode: 52000, epsilon: 0.9, loss: 0.0430649, win_rate: [ 0.22988019 0.38168497 0.38843484]
episode: 54000, epsilon: 0.9, loss: 0.0392573, win_rate: [ 0.23182904 0.38003 0.38814096]
episode: 56000, epsilon: 0.9, loss: 0.0997899, win_rate: [ 0.23404939 0.37901109 0.38693952]
episode: 58000, epsilon: 0.9, loss: 0.0652466, win_rate: [ 0.23599593 0.3778728 0.38613127]
episode: 60000, epsilon: 0.9, loss: 0.26159, win_rate: [ 0.23766271 0.37667706 0.38566024]
episode: 62000, epsilon: 0.9, loss: 0.0388225, win_rate: [ 0.23954452 0.37599394 0.38446154]
episode: 64000, epsilon: 0.9, loss: 0.0423146, win_rate: [ 0.2414181 0.37518164 0.38340026]
episode: 66000, epsilon: 0.9, loss: 0.0677372, win_rate: [ 0.24328419 0.37388827 0.38282753]
episode: 68000, epsilon: 0.9, loss: 0.0319142, win_rate: [ 0.24561404 0.37289158 0.38149439]
episode: 70000, epsilon: 0.9, loss: 0.0202877, win_rate: [ 0.2476536 0.37175183 0.38059456]
episode: 72000, epsilon: 0.9, loss: 0.0463033, win_rate: [ 0.24962153 0.37060596 0.3797725 ]
episode: 74000, epsilon: 0.9, loss: 0.249103, win_rate: [ 0.25090201 0.37013013 0.37896785]
episode: 76000, epsilon: 0.9, loss: 0.0254147, win_rate: [ 0.25229931 0.36931093 0.37838976]
episode: 78000, epsilon: 0.9, loss: 0.0734374, win_rate: [ 0.25362495 0.36879014 0.3775849 ]
episode: 80000, epsilon: 0.9, loss: 0.0659173, win_rate: [ 0.25448432 0.36878289 0.37673279]
episode: 82000, epsilon: 0.9, loss: 0.0234815, win_rate: [ 0.25548469 0.36789795 0.37661736]
episode: 84000, epsilon: 0.9, loss: 0.0362075, win_rate: [ 0.25672313 0.36747182 0.37580505]
episode: 86000, epsilon: 0.9, loss: 0.159669, win_rate: [ 0.25755514 0.36656551 0.37587935]
episode: 88000, epsilon: 0.9, loss: 0.0226907, win_rate: [ 0.25881524 0.36621175 0.37497301]
episode: 90000, epsilon: 0.9, loss: 0.03181, win_rate: [ 0.26007489 0.36567371 0.3742514 ]
episode: 92000, epsilon: 0.9, loss: 0.0713771, win_rate: [ 0.26102977 0.36521342 0.37375681]
episode: 94000, epsilon: 0.9, loss: 0.0475698, win_rate: [ 0.261827 0.36494293 0.37323007]
episode: 96000, epsilon: 0.9, loss: 0.131037, win_rate: [ 0.26272643 0.3643712 0.37290237]
episode: 98000, epsilon: 0.9, loss: 0.0853201, win_rate: [ 0.26351772 0.36355751 0.37292477]
episode: 100000, epsilon: 0.9, loss: 0.0277419, win_rate: [ 0.26419736 0.36332637 0.37247628]
episode: 102000, epsilon: 0.9, loss: 0.0351002, win_rate: [ 0.26505622 0.3631533 0.37179047]
episode: 104000, epsilon: 0.9, loss: 0.0918793, win_rate: [ 0.26575706 0.3626119 0.37163104]
episode: 106000, epsilon: 0.9, loss: 0.0439326, win_rate: [ 0.26678994 0.36208149 0.37112857]
episode: 108000, epsilon: 0.9, loss: 0.115934, win_rate: [ 0.26780308 0.36199665 0.37020028]
episode: 110000, epsilon: 0.9, loss: 0.0483172, win_rate: [ 0.26868847 0.3615058 0.36980573]
episode: 112000, epsilon: 0.9, loss: 0.0893005, win_rate: [ 0.26972973 0.36096106 0.3693092 ]
episode: 114000, epsilon: 0.9, loss: 0.102788, win_rate: [ 0.27062043 0.3603828 0.36899676]
episode: 116000, epsilon: 0.9, loss: 0.10074, win_rate: [ 0.27141145 0.36022103 0.36836751]
episode: 118000, epsilon: 0.9, loss: 0.0491942, win_rate: [ 0.2722943 0.35964102 0.36806468]
episode: 120000, epsilon: 0.9, loss: 0.0595602, win_rate: [ 0.27303939 0.35943034 0.36753027]
episode: 122000, epsilon: 0.9, loss: 0.0489912, win_rate: [ 0.27376825 0.35934132 0.36689044]
episode: 124000, epsilon: 0.9, loss: 0.0648632, win_rate: [ 0.27445746 0.35885194 0.36669059]
episode: 126000, epsilon: 0.9, loss: 0.05712, win_rate: [ 0.27527559 0.35842573 0.36629868]
episode: 128000, epsilon: 0.9, loss: 0.0556854, win_rate: [ 0.27605253 0.3583097 0.36563777]
episode: 130000, epsilon: 0.9, loss: 0.0302121, win_rate: [ 0.27698248 0.35775109 0.36526642]
episode: 132000, epsilon: 0.9, loss: 0.0261284, win_rate: [ 0.27745244 0.35743669 0.36511087]
episode: 134000, epsilon: 0.9, loss: 0.0359763, win_rate: [ 0.277983 0.35711674 0.36490026]
episode: 136000, epsilon: 0.9, loss: 0.0414283, win_rate: [ 0.27847589 0.35687973 0.36464438]
episode: 138000, epsilon: 0.9, loss: 0.137298, win_rate: [ 0.27898349 0.35661336 0.36440316]
episode: 140000, epsilon: 0.9, loss: 0.0483653, win_rate: [ 0.27941943 0.35636888 0.36421168]
episode: 142000, epsilon: 0.9, loss: 0.0606391, win_rate: [ 0.28004732 0.35611017 0.36384251]
episode: 144000, epsilon: 0.9, loss: 0.0778435, win_rate: [ 0.28051889 0.35593503 0.36354609]
episode: 146000, epsilon: 0.9, loss: 0.192776, win_rate: [ 0.28094328 0.35594277 0.36311395]
episode: 148000, epsilon: 0.9, loss: 0.0472505, win_rate: [ 0.28153188 0.35555841 0.36290971]
episode: 150000, epsilon: 0.9, loss: 0.0475737, win_rate: [ 0.28223145 0.35517763 0.36259092]
episode: 152000, epsilon: 0.9, loss: 0.0321817, win_rate: [ 0.28243893 0.35519503 0.36236604]
episode: 154000, epsilon: 0.9, loss: 0.14176, win_rate: [ 0.28317349 0.35461458 0.36221193]
episode: 156000, epsilon: 0.9, loss: 0.0289719, win_rate: [ 0.2836328 0.35436311 0.36200409]
episode: 158000, epsilon: 0.9, loss: 0.0755346, win_rate: [ 0.28411845 0.35403573 0.36184581]
episode: 160000, epsilon: 0.9, loss: 0.0690059, win_rate: [ 0.28456072 0.35392279 0.36151649]
episode: 162000, epsilon: 0.9, loss: 0.0761212, win_rate: [ 0.28487478 0.35386819 0.36125703]
episode: 164000, epsilon: 0.9, loss: 0.0786304, win_rate: [ 0.28544948 0.35356492 0.3609856 ]
episode: 166000, epsilon: 0.9, loss: 0.152283, win_rate: [ 0.28585972 0.35342558 0.36071469]
episode: 168000, epsilon: 0.9, loss: 0.0375376, win_rate: [ 0.28656972 0.35289671 0.36053357]
episode: 170000, epsilon: 0.9, loss: 0.0694601, win_rate: [ 0.28709243 0.35255087 0.3603567 ]
episode: 172000, epsilon: 0.9, loss: 0.0662952, win_rate: [ 0.28761461 0.35227702 0.36010837]
episode: 174000, epsilon: 0.9, loss: 0.126031, win_rate: [ 0.28794087 0.35187154 0.36018759]
episode: 176000, epsilon: 0.9, loss: 0.0441226, win_rate: [ 0.287987 0.35173664 0.36027636]
episode: 178000, epsilon: 0.9, loss: 0.0939541, win_rate: [ 0.28838602 0.3516778 0.35993618]
episode: 180000, epsilon: 0.9, loss: 0.0806355, win_rate: [ 0.28870951 0.35121472 0.36007578]
episode: 182000, epsilon: 0.9, loss: 0.225121, win_rate: [ 0.28930061 0.35102005 0.35967934]
episode: 184000, epsilon: 0.9, loss: 0.0905873, win_rate: [ 0.28973756 0.3508894 0.35937305]
episode: 186000, epsilon: 0.9, loss: 0.0462258, win_rate: [ 0.29015435 0.35055188 0.35929377]
episode: 188000, epsilon: 0.9, loss: 0.0449081, win_rate: [ 0.29040271 0.35033856 0.35925873]
episode: 190000, epsilon: 0.9, loss: 0.0201836, win_rate: [ 0.29060373 0.35022447 0.35917179]
episode: 192000, epsilon: 0.9, loss: 0.0239741, win_rate: [ 0.29076932 0.3501388 0.35909188]
episode: 194000, epsilon: 0.9, loss: 0.0344469, win_rate: [ 0.29091087 0.35013222 0.35895691]
episode: 196000, epsilon: 0.9, loss: 0.0569738, win_rate: [ 0.29098321 0.35012576 0.35889103]
episode: 198000, epsilon: 0.9, loss: 0.0272727, win_rate: [ 0.2912359 0.3501346 0.3586295]
episode: 200000, epsilon: 0.9, loss: 0.0319101, win_rate: [ 0.29155354 0.34988325 0.35856321]
episode: 202000, epsilon: 0.9, loss: 0.0657818, win_rate: [ 0.29199855 0.34967649 0.35832496]
episode: 204000, epsilon: 0.9, loss: 0.0822009, win_rate: [ 0.29224367 0.34940515 0.35835118]
episode: 206000, epsilon: 0.9, loss: 0.0827054, win_rate: [ 0.29271703 0.34924102 0.35804195]
episode: 208000, epsilon: 0.9, loss: 0.0876145, win_rate: [ 0.29290244 0.34921467 0.35788289]
episode: 210000, epsilon: 0.9, loss: 0.0300222, win_rate: [ 0.29317003 0.34905548 0.35777449]
episode: 212000, epsilon: 0.9, loss: 0.0452345, win_rate: [ 0.2934373 0.34886628 0.35769643]
episode: 214000, epsilon: 0.9, loss: 0.086108, win_rate: [ 0.29379302 0.34877874 0.35742824]
episode: 216000, epsilon: 0.9, loss: 0.130069, win_rate: [ 0.29425327 0.34847987 0.35726686]
episode: 218000, epsilon: 0.9, loss: 0.0776821, win_rate: [ 0.29454452 0.34826904 0.35718643]
episode: 220000, epsilon: 0.9, loss: 0.0832973, win_rate: [ 0.29468048 0.34832569 0.35699383]
episode: 222000, epsilon: 0.9, loss: 0.0427337, win_rate: [ 0.2950302 0.34815158 0.35681821]
episode: 224000, epsilon: 0.9, loss: 0.0352238, win_rate: [ 0.29522636 0.34787791 0.35689573]
episode: 226000, epsilon: 0.9, loss: 0.0466583, win_rate: [ 0.29551197 0.34767988 0.35680816]
episode: 228000, epsilon: 0.9, loss: 0.0672912, win_rate: [ 0.29570484 0.3476169 0.35667826]
episode: 230000, epsilon: 0.9, loss: 0.0366899, win_rate: [ 0.29593784 0.34758545 0.35647671]
episode: 232000, epsilon: 0.9, loss: 0.116823, win_rate: [ 0.29611941 0.34756747 0.35631312]
episode: 234000, epsilon: 0.9, loss: 0.0342191, win_rate: [ 0.29646027 0.34742159 0.35611814]
episode: 236000, epsilon: 0.9, loss: 0.254224, win_rate: [ 0.29668095 0.34732056 0.35599849]
episode: 238000, epsilon: 0.9, loss: 0.104247, win_rate: [ 0.29704077 0.34718342 0.35577582]
episode: 240000, epsilon: 0.9, loss: 0.0652708, win_rate: [ 0.29731126 0.34704855 0.35564018]
episode: 242000, epsilon: 0.9, loss: 0.132359, win_rate: [ 0.29762274 0.34680022 0.35557704]
episode: 244000, epsilon: 0.9, loss: 0.043645, win_rate: [ 0.29807665 0.34653547 0.35538789]
episode: 246000, epsilon: 0.9, loss: 0.056568, win_rate: [ 0.29840936 0.34641729 0.35517335]
episode: 248000, epsilon: 0.9, loss: 0.0308357, win_rate: [ 0.29852299 0.3461478 0.35532921]
episode: 250000, epsilon: 0.9, loss: 0.122293, win_rate: [ 0.29872281 0.34604262 0.35523458]
episode: 252000, epsilon: 0.9, loss: 0.104931, win_rate: [ 0.29897104 0.34582006 0.35520891]
episode: 254000, epsilon: 0.9, loss: 0.0407437, win_rate: [ 0.29919174 0.34562462 0.35518364]
episode: 256000, epsilon: 0.9, loss: 0.0428849, win_rate: [ 0.29919024 0.34560412 0.35520564]
episode: 258000, epsilon: 0.9, loss: 0.0982826, win_rate: [ 0.2992469 0.34556455 0.35518855]
episode: 260000, epsilon: 0.9, loss: 0.105309, win_rate: [ 0.29932962 0.34557559 0.35509479]
episode: 262000, epsilon: 0.9, loss: 0.0842547, win_rate: [ 0.29941489 0.34548723 0.35509788]
episode: 264000, epsilon: 0.9, loss: 0.0204638, win_rate: [ 0.29973371 0.34528278 0.3549835 ]
episode: 266000, epsilon: 0.9, loss: 0.0500088, win_rate: [ 0.29977331 0.34524682 0.35497987]
episode: 268000, epsilon: 0.9, loss: 0.0835725, win_rate: [ 0.29993545 0.34525244 0.35481211]
episode: 270000, epsilon: 0.9, loss: 0.108896, win_rate: [ 0.30027296 0.34519131 0.35453572]
episode: 272000, epsilon: 0.9, loss: 0.0243984, win_rate: [ 0.30034081 0.34516785 0.35449134]
episode: 274000, epsilon: 0.9, loss: 0.0340141, win_rate: [ 0.30054635 0.34498779 0.35446586]
episode: 276000, epsilon: 0.9, loss: 0.059728, win_rate: [ 0.30061123 0.34490455 0.35448422]
episode: 278000, epsilon: 0.9, loss: 0.0351508, win_rate: [ 0.3007759 0.34474336 0.35448074]
episode: 280000, epsilon: 0.9, loss: 0.13776, win_rate: [ 0.30092035 0.3447202 0.35435945]
episode: 282000, epsilon: 0.9, loss: 0.0271912, win_rate: [ 0.30106985 0.34472573 0.35420442]
episode: 284000, epsilon: 0.9, loss: 0.0923988, win_rate: [ 0.30113978 0.34470301 0.3541572 ]
episode: 286000, epsilon: 0.9, loss: 0.0309716, win_rate: [ 0.30132412 0.34461768 0.3540582 ]
episode: 288000, epsilon: 0.9, loss: 0.0927393, win_rate: [ 0.3014434 0.34452311 0.35403349]
episode: 290000, epsilon: 0.9, loss: 0.0178667, win_rate: [ 0.30149551 0.34442985 0.35407464]
episode: 292000, epsilon: 0.9, loss: 0.037057, win_rate: [ 0.30160856 0.34430704 0.3540844 ]
episode: 294000, epsilon: 0.9, loss: 0.0819265, win_rate: [ 0.30177108 0.34410427 0.35412465]
episode: 296000, epsilon: 0.9, loss: 0.0592715, win_rate: [ 0.30190776 0.34418803 0.35390421]
episode: 298000, epsilon: 0.9, loss: 0.0504315, win_rate: [ 0.30205603 0.34414985 0.35379411]
episode: 300000, epsilon: 0.9, loss: 0.0286516, win_rate: [ 0.30223899 0.34405219 0.35370882]
episode: 302000, epsilon: 0.9, loss: 0.204087, win_rate: [ 0.30239635 0.34396575 0.3536379 ]
episode: 304000, epsilon: 0.9, loss: 0.0958426, win_rate: [ 0.30261085 0.34384426 0.35354489]
episode: 306000, epsilon: 0.9, loss: 0.111883, win_rate: [ 0.30275064 0.34372763 0.35352172]
episode: 308000, epsilon: 0.9, loss: 0.199278, win_rate: [ 0.30299252 0.34356382 0.35344366]
episode: 310000, epsilon: 0.9, loss: 0.103614, win_rate: [ 0.30329257 0.34347631 0.35323112]
episode: 312000, epsilon: 0.9, loss: 0.0809423, win_rate: [ 0.30340608 0.34341877 0.35317515]
episode: 314000, epsilon: 0.9, loss: 0.0548263, win_rate: [ 0.30353725 0.34336515 0.3530976 ]
episode: 316000, epsilon: 0.9, loss: 0.0386117, win_rate: [ 0.30374587 0.34320145 0.35305268]
episode: 318000, epsilon: 0.9, loss: 0.0389285, win_rate: [ 0.30369716 0.34331024 0.3529926 ]
episode: 320000, epsilon: 0.9, loss: 0.136306, win_rate: [ 0.30387405 0.34315518 0.35297077]
episode: 322000, epsilon: 0.9, loss: 0.0162871, win_rate: [ 0.30394316 0.3432567 0.35280015]
episode: 324000, epsilon: 0.9, loss: 0.0123312, win_rate: [ 0.30395894 0.34322734 0.35281373]
episode: 326000, epsilon: 0.9, loss: 0.0509802, win_rate: [ 0.30403894 0.34321674 0.35274432]
episode: 328000, epsilon: 0.9, loss: 0.129181, win_rate: [ 0.30406615 0.3431392 0.35279466]
episode: 330000, epsilon: 0.9, loss: 0.0551915, win_rate: [ 0.30410211 0.34310199 0.3527959 ]
episode: 332000, epsilon: 0.9, loss: 0.0443393, win_rate: [ 0.30416475 0.34306523 0.35277002]
episode: 334000, epsilon: 0.9, loss: 0.136224, win_rate: [ 0.30417873 0.34301095 0.35281032]
episode: 336000, epsilon: 0.9, loss: 0.182517, win_rate: [ 0.30419255 0.34288588 0.35292157]
episode: 338000, epsilon: 0.9, loss: 0.0529755, win_rate: [ 0.30422987 0.34278597 0.35298416]
episode: 340000, epsilon: 0.9, loss: 0.0434194, win_rate: [ 0.30429911 0.34268135 0.35301955]
episode: 342000, epsilon: 0.9, loss: 0.0438239, win_rate: [ 0.30433829 0.34256917 0.35309253]
episode: 344000, epsilon: 0.9, loss: 0.0497061, win_rate: [ 0.30438284 0.34246993 0.35314723]
episode: 346000, epsilon: 0.9, loss: 0.207201, win_rate: [ 0.30436328 0.34241809 0.35321863]
episode: 348000, epsilon: 0.9, loss: 0.0810478, win_rate: [ 0.30446177 0.34228637 0.35325186]
episode: 350000, epsilon: 0.9, loss: 0.0232767, win_rate: [ 0.30446199 0.34224188 0.35329613]
episode: 352000, epsilon: 0.9, loss: 0.0669126, win_rate: [ 0.3044622 0.34229448 0.35324331]
episode: 354000, epsilon: 0.9, loss: 0.0989563, win_rate: [ 0.30453304 0.34231824 0.35314872]
episode: 356000, epsilon: 0.9, loss: 0.0757773, win_rate: [ 0.30465926 0.34224342 0.35309732]
episode: 358000, epsilon: 0.9, loss: 0.0277897, win_rate: [ 0.30474217 0.34224765 0.35301019]
episode: 360000, epsilon: 0.9, loss: 0.0965361, win_rate: [ 0.30481304 0.34222405 0.35296291]
episode: 362000, epsilon: 0.9, loss: 0.10665, win_rate: [ 0.30482789 0.34225596 0.35291615]
episode: 364000, epsilon: 0.9, loss: 0.0777033, win_rate: [ 0.3048618 0.34227653 0.35286167]
episode: 366000, epsilon: 0.9, loss: 0.0140081, win_rate: [ 0.30489261 0.34222857 0.35287882]
episode: 368000, epsilon: 0.9, loss: 0.0504723, win_rate: [ 0.30496656 0.34220831 0.35282513]
episode: 370000, epsilon: 0.9, loss: 0.111004, win_rate: [ 0.30503701 0.34218016 0.35278283]
episode: 372000, epsilon: 0.9, loss: 0.0991707, win_rate: [ 0.30524649 0.34213618 0.35261733]
episode: 374000, epsilon: 0.9, loss: 0.0450218, win_rate: [ 0.30537886 0.34205256 0.35256858]
episode: 376000, epsilon: 0.9, loss: 0.0190403, win_rate: [ 0.30541674 0.34210015 0.35248311]
episode: 378000, epsilon: 0.9, loss: 0.0242864, win_rate: [ 0.30541189 0.34207846 0.35250965]
episode: 380000, epsilon: 0.9, loss: 0.047657, win_rate: [ 0.3054492 0.34202542 0.35252539]
episode: 382000, epsilon: 0.9, loss: 0.0681747, win_rate: [ 0.30546517 0.34203052 0.35250431]
episode: 384000, epsilon: 0.9, loss: 0.0903952, win_rate: [ 0.30540285 0.34209025 0.35250689]
episode: 386000, epsilon: 0.9, loss: 0.0787342, win_rate: [ 0.30543185 0.34211051 0.35245764]
episode: 388000, epsilon: 0.9, loss: 0.17759, win_rate: [ 0.30555849 0.34207644 0.35236507]
episode: 390000, epsilon: 0.9, loss: 0.0811618, win_rate: [ 0.30561973 0.34199399 0.35238628]
episode: 392000, epsilon: 0.9, loss: 0.0246235, win_rate: [ 0.30562167 0.34198382 0.35239451]
episode: 394000, epsilon: 0.9, loss: 0.0724168, win_rate: [ 0.3057378 0.34196614 0.35229606]
episode: 396000, epsilon: 0.9, loss: 0.0261151, win_rate: [ 0.30576943 0.34202186 0.35220871]
episode: 398000, epsilon: 0.9, loss: 0.0419618, win_rate: [ 0.3057128 0.34215994 0.35212726]
episode: 400000, epsilon: 0.9, loss: 0.0389928, win_rate: [ 0.30571674 0.34216664 0.35211662]
episode: 402000, epsilon: 0.9, loss: 0.117443, win_rate: [ 0.30574302 0.34208621 0.35217077]
episode: 404000, epsilon: 0.9, loss: 0.0311898, win_rate: [ 0.30576162 0.34205113 0.35218725]
episode: 406000, epsilon: 0.9, loss: 0.0517739, win_rate: [ 0.30573324 0.34200162 0.35226514]
episode: 408000, epsilon: 0.9, loss: 0.0382845, win_rate: [ 0.3058767 0.34189867 0.35222463]
episode: 410000, epsilon: 0.9, loss: 0.0194194, win_rate: [ 0.3058773 0.34190648 0.35221621]
episode: 412000, epsilon: 0.9, loss: 0.0781807, win_rate: [ 0.30596042 0.34191907 0.3521205 ]
episode: 414000, epsilon: 0.9, loss: 0.0268315, win_rate: [ 0.306021 0.3419412 0.3520378]
episode: 416000, epsilon: 0.9, loss: 0.044673, win_rate: [ 0.3060834 0.34188139 0.35203521]
episode: 418000, epsilon: 0.9, loss: 0.331834, win_rate: [ 0.30618826 0.34174799 0.35206375]
episode: 420000, epsilon: 0.9, loss: 0.0536296, win_rate: [ 0.3061826 0.3417349 0.3520825]
episode: 422000, epsilon: 0.9, loss: 0.0378078, win_rate: [ 0.30621965 0.34171483 0.35206552]
episode: 424000, epsilon: 0.9, loss: 0.0435669, win_rate: [ 0.30626579 0.34173976 0.35199445]
episode: 426000, epsilon: 0.9, loss: 0.0972483, win_rate: [ 0.30624576 0.34165647 0.35209777]
episode: 428000, epsilon: 0.9, loss: 0.0452781, win_rate: [ 0.30627265 0.34173285 0.3519945 ]
episode: 430000, epsilon: 0.9, loss: 0.017068, win_rate: [ 0.30629696 0.34177827 0.35192476]
episode: 432000, epsilon: 0.9, loss: 0.0352527, win_rate: [ 0.30636735 0.3416543 0.35197835]
episode: 434000, epsilon: 0.9, loss: 0.0870793, win_rate: [ 0.30641634 0.34169737 0.35188629]
episode: 436000, epsilon: 0.9, loss: 0.0403177, win_rate: [ 0.30641443 0.34178591 0.35179965]
episode: 438000, epsilon: 0.9, loss: 0.0226224, win_rate: [ 0.3064719 0.34176863 0.35175947]
episode: 440000, epsilon: 0.9, loss: 0.0669311, win_rate: [ 0.30658339 0.3417265 0.35169011]
episode: 442000, epsilon: 0.9, loss: 0.0893306, win_rate: [ 0.30664637 0.34168927 0.35166436]
episode: 444000, epsilon: 0.9, loss: 0.0626216, win_rate: [ 0.306693 0.34163887 0.35166813]
episode: 446000, epsilon: 0.9, loss: 0.0357122, win_rate: [ 0.30677061 0.34159789 0.3516315 ]
episode: 448000, epsilon: 0.9, loss: 0.0583008, win_rate: [ 0.30680289 0.34154834 0.35164877]
episode: 450000, epsilon: 0.9, loss: 0.0850396, win_rate: [ 0.30681265 0.34147924 0.35170811]
episode: 452000, epsilon: 0.9, loss: 0.0890502, win_rate: [ 0.30686215 0.34144393 0.35169391]
episode: 454000, epsilon: 0.9, loss: 0.0435484, win_rate: [ 0.3068958 0.3413759 0.3517283]
episode: 456000, epsilon: 0.9, loss: 0.0614234, win_rate: [ 0.30695108 0.34133916 0.35170976]
episode: 458000, epsilon: 0.9, loss: 0.212143, win_rate: [ 0.30690326 0.34135297 0.35174377]
episode: 460000, epsilon: 0.9, loss: 0.0372072, win_rate: [ 0.30693846 0.34141448 0.35164706]
episode: 462000, epsilon: 0.9, loss: 0.050082, win_rate: [ 0.30698202 0.34139104 0.35162694]
episode: 464000, epsilon: 0.9, loss: 0.0700941, win_rate: [ 0.30709417 0.34131176 0.35159407]
episode: 466000, epsilon: 0.9, loss: 0.0825135, win_rate: [ 0.3071238 0.34135334 0.35152285]
episode: 468000, epsilon: 0.9, loss: 0.0523231, win_rate: [ 0.30710404 0.34135824 0.35153771]
episode: 470000, epsilon: 0.9, loss: 0.0856748, win_rate: [ 0.3071419 0.34133757 0.35152053]
episode: 472000, epsilon: 0.9, loss: 0.0650932, win_rate: [ 0.30721333 0.34127258 0.35151409]
episode: 474000, epsilon: 0.9, loss: 0.0386773, win_rate: [ 0.30730315 0.34120181 0.35149504]
episode: 476000, epsilon: 0.9, loss: 0.0475568, win_rate: [ 0.30729767 0.34118626 0.35151607]
episode: 478000, epsilon: 0.9, loss: 0.022826, win_rate: [ 0.30734873 0.34120431 0.35144696]
episode: 480000, epsilon: 0.9, loss: 0.0322246, win_rate: [ 0.30746186 0.34111804 0.3514201 ]
episode: 482000, epsilon: 0.9, loss: 0.0829151, win_rate: [ 0.30750351 0.34106776 0.35142873]
episode: 484000, epsilon: 0.9, loss: 0.073053, win_rate: [ 0.30759441 0.34104888 0.35135671]
episode: 486000, epsilon: 0.9, loss: 0.0508463, win_rate: [ 0.30760842 0.34107955 0.35131203]
episode: 488000, epsilon: 0.9, loss: 0.0432987, win_rate: [ 0.30775347 0.34100955 0.35123699]
episode: 490000, epsilon: 0.9, loss: 0.044695, win_rate: [ 0.30788917 0.34091155 0.35119928]
episode: 492000, epsilon: 0.9, loss: 0.0176088, win_rate: [ 0.3079445 0.34089362 0.35116189]
episode: 494000, epsilon: 0.9, loss: 0.0621942, win_rate: [ 0.30806415 0.34079081 0.35114504]
episode: 496000, epsilon: 0.9, loss: 0.0405262, win_rate: [ 0.30815865 0.34078964 0.35105171]
episode: 498000, epsilon: 0.9, loss: 0.0446638, win_rate: [ 0.30824637 0.34073626 0.35101737]
episode: 500000, epsilon: 0.9, loss: 0.0184623, win_rate: [ 0.30824138 0.34071332 0.3510453 ]
episode: 502000, epsilon: 0.9, loss: 0.0599133, win_rate: [ 0.30829421 0.34070848 0.35099731]
episode: 504000, epsilon: 0.9, loss: 0.0533356, win_rate: [ 0.30833272 0.34069972 0.35096756]
episode: 506000, epsilon: 0.9, loss: 0.0336033, win_rate: [ 0.30833338 0.34072265 0.35094397]
episode: 508000, epsilon: 0.9, loss: 0.171335, win_rate: [ 0.30835569 0.34068634 0.35095797]
episode: 510000, epsilon: 0.9, loss: 0.0260027, win_rate: [ 0.30837391 0.340666 0.3509601 ]
episode: 512000, epsilon: 0.9, loss: 0.0518393, win_rate: [ 0.30842127 0.34062043 0.3509583 ]
episode: 514000, epsilon: 0.9, loss: 0.0760449, win_rate: [ 0.30844882 0.34063747 0.35091371]
episode: 516000, epsilon: 0.9, loss: 0.0549758, win_rate: [ 0.30850522 0.34064081 0.35085397]
episode: 518000, epsilon: 0.9, loss: 0.0341754, win_rate: [ 0.3085689 0.34063255 0.35079855]
episode: 520000, epsilon: 0.9, loss: 0.0795886, win_rate: [ 0.30860325 0.34058781 0.35080894]
episode: 522000, epsilon: 0.9, loss: 0.0172003, win_rate: [ 0.30863159 0.34055107 0.35081734]
episode: 524000, epsilon: 0.9, loss: 0.0352191, win_rate: [ 0.30868071 0.34044592 0.35087338]
episode: 526000, epsilon: 0.9, loss: 0.0611481, win_rate: [ 0.30876937 0.34038528 0.35084534]
episode: 528000, epsilon: 0.9, loss: 0.0598626, win_rate: [ 0.30879487 0.34032133 0.35088381]
episode: 530000, epsilon: 0.9, loss: 0.0652519, win_rate: [ 0.30882772 0.34020502 0.35096726]
episode: 532000, epsilon: 0.9, loss: 0.0702173, win_rate: [ 0.30886784 0.34019485 0.35093731]
episode: 534000, epsilon: 0.9, loss: 0.131664, win_rate: [ 0.30892639 0.3401679 0.35090571]
episode: 536000, epsilon: 0.9, loss: 0.0701256, win_rate: [ 0.3090386 0.34009638 0.35086502]
episode: 538000, epsilon: 0.9, loss: 0.101609, win_rate: [ 0.30914627 0.34006628 0.35078745]
episode: 540000, epsilon: 0.9, loss: 0.0726768, win_rate: [ 0.30916239 0.34001604 0.35082157]
episode: 542000, epsilon: 0.9, loss: 0.0563153, win_rate: [ 0.30922452 0.33998461 0.35079087]
episode: 544000, epsilon: 0.9, loss: 0.107646, win_rate: [ 0.30926781 0.33992952 0.35080266]
episode: 546000, epsilon: 0.9, loss: 0.0245504, win_rate: [ 0.30928515 0.33990048 0.35081438]
episode: 548000, epsilon: 0.9, loss: 0.0412734, win_rate: [ 0.3093644 0.33981872 0.35081688]
episode: 550000, epsilon: 0.9, loss: 0.0432466, win_rate: [ 0.30946126 0.33976665 0.35077209]
episode: 552000, epsilon: 0.9, loss: 0.109766, win_rate: [ 0.30948857 0.33973308 0.35077835]
episode: 554000, epsilon: 0.9, loss: 0.0873546, win_rate: [ 0.30958247 0.33969614 0.35072139]
episode: 556000, epsilon: 0.9, loss: 0.0537797, win_rate: [ 0.30962714 0.33966666 0.3507062 ]
episode: 558000, epsilon: 0.9, loss: 0.0662519, win_rate: [ 0.30963744 0.33970907 0.35065349]
episode: 560000, epsilon: 0.9, loss: 0.0794032, win_rate: [ 0.30971909 0.33970475 0.35057616]
episode: 562000, epsilon: 0.9, loss: 0.0910351, win_rate: [ 0.30969696 0.33970936 0.35059368]
episode: 564000, epsilon: 0.9, loss: 0.190382, win_rate: [ 0.30976541 0.33969621 0.35053839]
episode: 566000, epsilon: 0.9, loss: 0.244887, win_rate: [ 0.30982277 0.33966724 0.35050998]
episode: 568000, epsilon: 0.9, loss: 0.128635, win_rate: [ 0.3098815 0.33968602 0.35043248]
episode: 570000, epsilon: 0.9, loss: 0.0368302, win_rate: [ 0.31002402 0.33962923 0.35034675]
episode: 572000, epsilon: 0.9, loss: 0.0742465, win_rate: [ 0.31009561 0.33958157 0.35032281]
episode: 574000, epsilon: 0.9, loss: 0.0892474, win_rate: [ 0.31009702 0.33954993 0.35035305]
episode: 576000, epsilon: 0.9, loss: 0.0298162, win_rate: [ 0.31023731 0.33950462 0.35025807]
episode: 578000, epsilon: 0.9, loss: 0.105125, win_rate: [ 0.31035586 0.33950633 0.3501378 ]
episode: 580000, epsilon: 0.9, loss: 0.0965056, win_rate: [ 0.31039084 0.33953045 0.35007871]
episode: 582000, epsilon: 0.9, loss: 0.0747379, win_rate: [ 0.31050462 0.33947536 0.35002002]
episode: 584000, epsilon: 0.9, loss: 0.0460892, win_rate: [ 0.3106005 0.33942408 0.34997543]
episode: 586000, epsilon: 0.9, loss: 0.0311771, win_rate: [ 0.3106923 0.33931171 0.34999599]
episode: 588000, epsilon: 0.9, loss: 0.0223139, win_rate: [ 0.31070015 0.33929534 0.35000451]
episode: 590000, epsilon: 0.9, loss: 0.014087, win_rate: [ 0.31079947 0.33918417 0.35001636]
episode: 592000, epsilon: 0.9, loss: 0.187469, win_rate: [ 0.31084914 0.33913963 0.35001123]
episode: 594000, epsilon: 0.9, loss: 0.364125, win_rate: [ 0.31090857 0.33907014 0.3500213 ]
episode: 596000, epsilon: 0.9, loss: 0.0955397, win_rate: [ 0.3109223 0.33905144 0.35002626]
episode: 598000, epsilon: 0.9, loss: 0.0208642, win_rate: [ 0.31095935 0.33902117 0.35001948]
episode: 600000, epsilon: 0.9, loss: 0.029309, win_rate: [ 0.31099448 0.3389611 0.35004442]
episode: 602000, epsilon: 0.9, loss: 0.099173, win_rate: [ 0.31103104 0.3389496 0.35001935]
episode: 604000, epsilon: 0.9, loss: 0.0447113, win_rate: [ 0.31105412 0.3388703 0.35007558]
episode: 606000, epsilon: 0.9, loss: 0.0925096, win_rate: [ 0.31107209 0.33888888 0.35003903]
episode: 608000, epsilon: 0.9, loss: 0.0316893, win_rate: [ 0.31108501 0.33884813 0.35006686]
episode: 610000, epsilon: 0.9, loss: 0.0273971, win_rate: [ 0.31109457 0.33885026 0.35005516]
episode: 612000, epsilon: 0.9, loss: 0.0550182, win_rate: [ 0.31120537 0.33882134 0.34997328]
episode: 614000, epsilon: 0.9, loss: 0.0381274, win_rate: [ 0.31122099 0.33878446 0.34999454]
episode: 616000, epsilon: 0.9, loss: 0.0663957, win_rate: [ 0.31123488 0.33879003 0.34997508]
episode: 618000, epsilon: 0.9, loss: 0.0196721, win_rate: [ 0.31125192 0.3387535 0.34999458]
episode: 620000, epsilon: 0.9, loss: 0.0293376, win_rate: [ 0.31129305 0.33872042 0.34998653]
episode: 622000, epsilon: 0.9, loss: 0.0543059, win_rate: [ 0.31133873 0.3386972 0.34996407]
episode: 624000, epsilon: 0.9, loss: 0.0673458, win_rate: [ 0.31130078 0.33868375 0.35001546]
episode: 626000, epsilon: 0.9, loss: 0.160115, win_rate: [ 0.31139088 0.33864802 0.3499611 ]
episode: 628000, epsilon: 0.9, loss: 0.165393, win_rate: [ 0.31151224 0.33858545 0.34990231]
episode: 630000, epsilon: 0.9, loss: 0.0453944, win_rate: [ 0.31154871 0.33855026 0.34990103]
episode: 632000, epsilon: 0.9, loss: 0.106184, win_rate: [ 0.31163558 0.33848681 0.34987761]
episode: 634000, epsilon: 0.9, loss: 0.0961473, win_rate: [ 0.31165881 0.33846477 0.34987642]
episode: 636000, epsilon: 0.9, loss: 0.0491502, win_rate: [ 0.31168662 0.33844286 0.34987052]
episode: 638000, epsilon: 0.9, loss: 0.102906, win_rate: [ 0.31173305 0.33841796 0.34984898]
episode: 640000, epsilon: 0.9, loss: 0.0613453, win_rate: [ 0.31170576 0.33842916 0.34986508]
episode: 642000, epsilon: 0.9, loss: 0.0369197, win_rate: [ 0.31170356 0.33839044 0.349906 ]
episode: 644000, epsilon: 0.9, loss: 0.0469973, win_rate: [ 0.31176504 0.33835041 0.34988455]
episode: 646000, epsilon: 0.9, loss: 0.0396432, win_rate: [ 0.31178125 0.33834313 0.34987562]
episode: 648000, epsilon: 0.9, loss: 0.0310869, win_rate: [ 0.31180044 0.3383683 0.34983125]
episode: 650000, epsilon: 0.9, loss: 0.0464561, win_rate: [ 0.3117949 0.33838102 0.34982408]
episode: 652000, epsilon: 0.9, loss: 0.0306221, win_rate: [ 0.31177099 0.33840592 0.34982308]
episode: 654000, epsilon: 0.9, loss: 0.13435, win_rate: [ 0.31176558 0.33843526 0.34979916]
episode: 656000, epsilon: 0.9, loss: 0.0832305, win_rate: [ 0.31176324 0.3384507 0.34978605]
episode: 658000, epsilon: 0.9, loss: 0.0866934, win_rate: [ 0.3117974 0.33844629 0.34975631]
episode: 660000, epsilon: 0.9, loss: 0.0252544, win_rate: [ 0.31187225 0.3384404 0.34968735]
episode: 662000, epsilon: 0.9, loss: 0.0666468, win_rate: [ 0.3118953 0.33839073 0.34971397]
episode: 664000, epsilon: 0.9, loss: 0.0194304, win_rate: [ 0.31192724 0.33834136 0.3497314 ]
episode: 666000, epsilon: 0.9, loss: 0.0971439, win_rate: [ 0.31194848 0.33833132 0.3497202 ]
episode: 668000, epsilon: 0.9, loss: 0.0482307, win_rate: [ 0.31195163 0.33833033 0.34971804]
episode: 670000, epsilon: 0.9, loss: 0.0418547, win_rate: [ 0.31195177 0.33834576 0.34970246]
episode: 672000, epsilon: 0.9, loss: 0.0740064, win_rate: [ 0.3119787 0.33833283 0.34968847]
episode: 674000, epsilon: 0.9, loss: 0.122351, win_rate: [ 0.31202476 0.3382814 0.34969384]
episode: 676000, epsilon: 0.9, loss: 0.0503296, win_rate: [ 0.31208682 0.33825985 0.34965333]
episode: 678000, epsilon: 0.9, loss: 0.0287418, win_rate: [ 0.31217801 0.33823549 0.34958651]
episode: 680000, epsilon: 0.9, loss: 0.0530789, win_rate: [ 0.31224807 0.33817303 0.3495789 ]
episode: 682000, epsilon: 0.9, loss: 0.0920546, win_rate: [ 0.31226054 0.33814173 0.34959773]
episode: 684000, epsilon: 0.9, loss: 0.118793, win_rate: [ 0.31229194 0.33812524 0.34958282]
episode: 686000, epsilon: 0.9, loss: 0.0354437, win_rate: [ 0.31238293 0.33805344 0.34956363]
episode: 688000, epsilon: 0.9, loss: 0.0437258, win_rate: [ 0.31239053 0.33802422 0.34958525]
episode: 690000, epsilon: 0.9, loss: 0.0571523, win_rate: [ 0.31244592 0.33797052 0.34958355]
episode: 692000, epsilon: 0.9, loss: 0.10405, win_rate: [ 0.31248076 0.33793593 0.34958331]
episode: 694000, epsilon: 0.9, loss: 0.0227007, win_rate: [ 0.3125082 0.33789865 0.34959316]
episode: 696000, epsilon: 0.9, loss: 0.0691725, win_rate: [ 0.31252685 0.3378975 0.34957565]
episode: 698000, epsilon: 0.9, loss: 0.0441649, win_rate: [ 0.31256116 0.33786055 0.3495783 ]
episode: 700000, epsilon: 0.9, loss: 0.0424862, win_rate: [ 0.31263241 0.33782952 0.34953807]
episode: 702000, epsilon: 0.9, loss: 0.110694, win_rate: [ 0.31274172 0.3377716 0.34948668]
episode: 704000, epsilon: 0.9, loss: 0.0643624, win_rate: [ 0.31285183 0.33773674 0.34941144]
episode: 706000, epsilon: 0.9, loss: 0.019661, win_rate: [ 0.31283667 0.33770207 0.34946126]
episode: 708000, epsilon: 0.9, loss: 0.0570467, win_rate: [ 0.31283995 0.33769303 0.34946702]
episode: 710000, epsilon: 0.9, loss: 0.0218826, win_rate: [ 0.31289111 0.33764882 0.34946007]
episode: 712000, epsilon: 0.9, loss: 0.111307, win_rate: [ 0.31298271 0.33762733 0.34938996]
episode: 714000, epsilon: 0.9, loss: 0.086058, win_rate: [ 0.31300796 0.33759477 0.34939727]
episode: 716000, epsilon: 0.9, loss: 0.0836945, win_rate: [ 0.31307359 0.33755819 0.34936823]
episode: 718000, epsilon: 0.9, loss: 0.0423315, win_rate: [ 0.31311656 0.33752181 0.34936163]
episode: 720000, epsilon: 0.9, loss: 0.0469504, win_rate: [ 0.31310095 0.33753842 0.34936063]
episode: 722000, epsilon: 0.9, loss: 0.153578, win_rate: [ 0.3132004 0.33750784 0.34929176]
episode: 724000, epsilon: 0.9, loss: 0.0501421, win_rate: [ 0.31328548 0.33744429 0.34927024]
episode: 726000, epsilon: 0.9, loss: 0.127317, win_rate: [ 0.31335632 0.33738659 0.34925709]
episode: 728000, epsilon: 0.9, loss: 0.0807194, win_rate: [ 0.31342402 0.3373402 0.34923578]
episode: 730000, epsilon: 0.9, loss: 0.0983167, win_rate: [ 0.31344067 0.33729817 0.34926117]
episode: 732000, epsilon: 0.9, loss: 0.097298, win_rate: [ 0.31349411 0.33734653 0.34915936]
episode: 734000, epsilon: 0.9, loss: 0.0408951, win_rate: [ 0.31354317 0.33730063 0.3491562 ]
episode: 736000, epsilon: 0.9, loss: 0.0891946, win_rate: [ 0.31358653 0.33730253 0.34911094]
episode: 738000, epsilon: 0.9, loss: 0.164258, win_rate: [ 0.31369605 0.33721228 0.34909167]
episode: 740000, epsilon: 0.9, loss: 0.158064, win_rate: [ 0.31377795 0.33716981 0.34905223]
episode: 742000, epsilon: 0.9, loss: 0.106334, win_rate: [ 0.31382303 0.33713027 0.3490467 ]
episode: 744000, epsilon: 0.9, loss: 0.10506, win_rate: [ 0.31389474 0.33709229 0.34901297]
episode: 746000, epsilon: 0.9, loss: 0.0523631, win_rate: [ 0.31395802 0.33708668 0.3489553 ]
episode: 748000, epsilon: 0.9, loss: 0.0371306, win_rate: [ 0.31399824 0.337045 0.34895675]
episode: 750000, epsilon: 0.9, loss: 0.0303204, win_rate: [ 0.31407025 0.33700222 0.34892753]
episode: 752000, epsilon: 0.9, loss: 0.050371, win_rate: [ 0.31407139 0.33699955 0.34892906]
episode: 754000, epsilon: 0.9, loss: 0.126404, win_rate: [ 0.31408844 0.33696772 0.34894383]
episode: 756000, epsilon: 0.9, loss: 0.108825, win_rate: [ 0.31420197 0.33687654 0.3489215 ]
episode: 758000, epsilon: 0.9, loss: 0.0923645, win_rate: [ 0.31425948 0.33684784 0.34889268]
episode: 760000, epsilon: 0.9, loss: 0.115459, win_rate: [ 0.31428248 0.33680087 0.34891665]
episode: 762000, epsilon: 0.9, loss: 0.104361, win_rate: [ 0.31434211 0.33674103 0.34891686]
episode: 764000, epsilon: 0.9, loss: 0.0571901, win_rate: [ 0.3144276 0.33673123 0.34884117]
episode: 766000, epsilon: 0.9, loss: 0.103351, win_rate: [ 0.31449959 0.33668755 0.34881286]
episode: 768000, epsilon: 0.9, loss: 0.105587, win_rate: [ 0.31458032 0.33660633 0.34881335]
episode: 770000, epsilon: 0.9, loss: 0.0284438, win_rate: [ 0.31461388 0.33659826 0.34878786]
episode: 772000, epsilon: 0.9, loss: 0.181051, win_rate: [ 0.31465374 0.33659412 0.34875214]
episode: 774000, epsilon: 0.9, loss: 0.0679293, win_rate: [ 0.31466626 0.33654608 0.34878766]
episode: 776000, epsilon: 0.9, loss: 0.0728733, win_rate: [ 0.31470965 0.33648153 0.34880883]
episode: 778000, epsilon: 0.9, loss: 0.0315993, win_rate: [ 0.31478623 0.33644044 0.34877333]
episode: 780000, epsilon: 0.9, loss: 0.145858, win_rate: [ 0.31484319 0.33640854 0.34874827]
episode: 782000, epsilon: 0.9, loss: 0.0476312, win_rate: [ 0.31483591 0.3364011 0.34876298]
episode: 784000, epsilon: 0.9, loss: 0.075858, win_rate: [ 0.31486057 0.33638605 0.34875338]
episode: 786000, epsilon: 0.9, loss: 0.0693947, win_rate: [ 0.31497288 0.33633036 0.34869676]
episode: 788000, epsilon: 0.9, loss: 0.127469, win_rate: [ 0.31496534 0.33637901 0.34865565]
episode: 790000, epsilon: 0.9, loss: 0.146511, win_rate: [ 0.31502618 0.33630717 0.34866665]
episode: 792000, epsilon: 0.9, loss: 0.0327443, win_rate: [ 0.31509051 0.33629882 0.34861067]
episode: 794000, epsilon: 0.9, loss: 0.122354, win_rate: [ 0.31514066 0.33632451 0.34853483]
episode: 796000, epsilon: 0.9, loss: 0.183059, win_rate: [ 0.31520438 0.33628727 0.34850836]
episode: 798000, epsilon: 0.9, loss: 0.0890774, win_rate: [ 0.31524522 0.33621386 0.34854092]
episode: 800000, epsilon: 0.9, loss: 0.119864, win_rate: [ 0.31524836 0.33622083 0.34853081]
episode: 802000, epsilon: 0.9, loss: 0.204856, win_rate: [ 0.31534749 0.33620781 0.3484447 ]
episode: 804000, epsilon: 0.9, loss: 0.114617, win_rate: [ 0.31540011 0.33617122 0.34842867]
episode: 806000, epsilon: 0.9, loss: 0.0747776, win_rate: [ 0.31542889 0.33612117 0.34844994]
episode: 808000, epsilon: 0.9, loss: 0.0746621, win_rate: [ 0.31547238 0.33606023 0.34846739]
episode: 810000, epsilon: 0.9, loss: 0.0772573, win_rate: [ 0.31551443 0.3360107 0.34847488]
episode: 812000, epsilon: 0.9, loss: 0.0218551, win_rate: [ 0.31558089 0.33598973 0.34842937]
episode: 814000, epsilon: 0.9, loss: 0.079478, win_rate: [ 0.3156286 0.3359615 0.34840989]
episode: 816000, epsilon: 0.9, loss: 0.132875, win_rate: [ 0.31565893 0.33595547 0.3483856 ]
episode: 818000, epsilon: 0.9, loss: 0.0548598, win_rate: [ 0.31567688 0.33594458 0.34837855]
episode: 820000, epsilon: 0.9, loss: 0.0506202, win_rate: [ 0.31569352 0.33592276 0.34838372]
episode: 822000, epsilon: 0.9, loss: 0.0910465, win_rate: [ 0.31576361 0.3358974 0.34833899]
episode: 824000, epsilon: 0.9, loss: 0.197816, win_rate: [ 0.31577146 0.3358928 0.34833574]
episode: 826000, epsilon: 0.9, loss: 0.15523, win_rate: [ 0.31577323 0.33592211 0.34830466]
episode: 828000, epsilon: 0.9, loss: 0.293722, win_rate: [ 0.31587039 0.33593438 0.34819523]
episode: 830000, epsilon: 0.9, loss: 0.32723, win_rate: [ 0.3159442 0.335902 0.3481538]
episode: 832000, epsilon: 0.9, loss: 0.0613427, win_rate: [ 0.31599481 0.33590464 0.34810054]
episode: 834000, epsilon: 0.9, loss: 0.0843161, win_rate: [ 0.31610154 0.3358653 0.34803316]
episode: 836000, epsilon: 0.9, loss: 0.0840188, win_rate: [ 0.31618981 0.33583333 0.34797686]
episode: 838000, epsilon: 0.9, loss: 0.223547, win_rate: [ 0.31626215 0.33579435 0.3479435 ]
episode: 840000, epsilon: 0.9, loss: 0.391604, win_rate: [ 0.31638296 0.33577103 0.34784601]
episode: 842000, epsilon: 0.9, loss: 0.149702, win_rate: [ 0.3165115 0.33573594 0.34775256]
episode: 844000, epsilon: 0.9, loss: 0.196039, win_rate: [ 0.31663825 0.33568207 0.34767968]
episode: 846000, epsilon: 0.9, loss: 0.678151, win_rate: [ 0.31674312 0.33564499 0.34761188]
episode: 848000, epsilon: 0.9, loss: 0.130465, win_rate: [ 0.3168534 0.33560338 0.34754322]
episode: 850000, epsilon: 0.9, loss: 0.530904, win_rate: [ 0.31698433 0.33555725 0.34745841]
episode: 852000, epsilon: 0.9, loss: 1.05845, win_rate: [ 0.31717686 0.33548317 0.34733997]
episode: 854000, epsilon: 0.9, loss: 0.294223, win_rate: [ 0.31731462 0.33541647 0.34726891]
episode: 856000, epsilon: 0.9, loss: 0.24601, win_rate: [ 0.31748211 0.33535475 0.34716315]
episode: 858000, epsilon: 0.9, loss: 0.140483, win_rate: [ 0.31763949 0.33532129 0.34703922]
episode: 860000, epsilon: 0.9, loss: 0.259827, win_rate: [ 0.31778451 0.33527868 0.34693681]
episode: 862000, epsilon: 0.9, loss: 0.569678, win_rate: [ 0.31796715 0.33519335 0.3468395 ]
episode: 864000, epsilon: 0.9, loss: 0.173579, win_rate: [ 0.31808528 0.33516281 0.34675191]
episode: 866000, epsilon: 0.9, loss: 0.122627, win_rate: [ 0.31815899 0.33514395 0.34669706]
episode: 868000, epsilon: 0.9, loss: 0.109343, win_rate: [ 0.31826922 0.33509754 0.34663324]
episode: 870000, epsilon: 0.9, loss: 0.063344, win_rate: [ 0.31835711 0.33501801 0.34662489]
episode: 872000, epsilon: 0.9, loss: 0.100988, win_rate: [ 0.31836775 0.33501796 0.34661428]
episode: 874000, epsilon: 0.9, loss: 0.0945025, win_rate: [ 0.31843785 0.33498474 0.34657741]
episode: 876000, epsilon: 0.9, loss: 0.0959731, win_rate: [ 0.31846197 0.33497108 0.34656696]
episode: 878000, epsilon: 0.9, loss: 0.0977433, win_rate: [ 0.31854406 0.3349495 0.34650644]
episode: 880000, epsilon: 0.9, loss: 0.0577249, win_rate: [ 0.31858487 0.33488598 0.34652915]
episode: 882000, epsilon: 0.9, loss: 0.0436054, win_rate: [ 0.31865157 0.33484429 0.34650414]
episode: 884000, epsilon: 0.9, loss: 0.180819, win_rate: [ 0.31869308 0.33483333 0.34647359]
episode: 886000, epsilon: 0.9, loss: 0.141343, win_rate: [ 0.31870619 0.33485402 0.34643979]
episode: 888000, epsilon: 0.9, loss: 0.0563448, win_rate: [ 0.31871698 0.33490503 0.34637799]
episode: 890000, epsilon: 0.9, loss: 0.0563546, win_rate: [ 0.31870751 0.33489625 0.34639624]
episode: 892000, epsilon: 0.9, loss: 0.080206, win_rate: [ 0.31877879 0.33487182 0.34634939]
episode: 894000, epsilon: 0.9, loss: 0.098336, win_rate: [ 0.31881061 0.33486763 0.34632176]
episode: 896000, epsilon: 0.9, loss: 0.103839, win_rate: [ 0.31886348 0.33483445 0.34630207]
episode: 898000, epsilon: 0.9, loss: 0.0273521, win_rate: [ 0.31889942 0.33479807 0.34630251]
episode: 900000, epsilon: 0.9, loss: 0.177287, win_rate: [ 0.3189752 0.33475629 0.3462685 ]
episode: 902000, epsilon: 0.9, loss: 0.106376, win_rate: [ 0.31904954 0.33473466 0.3462158 ]
episode: 904000, epsilon: 0.9, loss: 0.0525703, win_rate: [ 0.31914235 0.33474299 0.34611466]
episode: 906000, epsilon: 0.9, loss: 0.0541536, win_rate: [ 0.31919832 0.33471817 0.3460835 ]
episode: 908000, epsilon: 0.9, loss: 0.111993, win_rate: [ 0.31922212 0.33466593 0.34611195]
episode: 910000, epsilon: 0.9, loss: 0.115006, win_rate: [ 0.31923921 0.33463699 0.3461238 ]
episode: 912000, epsilon: 0.9, loss: 0.299983, win_rate: [ 0.31927926 0.33462463 0.34609611]
episode: 914000, epsilon: 0.9, loss: 0.0933144, win_rate: [ 0.31934648 0.3345981 0.34605542]
episode: 916000, epsilon: 0.9, loss: 0.0335358, win_rate: [ 0.31943742 0.33453784 0.34602473]
episode: 918000, epsilon: 0.9, loss: 0.0534133, win_rate: [ 0.31950292 0.33451924 0.34597784]
episode: 920000, epsilon: 0.9, loss: 0.298455, win_rate: [ 0.319604 0.33445181 0.34594419]
episode: 922000, epsilon: 0.9, loss: 0.0622094, win_rate: [ 0.31963197 0.33441287 0.34595516]
episode: 924000, epsilon: 0.9, loss: 0.0879177, win_rate: [ 0.31967173 0.33438925 0.34593902]
episode: 926000, epsilon: 0.9, loss: 0.0941045, win_rate: [ 0.31970376 0.33437005 0.34592619]
episode: 928000, epsilon: 0.9, loss: 0.0475058, win_rate: [ 0.31972703 0.3343671 0.34590588]
episode: 930000, epsilon: 0.9, loss: 0.0578048, win_rate: [ 0.31977385 0.3343448 0.34588135]
episode: 932000, epsilon: 0.9, loss: 0.0390147, win_rate: [ 0.31981082 0.33431616 0.34587302]
episode: 934000, epsilon: 0.9, loss: 0.0726372, win_rate: [ 0.3197866 0.33431228 0.34590113]
episode: 936000, epsilon: 0.9, loss: 0.111728, win_rate: [ 0.31982551 0.33427956 0.34589493]
episode: 938000, epsilon: 0.9, loss: 0.0625941, win_rate: [ 0.31986 0.33427043 0.34586957]
episode: 940000, epsilon: 0.9, loss: 0.0299137, win_rate: [ 0.31994753 0.33425496 0.3457975 ]
episode: 942000, epsilon: 0.9, loss: 0.0726181, win_rate: [ 0.3199986 0.33424911 0.34575229]
episode: 944000, epsilon: 0.9, loss: 0.038456, win_rate: [ 0.32001661 0.33422104 0.34576235]
episode: 946000, epsilon: 0.9, loss: 0.0437996, win_rate: [ 0.32001235 0.33424912 0.34573854]
episode: 948000, epsilon: 0.9, loss: 0.0963162, win_rate: [ 0.32005135 0.33425492 0.34569373]
episode: 950000, epsilon: 0.9, loss: 0.14459, win_rate: [ 0.32011861 0.33420596 0.34567543]
episode: 952000, epsilon: 0.9, loss: 0.0497625, win_rate: [ 0.32014462 0.33417402 0.34568136]
episode: 954000, epsilon: 0.9, loss: 0.167269, win_rate: [ 0.32016528 0.33414954 0.34568517]
episode: 956000, epsilon: 0.9, loss: 0.105935, win_rate: [ 0.32019841 0.33410007 0.34570152]
episode: 958000, epsilon: 0.9, loss: 0.0334828, win_rate: [ 0.32023662 0.33407481 0.34568857]
episode: 960000, epsilon: 0.9, loss: 0.023265, win_rate: [ 0.32026946 0.33406319 0.34566735]
episode: 962000, epsilon: 0.9, loss: 0.131661, win_rate: [ 0.32030632 0.33405059 0.34564309]
episode: 964000, epsilon: 0.9, loss: 0.0965433, win_rate: [ 0.32033473 0.33401833 0.34564694]
episode: 966000, epsilon: 0.9, loss: 0.101683, win_rate: [ 0.32037027 0.33399137 0.34563836]
episode: 968000, epsilon: 0.9, loss: 0.118052, win_rate: [ 0.32042942 0.33396866 0.34560192]
episode: 970000, epsilon: 0.9, loss: 0.0608048, win_rate: [ 0.32048936 0.33392852 0.34558212]
episode: 972000, epsilon: 0.9, loss: 0.177394, win_rate: [ 0.32050584 0.33390809 0.34558606]
episode: 974000, epsilon: 0.9, loss: 0.135373, win_rate: [ 0.32056641 0.3338744 0.34555919]
episode: 976000, epsilon: 0.9, loss: 0.0473096, win_rate: [ 0.32062262 0.33382548 0.3455519 ]
episode: 978000, epsilon: 0.9, loss: 0.0492055, win_rate: [ 0.32066532 0.33381459 0.3455201 ]
episode: 980000, epsilon: 0.9, loss: 0.0584045, win_rate: [ 0.32068641 0.33378537 0.34552822]
episode: 982000, epsilon: 0.9, loss: 0.0643864, win_rate: [ 0.3206789 0.33379905 0.34552205]
episode: 984000, epsilon: 0.9, loss: 0.0388458, win_rate: [ 0.32072224 0.33376287 0.34551489]
episode: 986000, epsilon: 0.9, loss: 0.0367076, win_rate: [ 0.32071874 0.33379784 0.34548342]
episode: 988000, epsilon: 0.9, loss: 0.0652551, win_rate: [ 0.32074158 0.33379015 0.34546827]
episode: 990000, epsilon: 0.9, loss: 0.0554728, win_rate: [ 0.32077139 0.33376936 0.34545925]
episode: 992000, epsilon: 0.9, loss: 0.124443, win_rate: [ 0.32079504 0.33376277 0.34544219]
episode: 994000, epsilon: 0.9, loss: 0.0114861, win_rate: [ 0.32081758 0.33374514 0.34543728]
episode: 996000, epsilon: 0.9, loss: 0.0524915, win_rate: [ 0.32084305 0.3337075 0.34544945]
episode: 998000, epsilon: 0.9, loss: 0.0256955, win_rate: [ 0.32087543 0.33369506 0.34542951]
episode: 1000000, epsilon: 0.9, loss: 0.0506189, win_rate: [ 0.32090168 0.33365167 0.34544665]
episode: 1002000, epsilon: 0.9, loss: 0.0637978, win_rate: [ 0.3209378 0.33363639 0.3454258 ]
episode: 1004000, epsilon: 0.9, loss: 0.133739, win_rate: [ 0.3209688 0.33362118 0.34541001]
episode: 1006000, epsilon: 0.9, loss: 0.0498048, win_rate: [ 0.32095992 0.33364679 0.34539329]
episode: 1008000, epsilon: 0.9, loss: 0.0782023, win_rate: [ 0.32100464 0.33361673 0.34537863]
episode: 1010000, epsilon: 0.9, loss: 0.0723313, win_rate: [ 0.32100859 0.3336066 0.34538481]
episode: 1012000, epsilon: 0.9, loss: 0.0862523, win_rate: [ 0.32100364 0.33364097 0.34535539]
episode: 1014000, epsilon: 0.9, loss: 0.0841308, win_rate: [ 0.32105984 0.33363774 0.34530242]
episode: 1016000, epsilon: 0.9, loss: 0.053612, win_rate: [ 0.32109319 0.33360991 0.3452969 ]
episode: 1018000, epsilon: 0.9, loss: 0.086102, win_rate: [ 0.32114409 0.33359889 0.34525703]
episode: 1020000, epsilon: 0.9, loss: 0.186626, win_rate: [ 0.32116733 0.33358595 0.34524672]
episode: 1022000, epsilon: 0.9, loss: 0.109487, win_rate: [ 0.32122865 0.33355643 0.34521493]
episode: 1024000, epsilon: 0.9, loss: 0.0703795, win_rate: [ 0.3212282 0.33352799 0.3452438 ]
episode: 1026000, epsilon: 0.9, loss: 0.197689, win_rate: [ 0.32124628 0.33353379 0.34521994]
episode: 1028000, epsilon: 0.9, loss: 0.0935955, win_rate: [ 0.32132848 0.333488 0.34518352]
episode: 1030000, epsilon: 0.9, loss: 0.125539, win_rate: [ 0.32138124 0.33347346 0.3451453 ]
episode: 1032000, epsilon: 0.9, loss: 0.0677419, win_rate: [ 0.32145124 0.33343572 0.34511304]
episode: 1034000, epsilon: 0.9, loss: 0.139671, win_rate: [ 0.32149099 0.33340974 0.34509928]
episode: 1036000, epsilon: 0.9, loss: 0.275071, win_rate: [ 0.32154892 0.33340701 0.34504407]
episode: 1038000, epsilon: 0.9, loss: 0.0970106, win_rate: [ 0.32165191 0.33337058 0.34497751]
episode: 1040000, epsilon: 0.9, loss: 0.198754, win_rate: [ 0.32170161 0.33334776 0.34495063]
episode: 1042000, epsilon: 0.9, loss: 0.124288, win_rate: [ 0.32175017 0.33330774 0.34494209]
episode: 1044000, epsilon: 0.9, loss: 0.128581, win_rate: [ 0.32182632 0.33327746 0.34489622]
episode: 1046000, epsilon: 0.9, loss: 0.0742144, win_rate: [ 0.32189071 0.33326259 0.34484671]
episode: 1048000, epsilon: 0.9, loss: 0.111558, win_rate: [ 0.32190236 0.33326399 0.34483364]
episode: 1050000, epsilon: 0.9, loss: 0.0842839, win_rate: [ 0.32191398 0.33327968 0.34480634]
episode: 1052000, epsilon: 0.9, loss: 0.0381279, win_rate: [ 0.32191794 0.33328961 0.34479245]
episode: 1054000, epsilon: 0.9, loss: 0.0547885, win_rate: [ 0.32196649 0.33327293 0.34476058]
episode: 1056000, epsilon: 0.9, loss: 0.0934835, win_rate: [ 0.32198738 0.3332762 0.34473642]
episode: 1058000, epsilon: 0.9, loss: 0.0308796, win_rate: [ 0.32200442 0.33327095 0.34472463]
episode: 1060000, epsilon: 0.9, loss: 0.0676955, win_rate: [ 0.32207045 0.33325063 0.34467892]
episode: 1062000, epsilon: 0.9, loss: 0.0999288, win_rate: [ 0.32209103 0.33324545 0.34466352]
episode: 1064000, epsilon: 0.9, loss: 0.0832386, win_rate: [ 0.32210214 0.33320927 0.34468859]
episode: 1066000, epsilon: 0.9, loss: 0.0532691, win_rate: [ 0.3221573 0.33319293 0.34464977]
episode: 1068000, epsilon: 0.9, loss: 0.0614725, win_rate: [ 0.32223284 0.33316823 0.34459893]
episode: 1070000, epsilon: 0.9, loss: 0.0927298, win_rate: [ 0.32226045 0.33315203 0.34458753]
episode: 1072000, epsilon: 0.9, loss: 0.0552366, win_rate: [ 0.32229821 0.33315454 0.34454725]
episode: 1074000, epsilon: 0.9, loss: 0.0715846, win_rate: [ 0.32230882 0.33316636 0.34452482]
episode: 1076000, epsilon: 0.9, loss: 0.124786, win_rate: [ 0.32234821 0.33317255 0.34447923]
episode: 1078000, epsilon: 0.9, loss: 0.0572503, win_rate: [ 0.32236612 0.33319542 0.34443846]
episode: 1080000, epsilon: 0.9, loss: 0.0553883, win_rate: [ 0.32240248 0.33317469 0.34442283]
episode: 1082000, epsilon: 0.9, loss: 0.485112, win_rate: [ 0.32246088 0.33314941 0.3443897 ]
episode: 1084000, epsilon: 0.9, loss: 0.062188, win_rate: [ 0.32251723 0.33312331 0.34435946]
episode: 1086000, epsilon: 0.9, loss: 0.148255, win_rate: [ 0.32255587 0.33314242 0.34430171]
episode: 1088000, epsilon: 0.9, loss: 0.0851735, win_rate: [ 0.32265687 0.33309252 0.3442506 ]
episode: 1090000, epsilon: 0.9, loss: 0.080351, win_rate: [ 0.32273732 0.33306483 0.34419785]
episode: 1092000, epsilon: 0.9, loss: 0.0814694, win_rate: [ 0.32285135 0.33301618 0.34413247]
episode: 1094000, epsilon: 0.9, loss: 0.100962, win_rate: [ 0.32291104 0.33297959 0.34410937]
episode: 1096000, epsilon: 0.9, loss: 0.207704, win_rate: [ 0.32300609 0.33291028 0.34408363]
episode: 1098000, epsilon: 0.9, loss: 0.142077, win_rate: [ 0.32308076 0.33289041 0.34402883]
episode: 1100000, epsilon: 0.9, loss: 0.0911651, win_rate: [ 0.32314971 0.33284242 0.34400787]
episode: 1102000, epsilon: 0.9, loss: 0.128663, win_rate: [ 0.32322384 0.33281095 0.34396521]
episode: 1104000, epsilon: 0.9, loss: 0.0518107, win_rate: [ 0.32327145 0.33277234 0.34395621]
episode: 1106000, epsilon: 0.9, loss: 0.218895, win_rate: [ 0.3232809 0.33274744 0.34397166]
episode: 1108000, epsilon: 0.9, loss: 0.0427323, win_rate: [ 0.32333545 0.3326766 0.34398796]
episode: 1110000, epsilon: 0.9, loss: 0.0568478, win_rate: [ 0.3233862 0.33264925 0.34396455]
episode: 1112000, epsilon: 0.9, loss: 0.130261, win_rate: [ 0.32344216 0.33259772 0.34396012]
episode: 1114000, epsilon: 0.9, loss: 0.14201, win_rate: [ 0.3234638 0.33259485 0.34394134]
episode: 1116000, epsilon: 0.9, loss: 0.119496, win_rate: [ 0.32352749 0.33254271 0.3439298 ]
episode: 1118000, epsilon: 0.9, loss: 0.0743658, win_rate: [ 0.32355874 0.33250239 0.34393887]
episode: 1120000, epsilon: 0.9, loss: 0.0912209, win_rate: [ 0.323589 0.33248095 0.34393005]
episode: 1122000, epsilon: 0.9, loss: 0.111783, win_rate: [ 0.32361647 0.33247118 0.34391235]
episode: 1124000, epsilon: 0.9, loss: 0.09344, win_rate: [ 0.32363405 0.33247301 0.34389293]
episode: 1126000, epsilon: 0.9, loss: 0.0460583, win_rate: [ 0.32366845 0.33243043 0.34390112]
episode: 1128000, epsilon: 0.9, loss: 0.304033, win_rate: [ 0.32370805 0.33241815 0.34387381]
episode: 1130000, epsilon: 0.9, loss: 0.119355, win_rate: [ 0.32377228 0.33241564 0.34381209]
episode: 1132000, epsilon: 0.9, loss: 0.36513, win_rate: [ 0.32380978 0.33238575 0.34380447]
episode: 1134000, epsilon: 0.9, loss: 0.528388, win_rate: [ 0.32387626 0.3323692 0.34375455]
episode: 1136000, epsilon: 0.9, loss: 0.0765519, win_rate: [ 0.3239557 0.33235094 0.34369336]
episode: 1138000, epsilon: 0.9, loss: 0.234509, win_rate: [ 0.32399093 0.33236702 0.34364205]
episode: 1140000, epsilon: 0.9, loss: 0.0926188, win_rate: [ 0.32406901 0.33231638 0.34361461]
episode: 1142000, epsilon: 0.9, loss: 0.121914, win_rate: [ 0.32413982 0.3323202 0.34353998]
episode: 1144000, epsilon: 0.9, loss: 0.281578, win_rate: [ 0.32419465 0.3323109 0.34349445]
episode: 1146000, epsilon: 0.9, loss: 0.34272, win_rate: [ 0.32425277 0.33231734 0.34342989]
episode: 1148000, epsilon: 0.9, loss: 0.119814, win_rate: [ 0.32432463 0.33230198 0.34337339]
episode: 1150000, epsilon: 0.9, loss: 0.44163, win_rate: [ 0.32439972 0.33223449 0.34336579]
episode: 1152000, epsilon: 0.9, loss: 0.137115, win_rate: [ 0.32444503 0.3322202 0.34333477]
episode: 1154000, epsilon: 0.9, loss: 0.192059, win_rate: [ 0.32451532 0.33219729 0.3432874 ]
episode: 1156000, epsilon: 0.9, loss: 0.274369, win_rate: [ 0.32461823 0.33215283 0.34322894]
episode: 1158000, epsilon: 0.9, loss: 0.345975, win_rate: [ 0.32472424 0.33209816 0.3431776 ]
episode: 1160000, epsilon: 0.9, loss: 0.224558, win_rate: [ 0.3247842 0.33206523 0.34315057]
episode: 1162000, epsilon: 0.9, loss: 0.314646, win_rate: [ 0.3248276 0.33205909 0.3431133 ]
episode: 1164000, epsilon: 0.9, loss: 0.248279, win_rate: [ 0.3248889 0.33201346 0.34309764]
episode: 1166000, epsilon: 0.9, loss: 0.112562, win_rate: [ 0.32495427 0.33199714 0.34304859]
episode: 1168000, epsilon: 0.9, loss: 0.573, win_rate: [ 0.32501513 0.3319552 0.34302967]
episode: 1170000, epsilon: 0.9, loss: 0.176453, win_rate: [ 0.32504502 0.33191937 0.3430356 ]
episode: 1172000, epsilon: 0.9, loss: 0.0706293, win_rate: [ 0.32510638 0.33190501 0.34298862]
episode: 1174000, epsilon: 0.9, loss: 0.165791, win_rate: [ 0.32516838 0.33188302 0.3429486 ]
episode: 1176000, epsilon: 0.9, loss: 0.277321, win_rate: [ 0.32521656 0.33187727 0.34290617]
episode: 1178000, epsilon: 0.9, loss: 0.176294, win_rate: [ 0.32529259 0.33186305 0.34284436]
episode: 1180000, epsilon: 0.9, loss: 0.189686, win_rate: [ 0.32538532 0.33184294 0.34277174]
episode: 1182000, epsilon: 0.9, loss: 0.104302, win_rate: [ 0.32543712 0.33180683 0.34275606]
episode: 1184000, epsilon: 0.9, loss: 0.0868109, win_rate: [ 0.32547608 0.33176999 0.34275393]
episode: 1186000, epsilon: 0.9, loss: 0.0652112, win_rate: [ 0.3255402 0.33174846 0.34271135]
episode: 1188000, epsilon: 0.9, loss: 0.135256, win_rate: [ 0.32558306 0.33171437 0.34270257]
episode: 1190000, epsilon: 0.9, loss: 0.0837329, win_rate: [ 0.32564847 0.33166947 0.34268206]
episode: 1192000, epsilon: 0.9, loss: 0.227607, win_rate: [ 0.32567674 0.33164318 0.34268008]
episode: 1194000, epsilon: 0.9, loss: 0.0755312, win_rate: [ 0.32570073 0.33162368 0.34267559]
episode: 1196000, epsilon: 0.9, loss: 0.0790938, win_rate: [ 0.32573886 0.33157832 0.34268282]
episode: 1198000, epsilon: 0.9, loss: 0.0662919, win_rate: [ 0.3258211 0.33153729 0.34264162]
episode: 1200000, epsilon: 0.9, loss: 0.163289, win_rate: [ 0.32587723 0.33148306 0.34263971]
episode: 1202000, epsilon: 0.9, loss: 0.0807239, win_rate: [ 0.32588991 0.33148725 0.34262284]
episode: 1204000, epsilon: 0.9, loss: 0.0745174, win_rate: [ 0.32590421 0.3314964 0.34259938]
episode: 1206000, epsilon: 0.9, loss: 0.112053, win_rate: [ 0.32589774 0.33148812 0.34261414]
episode: 1208000, epsilon: 0.9, loss: 0.139617, win_rate: [ 0.3259095 0.33148069 0.34260982]
episode: 1210000, epsilon: 0.9, loss: 0.0172084, win_rate: [ 0.32593609 0.33147576 0.34258815]
episode: 1212000, epsilon: 0.9, loss: 0.0569144, win_rate: [ 0.32593537 0.33146837 0.34259625]
episode: 1214000, epsilon: 0.9, loss: 0.0942227, win_rate: [ 0.32594866 0.33147749 0.34257385]
episode: 1216000, epsilon: 0.9, loss: 0.0475396, win_rate: [ 0.32597424 0.33143476 0.342591 ]
episode: 1218000, epsilon: 0.9, loss: 0.079552, win_rate: [ 0.32600137 0.33143076 0.34256786]
episode: 1220000, epsilon: 0.9, loss: 0.143652, win_rate: [ 0.32602514 0.33141284 0.34256201]
episode: 1222000, epsilon: 0.9, loss: 0.0665064, win_rate: [ 0.32604229 0.33140889 0.34254882]
episode: 1224000, epsilon: 0.9, loss: 0.057487, win_rate: [ 0.32606754 0.3313543 0.34257815]
episode: 1226000, epsilon: 0.9, loss: 0.0724877, win_rate: [ 0.32607641 0.33133986 0.34258373]
episode: 1228000, epsilon: 0.9, loss: 0.121565, win_rate: [ 0.32608035 0.33132302 0.34259663]
episode: 1230000, epsilon: 0.9, loss: 0.174685, win_rate: [ 0.32608347 0.33132168 0.34259484]
episode: 1232000, epsilon: 0.9, loss: 0.107148, win_rate: [ 0.32611987 0.33129356 0.34258657]
episode: 1234000, epsilon: 0.9, loss: 0.200395, win_rate: [ 0.32615533 0.3312704 0.34257428]
episode: 1236000, epsilon: 0.9, loss: 0.389671, win_rate: [ 0.32617368 0.33123679 0.34258953]
episode: 1238000, epsilon: 0.9, loss: 0.0708498, win_rate: [ 0.3261936 0.33123398 0.34257242]
episode: 1240000, epsilon: 0.9, loss: 0.111774, win_rate: [ 0.32622635 0.33121828 0.34255537]
episode: 1242000, epsilon: 0.9, loss: 0.117341, win_rate: [ 0.32625175 0.33118411 0.34256414]
episode: 1244000, epsilon: 0.9, loss: 0.176517, win_rate: [ 0.32631405 0.33116694 0.34251902]
episode: 1246000, epsilon: 0.9, loss: 0.0704494, win_rate: [ 0.32635207 0.3311466 0.34250133]
episode: 1248000, epsilon: 0.9, loss: 0.106966, win_rate: [ 0.32633788 0.33115759 0.34250453]
episode: 1250000, epsilon: 0.9, loss: 0.176652, win_rate: [ 0.32636214 0.33115254 0.34248533]
episode: 1252000, epsilon: 0.9, loss: 0.0328003, win_rate: [ 0.32638712 0.33112194 0.34249094]
episode: 1254000, epsilon: 0.9, loss: 0.0466095, win_rate: [ 0.32644791 0.33108427 0.34246783]
episode: 1256000, epsilon: 0.9, loss: 0.0586598, win_rate: [ 0.3265085 0.33107219 0.34241931]
episode: 1258000, epsilon: 0.9, loss: 0.0606739, win_rate: [ 0.32656969 0.33106412 0.34236618]
episode: 1260000, epsilon: 0.9, loss: 0.0840457, win_rate: [ 0.32658228 0.33104021 0.34237751]
episode: 1262000, epsilon: 0.9, loss: 0.171472, win_rate: [ 0.32660196 0.33102272 0.34237532]
episode: 1264000, epsilon: 0.9, loss: 0.0829915, win_rate: [ 0.3266374 0.33101635 0.34234625]
episode: 1266000, epsilon: 0.9, loss: 0.0695489, win_rate: [ 0.32669011 0.33098236 0.34232753]
episode: 1268000, epsilon: 0.9, loss: 0.161013, win_rate: [ 0.32670085 0.3309816 0.34231755]
episode: 1270000, epsilon: 0.9, loss: 0.118508, win_rate: [ 0.3267336 0.33097297 0.34229343]
episode: 1272000, epsilon: 0.9, loss: 0.154057, win_rate: [ 0.32674424 0.33097222 0.34228354]
episode: 1274000, epsilon: 0.9, loss: 0.0649897, win_rate: [ 0.3268145 0.330955 0.3422305]
episode: 1276000, epsilon: 0.9, loss: 0.10683, win_rate: [ 0.32684771 0.33093704 0.34221525]
episode: 1278000, epsilon: 0.9, loss: 0.0455086, win_rate: [ 0.32689255 0.33091445 0.342193 ]
episode: 1280000, epsilon: 0.9, loss: 0.151251, win_rate: [ 0.32695834 0.33086849 0.34217317]
episode: 1282000, epsilon: 0.9, loss: 0.084946, win_rate: [ 0.3269857 0.33088118 0.34213312]
episode: 1284000, epsilon: 0.9, loss: 0.0567544, win_rate: [ 0.32700753 0.33088058 0.34211188]
episode: 1286000, epsilon: 0.9, loss: 0.150667, win_rate: [ 0.3270464 0.33086055 0.34209305]
episode: 1288000, epsilon: 0.9, loss: 0.0427269, win_rate: [ 0.32709835 0.33083437 0.34206728]
episode: 1290000, epsilon: 0.9, loss: 0.052844, win_rate: [ 0.32714006 0.33082455 0.34203539]
episode: 1292000, epsilon: 0.9, loss: 0.0999404, win_rate: [ 0.32715222 0.33081631 0.34203147]
episode: 1294000, epsilon: 0.9, loss: 0.100458, win_rate: [ 0.32714967 0.33085446 0.34199587]
episode: 1296000, epsilon: 0.9, loss: 0.0162908, win_rate: [ 0.32716487 0.33084851 0.34198662]
episode: 1298000, epsilon: 0.9, loss: 0.0293832, win_rate: [ 0.32720622 0.33085028 0.3419435 ]
episode: 1300000, epsilon: 0.9, loss: 0.0712604, win_rate: [ 0.32723667 0.33084667 0.34191666]
episode: 1302000, epsilon: 0.9, loss: 0.0269455, win_rate: [ 0.32723631 0.33083231 0.34193138]
episode: 1304000, epsilon: 0.9, loss: 0.0648866, win_rate: [ 0.32727352 0.33081646 0.34191001]
episode: 1306000, epsilon: 0.9, loss: 0.12663, win_rate: [ 0.32731139 0.33079454 0.34189407]
episode: 1308000, epsilon: 0.9, loss: 0.0854803, win_rate: [ 0.32735067 0.33077115 0.34187818]
episode: 1310000, epsilon: 0.9, loss: 0.0435073, win_rate: [ 0.32739212 0.33076311 0.34184478]
episode: 1312000, epsilon: 0.9, loss: 0.0570287, win_rate: [ 0.32741515 0.33074746 0.34183739]
episode: 1314000, epsilon: 0.9, loss: 0.0342732, win_rate: [ 0.32743506 0.33073567 0.34182927]
episode: 1316000, epsilon: 0.9, loss: 0.0467514, win_rate: [ 0.32744504 0.33071783 0.34183713]
episode: 1318000, epsilon: 0.9, loss: 0.0426302, win_rate: [ 0.3274694 0.33072054 0.34181006]
episode: 1320000, epsilon: 0.9, loss: 0.0279334, win_rate: [ 0.32746263 0.33071717 0.3418202 ]
episode: 1322000, epsilon: 0.9, loss: 0.0408862, win_rate: [ 0.32748084 0.33071155 0.34180761]
episode: 1324000, epsilon: 0.9, loss: 0.100278, win_rate: [ 0.32752317 0.33070292 0.34177391]
episode: 1326000, epsilon: 0.9, loss: 0.136517, win_rate: [ 0.3275533 0.33066566 0.34178104]
episode: 1328000, epsilon: 0.9, loss: 0.0484754, win_rate: [ 0.32761045 0.3306368 0.34175275]
episode: 1330000, epsilon: 0.9, loss: 0.0814314, win_rate: [ 0.32762457 0.3306193 0.34175613]
episode: 1332000, epsilon: 0.9, loss: 0.277086, win_rate: [ 0.32762813 0.33063489 0.34173698]
episode: 1334000, epsilon: 0.9, loss: 0.0892482, win_rate: [ 0.32767367 0.33061894 0.34170739]
episode: 1336000, epsilon: 0.9, loss: 0.0639606, win_rate: [ 0.3277026 0.33059332 0.34170409]
episode: 1338000, epsilon: 0.9, loss: 0.0646907, win_rate: [ 0.32772845 0.33057225 0.3416993 ]
episode: 1340000, epsilon: 0.9, loss: 0.0388254, win_rate: [ 0.32776319 0.33057289 0.34166392]
episode: 1342000, epsilon: 0.9, loss: 0.0636873, win_rate: [ 0.32778515 0.33054446 0.34167039]
episode: 1344000, epsilon: 0.9, loss: 0.0374163, win_rate: [ 0.32776389 0.33054216 0.34169394]
episode: 1346000, epsilon: 0.9, loss: 0.076964, win_rate: [ 0.32778356 0.33054062 0.34167582]
episode: 1348000, epsilon: 0.9, loss: 0.0546578, win_rate: [ 0.32776459 0.33054501 0.3416904 ]
episode: 1350000, epsilon: 0.9, loss: 0.132656, win_rate: [ 0.32776272 0.33057013 0.34166715]
episode: 1352000, epsilon: 0.9, loss: 0.0389217, win_rate: [ 0.32782002 0.33054931 0.34163066]
episode: 1354000, epsilon: 0.9, loss: 0.105668, win_rate: [ 0.32782694 0.33053373 0.34163933]
episode: 1356000, epsilon: 0.9, loss: 0.0957844, win_rate: [ 0.32783899 0.33053884 0.34162217]
episode: 1358000, epsilon: 0.9, loss: 0.0583298, win_rate: [ 0.32788636 0.33051596 0.34159769]
episode: 1360000, epsilon: 0.9, loss: 0.0839057, win_rate: [ 0.32791226 0.3305049 0.34158284]
episode: 1362000, epsilon: 0.9, loss: 0.063126, win_rate: [ 0.32793001 0.33049388 0.34157611]
episode: 1364000, epsilon: 0.9, loss: 0.151012, win_rate: [ 0.32796237 0.33048143 0.3415562 ]
episode: 1366000, epsilon: 0.9, loss: 0.1048, win_rate: [ 0.32797926 0.33047926 0.34154148]
episode: 1368000, epsilon: 0.9, loss: 0.143023, win_rate: [ 0.32801365 0.33045005 0.3415363 ]
episode: 1370000, epsilon: 0.9, loss: 0.0938775, win_rate: [ 0.32806107 0.3304472 0.34149172]
episode: 1372000, epsilon: 0.9, loss: 0.0361067, win_rate: [ 0.32806755 0.33043781 0.34149465]
episode: 1374000, epsilon: 0.9, loss: 0.1212, win_rate: [ 0.32807618 0.33044736 0.34147646]
episode: 1376000, epsilon: 0.9, loss: 0.16099, win_rate: [ 0.32810369 0.33041982 0.3414765 ]
episode: 1378000, epsilon: 0.9, loss: 0.0868403, win_rate: [ 0.32812966 0.33043009 0.34144025]
episode: 1380000, epsilon: 0.9, loss: 0.0384476, win_rate: [ 0.32812585 0.33042585 0.3414483 ]
episode: 1382000, epsilon: 0.9, loss: 0.0260155, win_rate: [ 0.32813001 0.3304303 0.3414397 ]
episode: 1384000, epsilon: 0.9, loss: 0.103647, win_rate: [ 0.32816739 0.33042317 0.34140944]
episode: 1386000, epsilon: 0.9, loss: 0.0384848, win_rate: [ 0.32820106 0.33039154 0.3414074 ]
episode: 1388000, epsilon: 0.9, loss: 0.0797451, win_rate: [ 0.32825409 0.33038161 0.34136431]
episode: 1390000, epsilon: 0.9, loss: 0.0524445, win_rate: [ 0.32826955 0.33038178 0.34134868]
episode: 1392000, epsilon: 0.9, loss: 0.105575, win_rate: [ 0.32829143 0.33035537 0.3413532 ]
episode: 1394000, epsilon: 0.9, loss: 0.0964302, win_rate: [ 0.32832258 0.33036849 0.34130894]
episode: 1396000, epsilon: 0.9, loss: 0.660192, win_rate: [ 0.32837441 0.33034933 0.34127626]
episode: 1398000, epsilon: 0.9, loss: 0.440172, win_rate: [ 0.32842108 0.33032594 0.34125297]
episode: 1400000, epsilon: 0.9, loss: 0.401245, win_rate: [ 0.32849905 0.33032405 0.3411769 ]
episode: 1402000, epsilon: 0.9, loss: 0.138815, win_rate: [ 0.32858821 0.33029292 0.34111887]
episode: 1404000, epsilon: 0.9, loss: 0.199474, win_rate: [ 0.32867569 0.33026686 0.34105745]
episode: 1406000, epsilon: 0.9, loss: 0.0616103, win_rate: [ 0.32873945 0.33026292 0.34099762]
episode: 1408000, epsilon: 0.9, loss: 0.264443, win_rate: [ 0.32879522 0.33024124 0.34096354]
episode: 1410000, epsilon: 0.9, loss: 0.220737, win_rate: [ 0.32885509 0.33022033 0.34092458]
episode: 1412000, epsilon: 0.9, loss: 0.414904, win_rate: [ 0.32892611 0.33018744 0.34088644]
episode: 1414000, epsilon: 0.9, loss: 0.133153, win_rate: [ 0.32896653 0.33016243 0.34087105]
episode: 1416000, epsilon: 0.9, loss: 0.0521375, win_rate: [ 0.32902237 0.33012406 0.34085357]
episode: 1418000, epsilon: 0.9, loss: 0.101729, win_rate: [ 0.32905195 0.3301126 0.34083544]
episode: 1420000, epsilon: 0.9, loss: 0.0727152, win_rate: [ 0.32907723 0.33011878 0.34080399]
episode: 1422000, epsilon: 0.9, loss: 0.0580982, win_rate: [ 0.32911791 0.33009892 0.34078316]
episode: 1424000, epsilon: 0.9, loss: 0.0896811, win_rate: [ 0.32909176 0.33011915 0.34078909]
episode: 1426000, epsilon: 0.9, loss: 0.0547686, win_rate: [ 0.32909935 0.33010917 0.34079149]
episode: 1428000, epsilon: 0.9, loss: 0.203545, win_rate: [ 0.32911181 0.3300873 0.34080088]
episode: 1430000, epsilon: 0.9, loss: 0.0455078, win_rate: [ 0.32914872 0.33005711 0.34079417]
episode: 1432000, epsilon: 0.9, loss: 0.0676725, win_rate: [ 0.32918622 0.33003818 0.3407756 ]
episode: 1434000, epsilon: 0.9, loss: 0.101674, win_rate: [ 0.32921386 0.33002836 0.34075778]
episode: 1436000, epsilon: 0.9, loss: 0.111394, win_rate: [ 0.32925465 0.33001509 0.34073026]
episode: 1438000, epsilon: 0.9, loss: 0.114793, win_rate: [ 0.32925707 0.33001855 0.34072438]
episode: 1440000, epsilon: 0.9, loss: 0.084687, win_rate: [ 0.32927477 0.3299956 0.34072962]
episode: 1442000, epsilon: 0.9, loss: 0.0312413, win_rate: [ 0.32928549 0.32999076 0.34072376]
episode: 1444000, epsilon: 0.9, loss: 0.0771281, win_rate: [ 0.32932595 0.32997276 0.34070129]
episode: 1446000, epsilon: 0.9, loss: 0.0480501, win_rate: [ 0.32934555 0.32996519 0.34068925]
episode: 1448000, epsilon: 0.9, loss: 0.0368343, win_rate: [ 0.32936787 0.329939 0.34069313]
episode: 1450000, epsilon: 0.9, loss: 0.0611446, win_rate: [ 0.32939908 0.32992322 0.3406777 ]
episode: 1452000, epsilon: 0.9, loss: 0.185039, win_rate: [ 0.32942746 0.32990817 0.34066437]
episode: 1454000, epsilon: 0.9, loss: 0.127926, win_rate: [ 0.32947364 0.32989936 0.340627 ]
episode: 1456000, epsilon: 0.9, loss: 0.296776, win_rate: [ 0.32949291 0.32989675 0.34061034]
episode: 1458000, epsilon: 0.9, loss: 0.23728, win_rate: [ 0.329508 0.32988592 0.34060608]
episode: 1460000, epsilon: 0.9, loss: 0.140718, win_rate: [ 0.32952238 0.32989566 0.34058196]
episode: 1462000, epsilon: 0.9, loss: 0.137101, win_rate: [ 0.32954765 0.32989581 0.34055654]
episode: 1464000, epsilon: 0.9, loss: 0.195209, win_rate: [ 0.32957901 0.32988161 0.34053938]
episode: 1466000, epsilon: 0.9, loss: 0.111786, win_rate: [ 0.32960892 0.32986403 0.34052705]
episode: 1468000, epsilon: 0.9, loss: 0.138932, win_rate: [ 0.32963057 0.32982811 0.34054132]
episode: 1470000, epsilon: 0.9, loss: 0.143663, win_rate: [ 0.32967937 0.32979773 0.3405229 ]
episode: 1472000, epsilon: 0.9, loss: 0.211779, win_rate: [ 0.32973279 0.32976676 0.34050045]
episode: 1474000, epsilon: 0.9, loss: 0.149264, win_rate: [ 0.32975419 0.3297569 0.34048891]
episode: 1476000, epsilon: 0.9, loss: 0.065541, win_rate: [ 0.3298094 0.32972403 0.34046657]
episode: 1478000, epsilon: 0.9, loss: 0.249781, win_rate: [ 0.32984281 0.32971223 0.34044497]
episode: 1480000, epsilon: 0.9, loss: 0.276795, win_rate: [ 0.3298741 0.32969707 0.34042882]
episode: 1482000, epsilon: 0.9, loss: 0.227075, win_rate: [ 0.32994512 0.32968601 0.34036887]
episode: 1484000, epsilon: 0.9, loss: 0.0795458, win_rate: [ 0.32999506 0.32964803 0.34035691]
episode: 1486000, epsilon: 0.9, loss: 0.0401089, win_rate: [ 0.33005025 0.32960543 0.34034432]
episode: 1488000, epsilon: 0.9, loss: 0.0542639, win_rate: [ 0.33009857 0.32959521 0.34030622]
episode: 1490000, epsilon: 0.9, loss: 0.141791, win_rate: [ 0.33014542 0.32958233 0.34027225]
episode: 1492000, epsilon: 0.9, loss: 0.451235, win_rate: [ 0.33016265 0.32959093 0.34024642]
episode: 1494000, epsilon: 0.9, loss: 0.0919776, win_rate: [ 0.33018987 0.32960018 0.34020995]
episode: 1496000, epsilon: 0.9, loss: 0.343012, win_rate: [ 0.33025847 0.32960072 0.34014082]
episode: 1498000, epsilon: 0.9, loss: 0.587568, win_rate: [ 0.33031153 0.32957388 0.34011459]
episode: 1500000, epsilon: 0.9, loss: 0.150008, win_rate: [ 0.33037511 0.32955045 0.34007444]
episode: 1502000, epsilon: 0.9, loss: 0.183544, win_rate: [ 0.33044785 0.32950311 0.34004904]
episode: 1504000, epsilon: 0.9, loss: 0.134447, win_rate: [ 0.33048116 0.32949513 0.34002371]
episode: 1506000, epsilon: 0.9, loss: 0.127789, win_rate: [ 0.33052302 0.32949447 0.33998251]
episode: 1508000, epsilon: 0.9, loss: 0.193506, win_rate: [ 0.3305714 0.32947127 0.33995733]
episode: 1510000, epsilon: 0.9, loss: 0.66819, win_rate: [ 0.33062892 0.32944945 0.33992163]
episode: 1512000, epsilon: 0.9, loss: 0.511293, win_rate: [ 0.33067505 0.32945018 0.33987478]
episode: 1514000, epsilon: 0.9, loss: 0.117619, win_rate: [ 0.33073558 0.3294225 0.33984192]
episode: 1516000, epsilon: 0.9, loss: 0.0548076, win_rate: [ 0.33078474 0.32941271 0.33980255]
learning_rate = 0.0001
episode: 1518000, epsilon: 0.9, loss: 0, win_rate: [ 0.39666667 0.28333333 0.32 ]
episode: 1520000, epsilon: 0.9, loss: 0.271768, win_rate: [ 0.3773913 0.31434783 0.30826087]
episode: 1522000, epsilon: 0.9, loss: 0.203776, win_rate: [ 0.3744186 0.3155814 0.31 ]
episode: 1524000, epsilon: 0.9, loss: 0.117541, win_rate: [ 0.37507937 0.3152381 0.30968254]
episode: 1526000, epsilon: 0.9, loss: 0.372409, win_rate: [ 0.37614458 0.31843373 0.30542169]
episode: 1528000, epsilon: 0.9, loss: 0.838946, win_rate: [ 0.37621359 0.31466019 0.30912621]
episode: 1530000, epsilon: 0.9, loss: 0.267074, win_rate: [ 0.37544715 0.31585366 0.30869919]
episode: 1532000, epsilon: 0.9, loss: 0.262787, win_rate: [ 0.37265734 0.31902098 0.30832168]
episode: 1534000, epsilon: 0.9, loss: 0.383413, win_rate: [ 0.37312883 0.31588957 0.3109816 ]
episode: 1536000, epsilon: 0.9, loss: 0.193851, win_rate: [ 0.37174863 0.31590164 0.31234973]
episode: 1538000, epsilon: 0.9, loss: 0.694638, win_rate: [ 0.37408867 0.31453202 0.31137931]
episode: 1540000, epsilon: 0.9, loss: 0.393239, win_rate: [ 0.37251121 0.3167713 0.31071749]
episode: 1542000, epsilon: 0.9, loss: 0.394909, win_rate: [ 0.37386831 0.31641975 0.30971193]
episode: 1544000, epsilon: 0.9, loss: 0.174821, win_rate: [ 0.37429658 0.31596958 0.30973384]
episode: 1546000, epsilon: 0.9, loss: 0.197844, win_rate: [ 0.37459364 0.31660777 0.30879859]
episode: 1548000, epsilon: 0.9, loss: 0.160928, win_rate: [ 0.37405941 0.31706271 0.30887789]
episode: 1550000, epsilon: 0.9, loss: 0.214234, win_rate: [ 0.37532508 0.31668731 0.30798762]
episode: 1552000, epsilon: 0.9, loss: 0.303717, win_rate: [ 0.37548105 0.31629738 0.30822157]
episode: 1554000, epsilon: 0.9, loss: 0.431463, win_rate: [ 0.37752066 0.31553719 0.30694215]
episode: 1556000, epsilon: 0.9, loss: 0.154005, win_rate: [ 0.37879896 0.31331593 0.30788512]
episode: 1558000, epsilon: 0.9, loss: 0.171552, win_rate: [ 0.378933 0.31344913 0.30761787]
episode: 1560000, epsilon: 0.9, loss: 0.0993965, win_rate: [ 0.37992908 0.3127896 0.30728132]
episode: 1562000, epsilon: 0.9, loss: 0.201021, win_rate: [ 0.37923251 0.3131377 0.3076298 ]
episode: 1564000, epsilon: 0.9, loss: 0.421224, win_rate: [ 0.37928726 0.3137581 0.30695464]
episode: 1566000, epsilon: 0.9, loss: 0.120805, win_rate: [ 0.37929607 0.31397516 0.30672878]
episode: 1568000, epsilon: 0.9, loss: 0.107305, win_rate: [ 0.37942346 0.31367793 0.30689861]
episode: 1570000, epsilon: 0.9, loss: 0.0587219, win_rate: [ 0.37873805 0.31418738 0.30707457]
episode: 1572000, epsilon: 0.9, loss: 0.220529, win_rate: [ 0.3779558 0.31499079 0.30705341]
episode: 1574000, epsilon: 0.9, loss: 0.242412, win_rate: [ 0.37721137 0.31516874 0.30761989]
episode: 1576000, epsilon: 0.9, loss: 0.184262, win_rate: [ 0.37636364 0.31524871 0.30838765]
episode: 1578000, epsilon: 0.9, loss: 0.0950626, win_rate: [ 0.37573798 0.31505804 0.30920398]
episode: 1580000, epsilon: 0.9, loss: 0.20343, win_rate: [ 0.37536116 0.31483146 0.30980738]
episode: 1582000, epsilon: 0.9, loss: 0.13025, win_rate: [ 0.3748056 0.31482115 0.31037325]
episode: 1584000, epsilon: 0.9, loss: 0.0490923, win_rate: [ 0.37457014 0.31485671 0.31057315]
episode: 1586000, epsilon: 0.9, loss: 0.0573395, win_rate: [ 0.37440703 0.31487555 0.31071742]
episode: 1588000, epsilon: 0.9, loss: 0.0926311, win_rate: [ 0.37401138 0.31516358 0.31082504]
episode: 1590000, epsilon: 0.9, loss: 0.162171, win_rate: [ 0.37318119 0.31536653 0.31145228]
episode: 1592000, epsilon: 0.9, loss: 0.0699946, win_rate: [ 0.37269179 0.31522207 0.31208614]
episode: 1594000, epsilon: 0.9, loss: 0.201559, win_rate: [ 0.37218873 0.31503277 0.31277851]
episode: 1596000, epsilon: 0.9, loss: 0.163621, win_rate: [ 0.37162197 0.31526181 0.31311622]
episode: 1598000, epsilon: 0.9, loss: 0.0798386, win_rate: [ 0.37085928 0.31572852 0.3134122 ]
episode: 1600000, epsilon: 0.9, loss: 0.135935, win_rate: [ 0.37076549 0.31562576 0.31360875]
episode: 1602000, epsilon: 0.9, loss: 0.0463097, win_rate: [ 0.3709134 0.31559905 0.31348754]
episode: 1604000, epsilon: 0.9, loss: 0.279938, win_rate: [ 0.37041715 0.31543453 0.31414832]
episode: 1606000, epsilon: 0.9, loss: 0.113932, win_rate: [ 0.36983012 0.31558324 0.31458664]
episode: 1608000, epsilon: 0.9, loss: 0.176885, win_rate: [ 0.36952381 0.31578073 0.31469546]
episode: 1610000, epsilon: 0.9, loss: 0.29787, win_rate: [ 0.36937161 0.31600217 0.31462622]
episode: 1612000, epsilon: 0.9, loss: 0.498863, win_rate: [ 0.36949099 0.31583245 0.31467656]
episode: 1614000, epsilon: 0.9, loss: 0.257677, win_rate: [ 0.36970924 0.31559709 0.31469367]
episode: 1616000, epsilon: 0.9, loss: 0.295472, win_rate: [ 0.36957274 0.31547304 0.31495422]
episode: 1618000, epsilon: 0.9, loss: 0.111942, win_rate: [ 0.36936191 0.31547358 0.31516451]
episode: 1620000, epsilon: 0.9, loss: 0.142979, win_rate: [ 0.36923754 0.31542522 0.31533724]
episode: 1622000, epsilon: 0.9, loss: 0.165517, win_rate: [ 0.36889741 0.31563758 0.315465 ]
episode: 1624000, epsilon: 0.9, loss: 0.134166, win_rate: [ 0.36862653 0.31552211 0.31585136]
episode: 1626000, epsilon: 0.9, loss: 0.129667, win_rate: [ 0.36855032 0.31551247 0.31593721]
episode: 1628000, epsilon: 0.9, loss: 0.114139, win_rate: [ 0.36827743 0.31567543 0.31604714]
episode: 1630000, epsilon: 0.9, loss: 0.266667, win_rate: [ 0.36785396 0.31612645 0.31601959]
episode: 1632000, epsilon: 0.9, loss: 0.0479422, win_rate: [ 0.36776903 0.31622922 0.31600175]
episode: 1634000, epsilon: 0.9, loss: 0.0508065, win_rate: [ 0.36769561 0.31651763 0.31578676]
episode: 1636000, epsilon: 0.9, loss: 0.130412, win_rate: [ 0.36797126 0.3162891 0.31573964]
episode: 1638000, epsilon: 0.9, loss: 0.202116, win_rate: [ 0.36788861 0.31621779 0.3158936 ]
episode: 1640000, epsilon: 0.9, loss: 0.0633848, win_rate: [ 0.36780049 0.31615699 0.31604252]
episode: 1642000, epsilon: 0.9, loss: 0.314197, win_rate: [ 0.36759453 0.31625101 0.31615447]
episode: 1644000, epsilon: 0.9, loss: 0.111878, win_rate: [ 0.36769596 0.31585115 0.31645289]
episode: 1646000, epsilon: 0.9, loss: 0.0471003, win_rate: [ 0.36749805 0.3158145 0.31668745]
episode: 1648000, epsilon: 0.9, loss: 0.292445, win_rate: [ 0.36710668 0.31607061 0.31682272]
episode: 1650000, epsilon: 0.9, loss: 0.0798865, win_rate: [ 0.36705215 0.31606198 0.31688587]
episode: 1652000, epsilon: 0.9, loss: 0.138114, win_rate: [ 0.36708861 0.31576322 0.31714818]
episode: 1654000, epsilon: 0.9, loss: 0.315876, win_rate: [ 0.36717535 0.31562729 0.31719736]
episode: 1656000, epsilon: 0.9, loss: 0.171169, win_rate: [ 0.36699928 0.31540853 0.31759219]
episode: 1658000, epsilon: 0.9, loss: 0.28435, win_rate: [ 0.36685674 0.31547398 0.31766928]
episode: 1660000, epsilon: 0.9, loss: 0.0685767, win_rate: [ 0.36645116 0.31550246 0.31804638]
episode: 1662000, epsilon: 0.9, loss: 0.109366, win_rate: [ 0.36604297 0.31550243 0.31845461]
episode: 1664000, epsilon: 0.9, loss: 0.131903, win_rate: [ 0.36588517 0.31558442 0.31853042]
episode: 1666000, epsilon: 0.9, loss: 0.0375194, win_rate: [ 0.36572488 0.3158058 0.31846932]
episode: 1668000, epsilon: 0.9, loss: 0.0401955, win_rate: [ 0.36566201 0.31580173 0.31853626]
episode: 1670000, epsilon: 0.9, loss: 0.105899, win_rate: [ 0.3657912 0.31575181 0.31845699]
episode: 1672000, epsilon: 0.9, loss: 0.0251195, win_rate: [ 0.36550227 0.31577447 0.31872327]
episode: 1674000, epsilon: 0.9, loss: 0.0564443, win_rate: [ 0.36548305 0.3156238 0.31889315]
episode: 1676000, epsilon: 0.9, loss: 0.0894084, win_rate: [ 0.36509792 0.3156475 0.31925458]
episode: 1678000, epsilon: 0.9, loss: 0.151455, win_rate: [ 0.36530256 0.31557704 0.3191204 ]
episode: 1680000, epsilon: 0.9, loss: 0.171882, win_rate: [ 0.36548367 0.31556377 0.31895256]
episode: 1682000, epsilon: 0.9, loss: 0.0587437, win_rate: [ 0.36537432 0.31550822 0.31911747]
episode: 1684000, epsilon: 0.9, loss: 0.196431, win_rate: [ 0.36549008 0.31546001 0.31904991]
episode: 1686000, epsilon: 0.9, loss: 0.129804, win_rate: [ 0.36551396 0.3154486 0.31903743]
episode: 1688000, epsilon: 0.9, loss: 0.0954819, win_rate: [ 0.36531415 0.31564885 0.31903699]
episode: 1690000, epsilon: 0.9, loss: 0.0985823, win_rate: [ 0.36512478 0.31583285 0.31904237]
episode: 1692000, epsilon: 0.9, loss: 0.109748, win_rate: [ 0.36508319 0.31596672 0.31895009]
episode: 1694000, epsilon: 0.9, loss: 0.164812, win_rate: [ 0.36457175 0.31633012 0.31909813]
episode: 1696000, epsilon: 0.9, loss: 0.0174163, win_rate: [ 0.36443634 0.31632081 0.31924285]
episode: 1698000, epsilon: 0.9, loss: 0.0357966, win_rate: [ 0.36426511 0.31642818 0.31930671]
episode: 1700000, epsilon: 0.9, loss: 0.122127, win_rate: [ 0.36443225 0.31629183 0.31927592]
episode: 1702000, epsilon: 0.9, loss: 0.199405, win_rate: [ 0.36436788 0.31628323 0.31934889]
episode: 1704000, epsilon: 0.9, loss: 0.0911045, win_rate: [ 0.36439077 0.31624799 0.31936125]
episode: 1706000, epsilon: 0.9, loss: 0.0538207, win_rate: [ 0.36431227 0.31631439 0.31937334]
episode: 1708000, epsilon: 0.9, loss: 0.106255, win_rate: [ 0.36423542 0.31644246 0.31932212]
episode: 1710000, epsilon: 0.9, loss: 0.0608051, win_rate: [ 0.36426937 0.31624025 0.31949038]
episode: 1712000, epsilon: 0.9, loss: 0.282458, win_rate: [ 0.36427689 0.31613999 0.31958312]
episode: 1714000, epsilon: 0.9, loss: 0.277402, win_rate: [ 0.36412634 0.31624045 0.31963321]
episode: 1716000, epsilon: 0.9, loss: 0.0979924, win_rate: [ 0.36420071 0.31609682 0.31970247]
episode: 1718000, epsilon: 0.9, loss: 0.0389021, win_rate: [ 0.36429855 0.316001 0.31970045]
episode: 1720000, epsilon: 0.9, loss: 0.109107, win_rate: [ 0.36451804 0.31593673 0.31954523]
episode: 1722000, epsilon: 0.9, loss: 0.0496547, win_rate: [ 0.36456192 0.31586882 0.31956926]
episode: 1724000, epsilon: 0.9, loss: 0.151681, win_rate: [ 0.36449346 0.31571013 0.31979641]
episode: 1726000, epsilon: 0.9, loss: 0.0802187, win_rate: [ 0.36451752 0.31567451 0.31980797]
episode: 1728000, epsilon: 0.9, loss: 0.268461, win_rate: [ 0.36440799 0.31573466 0.31985735]
episode: 1730000, epsilon: 0.9, loss: 0.0275447, win_rate: [ 0.36450306 0.31564767 0.31984927]
episode: 1732000, epsilon: 0.9, loss: 0.433142, win_rate: [ 0.36448437 0.31559963 0.31991601]
episode: 1734000, epsilon: 0.9, loss: 0.0565796, win_rate: [ 0.36442441 0.31576514 0.31981045]
episode: 1736000, epsilon: 0.9, loss: 0.102539, win_rate: [ 0.36434265 0.31573523 0.31992213]
episode: 1738000, epsilon: 0.9, loss: 0.0571739, win_rate: [ 0.36425783 0.31588743 0.31985474]
episode: 1740000, epsilon: 0.9, loss: 0.428564, win_rate: [ 0.36408007 0.31613135 0.31978857]
episode: 1742000, epsilon: 0.9, loss: 0.0735125, win_rate: [ 0.36425769 0.31592956 0.31981275]
episode: 1744000, epsilon: 0.9, loss: 0.176237, win_rate: [ 0.36422448 0.31610252 0.319673 ]
episode: 1746000, epsilon: 0.9, loss: 0.0640948, win_rate: [ 0.36420061 0.31618484 0.31961454]
episode: 1748000, epsilon: 0.9, loss: 0.0436814, win_rate: [ 0.36428571 0.31591837 0.31979592]
episode: 1750000, epsilon: 0.9, loss: 0.141155, win_rate: [ 0.36436935 0.3158545 0.31977615]
episode: 1752000, epsilon: 0.9, loss: 0.488514, win_rate: [ 0.36438754 0.31580452 0.31980794]
episode: 1754000, epsilon: 0.9, loss: 0.118051, win_rate: [ 0.36439695 0.3157427 0.31986035]
episode: 1756000, epsilon: 0.9, loss: 0.0881868, win_rate: [ 0.36444398 0.31572388 0.31983214]
episode: 1758000, epsilon: 0.9, loss: 0.0655707, win_rate: [ 0.36454016 0.31565959 0.31980025]
episode: 1760000, epsilon: 0.9, loss: 0.160123, win_rate: [ 0.36459348 0.31572431 0.31968221]
episode: 1762000, epsilon: 0.9, loss: 0.0511614, win_rate: [ 0.36468277 0.31553418 0.31978305]
episode: 1764000, epsilon: 0.9, loss: 0.102161, win_rate: [ 0.36482745 0.31549736 0.31967519]
episode: 1766000, epsilon: 0.9, loss: 0.156944, win_rate: [ 0.36475634 0.31550946 0.31973419]
episode: 1768000, epsilon: 0.9, loss: 0.0964088, win_rate: [ 0.36471434 0.3156692 0.31961646]
episode: 1770000, epsilon: 0.9, loss: 0.245925, win_rate: [ 0.36473246 0.3157828 0.31948474]
episode: 1772000, epsilon: 0.9, loss: 0.347117, win_rate: [ 0.36472277 0.31582776 0.31944947]
episode: 1774000, epsilon: 0.9, loss: 0.111248, win_rate: [ 0.36474054 0.31577448 0.31948498]
episode: 1776000, epsilon: 0.9, loss: 0.094318, win_rate: [ 0.36464576 0.31572977 0.31962447]
episode: 1778000, epsilon: 0.9, loss: 0.488849, win_rate: [ 0.36460238 0.3155705 0.31982712]
episode: 1780000, epsilon: 0.9, loss: 0.0401163, win_rate: [ 0.3647236 0.3153679 0.3199085]
episode: 1782000, epsilon: 0.9, loss: 0.161964, win_rate: [ 0.36478244 0.3153235 0.31989406]
episode: 1784000, epsilon: 0.9, loss: 0.0703916, win_rate: [ 0.36464138 0.31533609 0.32002253]
episode: 1786000, epsilon: 0.9, loss: 0.117756, win_rate: [ 0.36469624 0.31538949 0.31991428]
episode: 1788000, epsilon: 0.9, loss: 0.768996, win_rate: [ 0.36472438 0.3154458 0.31982982]
episode: 1790000, epsilon: 0.9, loss: 0.197246, win_rate: [ 0.3647264 0.31534704 0.31992655]
episode: 1792000, epsilon: 0.9, loss: 0.174123, win_rate: [ 0.36462268 0.31542836 0.31994896]
episode: 1794000, epsilon: 0.9, loss: 0.0557632, win_rate: [ 0.36467608 0.31540717 0.31991676]
episode: 1796000, epsilon: 0.9, loss: 0.069438, win_rate: [ 0.36467481 0.31531082 0.32001437]
episode: 1798000, epsilon: 0.9, loss: 0.0502161, win_rate: [ 0.36476632 0.31514449 0.32008919]
episode: 1800000, epsilon: 0.9, loss: 0.142659, win_rate: [ 0.36491321 0.31506199 0.3200248 ]
episode: 1802000, epsilon: 0.9, loss: 0.0907863, win_rate: [ 0.36465353 0.31533591 0.32001055]
episode: 1804000, epsilon: 0.9, loss: 0.0863359, win_rate: [ 0.36455816 0.31543137 0.32001048]
episode: 1806000, epsilon: 0.9, loss: 0.108273, win_rate: [ 0.36460284 0.31535553 0.32004162]
episode: 1808000, epsilon: 0.9, loss: 0.0247519, win_rate: [ 0.36444368 0.31537375 0.32018257]
episode: 1810000, epsilon: 0.9, loss: 0.0219992, win_rate: [ 0.36437906 0.3154362 0.32018474]
episode: 1812000, epsilon: 0.9, loss: 0.076335, win_rate: [ 0.36413863 0.31558953 0.32027183]
episode: 1814000, epsilon: 0.9, loss: 0.0567257, win_rate: [ 0.36403645 0.31563618 0.32032737]
episode: 1816000, epsilon: 0.9, loss: 0.236187, win_rate: [ 0.36372779 0.31570902 0.32056319]
episode: 1818000, epsilon: 0.9, loss: 0.117301, win_rate: [ 0.36368298 0.31587746 0.32043956]
episode: 1820000, epsilon: 0.9, loss: 0.105495, win_rate: [ 0.36368177 0.31588819 0.32043004]
episode: 1822000, epsilon: 0.9, loss: 0.0317167, win_rate: [ 0.36361157 0.31589878 0.32048965]
episode: 1824000, epsilon: 0.9, loss: 0.256244, win_rate: [ 0.36351616 0.31591903 0.32056481]
episode: 1826000, epsilon: 0.9, loss: 0.0709017, win_rate: [ 0.36345443 0.31593578 0.3206098 ]
episode: 1828000, epsilon: 0.9, loss: 0.0558197, win_rate: [ 0.36320657 0.31608443 0.32070899]
episode: 1830000, epsilon: 0.9, loss: 0.277857, win_rate: [ 0.3631284 0.3160935 0.3207781]
episode: 1832000, epsilon: 0.9, loss: 0.067033, win_rate: [ 0.36293032 0.31614699 0.32092269]
episode: 1834000, epsilon: 0.9, loss: 0.0423557, win_rate: [ 0.3629276 0.31609864 0.32097376]
episode: 1836000, epsilon: 0.9, loss: 0.0785001, win_rate: [ 0.36307257 0.31608231 0.32084511]
episode: 1838000, epsilon: 0.9, loss: 0.26868, win_rate: [ 0.36313768 0.31614112 0.3207212 ]
episode: 1840000, epsilon: 0.9, loss: 0.14206, win_rate: [ 0.36311511 0.31619919 0.3206857 ]
episode: 1842000, epsilon: 0.9, loss: 0.0585054, win_rate: [ 0.36292939 0.31638915 0.32068147]
episode: 1844000, epsilon: 0.9, loss: 0.282725, win_rate: [ 0.36297579 0.31633466 0.32068955]
episode: 1846000, epsilon: 0.9, loss: 0.0894541, win_rate: [ 0.36293329 0.31628693 0.32077977]
episode: 1848000, epsilon: 0.9, loss: 0.111059, win_rate: [ 0.36296397 0.31634575 0.32069028]
episode: 1850000, epsilon: 0.9, loss: 0.119615, win_rate: [ 0.36301234 0.31633163 0.32065603]
episode: 1852000, epsilon: 0.9, loss: 0.0815712, win_rate: [ 0.36298534 0.3163117 0.32070296]
episode: 1854000, epsilon: 0.9, loss: 0.032212, win_rate: [ 0.36296759 0.31641392 0.3206185 ]
episode: 1856000, epsilon: 0.9, loss: 0.0587608, win_rate: [ 0.36303281 0.31640556 0.32056163]
episode: 1858000, epsilon: 0.9, loss: 0.301586, win_rate: [ 0.36303262 0.31636497 0.32060241]
episode: 1860000, epsilon: 0.9, loss: 0.261731, win_rate: [ 0.3630967 0.31635408 0.32054923]
episode: 1862000, epsilon: 0.9, loss: 0.0796917, win_rate: [ 0.3632704 0.31624455 0.32048504]
episode: 1864000, epsilon: 0.9, loss: 0.303258, win_rate: [ 0.36330927 0.31615362 0.32053711]
episode: 1866000, epsilon: 0.9, loss: 0.0417984, win_rate: [ 0.36332759 0.31608671 0.3205857 ]
episode: 1868000, epsilon: 0.9, loss: 0.141981, win_rate: [ 0.36327434 0.31615187 0.32057379]
episode: 1870000, epsilon: 0.9, loss: 0.115921, win_rate: [ 0.3632671 0.31625319 0.3204797 ]
episode: 1872000, epsilon: 0.9, loss: 0.764199, win_rate: [ 0.36333333 0.31618402 0.32048264]
episode: 1874000, epsilon: 0.9, loss: 0.135364, win_rate: [ 0.36330059 0.31612125 0.32057816]
episode: 1876000, epsilon: 0.9, loss: 0.0612705, win_rate: [ 0.36335752 0.31615964 0.32048284]
episode: 1878000, epsilon: 0.9, loss: 0.0445548, win_rate: [ 0.36341382 0.31620039 0.32038579]
episode: 1880000, epsilon: 0.9, loss: 0.07744, win_rate: [ 0.36332597 0.31625449 0.32041954]
episode: 1882000, epsilon: 0.9, loss: 0.0353805, win_rate: [ 0.36331595 0.31614329 0.32054076]
episode: 1884000, epsilon: 0.9, loss: 0.183266, win_rate: [ 0.36329511 0.31619165 0.32051324]
episode: 1886000, epsilon: 0.9, loss: 0.123042, win_rate: [ 0.36333152 0.31619061 0.32047787]
episode: 1888000, epsilon: 0.9, loss: 0.0445418, win_rate: [ 0.36330543 0.31613557 0.32055901]
episode: 1890000, epsilon: 0.9, loss: 0.0726518, win_rate: [ 0.36326887 0.31620199 0.32052914]
episode: 1892000, epsilon: 0.9, loss: 0.374038, win_rate: [ 0.36328079 0.31615282 0.32056639]
episode: 1894000, epsilon: 0.9, loss: 0.26418, win_rate: [ 0.36330056 0.31611214 0.3205873 ]
episode: 1896000, epsilon: 0.9, loss: 0.149343, win_rate: [ 0.36332012 0.31611684 0.32056305]
episode: 1898000, epsilon: 0.9, loss: 0.220352, win_rate: [ 0.36319222 0.31626085 0.32054694]
episode: 1900000, epsilon: 0.9, loss: 0.326055, win_rate: [ 0.36312059 0.3162124 0.32066702]
episode: 1902000, epsilon: 0.9, loss: 0.213899, win_rate: [ 0.36318241 0.3162191 0.32059849]
episode: 1904000, epsilon: 0.9, loss: 0.171206, win_rate: [ 0.3631737 0.31619467 0.32063163]
episode: 1906000, epsilon: 0.9, loss: 0.0658701, win_rate: [ 0.36314448 0.31629925 0.32055627]
episode: 1908000, epsilon: 0.9, loss: 0.213199, win_rate: [ 0.36317704 0.31634896 0.32047399]
episode: 1910000, epsilon: 0.9, loss: 0.0911226, win_rate: [ 0.36311241 0.31641856 0.32046903]
episode: 1912000, epsilon: 0.9, loss: 0.295585, win_rate: [ 0.36314228 0.31634035 0.32051737]
episode: 1914000, epsilon: 0.9, loss: 0.0531822, win_rate: [ 0.36311128 0.3163462 0.32054252]
episode: 1916000, epsilon: 0.9, loss: 0.0795867, win_rate: [ 0.36309566 0.31644238 0.32046196]
episode: 1918000, epsilon: 0.9, loss: 0.071706, win_rate: [ 0.36300525 0.31654259 0.32045216]
episode: 1920000, epsilon: 0.9, loss: 0.0346108, win_rate: [ 0.36291822 0.31649018 0.3205916 ]
episode: 1922000, epsilon: 0.9, loss: 0.0286025, win_rate: [ 0.362884 0.31644324 0.32067277]
episode: 1924000, epsilon: 0.9, loss: 0.108578, win_rate: [ 0.3628698 0.3164066 0.3207236]
episode: 1926000, epsilon: 0.9, loss: 0.0697566, win_rate: [ 0.36286554 0.31634337 0.32079108]
episode: 1928000, epsilon: 0.9, loss: 0.0515931, win_rate: [ 0.36281258 0.31635876 0.32082866]
episode: 1930000, epsilon: 0.9, loss: 0.146385, win_rate: [ 0.36291778 0.31627941 0.32080281]
episode: 1932000, epsilon: 0.9, loss: 0.130552, win_rate: [ 0.36298335 0.31619117 0.32082549]
episode: 1934000, epsilon: 0.9, loss: 0.116463, win_rate: [ 0.36304588 0.31621427 0.32073985]
episode: 1936000, epsilon: 0.9, loss: 0.283969, win_rate: [ 0.3630098 0.31625149 0.3207387 ]
episode: 1938000, epsilon: 0.9, loss: 0.0865868, win_rate: [ 0.36306924 0.31617178 0.32075898]
episode: 1940000, epsilon: 0.9, loss: 0.0557935, win_rate: [ 0.3631068 0.31611414 0.32077907]
episode: 1942000, epsilon: 0.9, loss: 0.203831, win_rate: [ 0.36308744 0.31610653 0.32080603]
episode: 1944000, epsilon: 0.9, loss: 0.174596, win_rate: [ 0.36308937 0.31610368 0.32080694]
episode: 1946000, epsilon: 0.9, loss: 0.110832, win_rate: [ 0.36303292 0.3161102 0.32085688]
episode: 1948000, epsilon: 0.9, loss: 0.0461634, win_rate: [ 0.36312805 0.31606554 0.32080641]
episode: 1950000, epsilon: 0.9, loss: 0.178508, win_rate: [ 0.36310664 0.31611612 0.32077724]
episode: 1952000, epsilon: 0.9, loss: 0.0582016, win_rate: [ 0.36314299 0.31602579 0.32083122]
episode: 1954000, epsilon: 0.9, loss: 0.115294, win_rate: [ 0.36316525 0.31595233 0.32088242]
episode: 1956000, epsilon: 0.9, loss: 0.0483957, win_rate: [ 0.36312343 0.31595483 0.32092174]
episode: 1958000, epsilon: 0.9, loss: 0.0341936, win_rate: [ 0.36318419 0.31591869 0.32089712]
episode: 1960000, epsilon: 0.9, loss: 0.133591, win_rate: [ 0.36331223 0.31584219 0.32084558]
episode: 1962000, epsilon: 0.9, loss: 0.0936955, win_rate: [ 0.36331308 0.31586316 0.32082377]
episode: 1964000, epsilon: 0.9, loss: 0.112547, win_rate: [ 0.36326462 0.31587273 0.32086265]
episode: 1966000, epsilon: 0.9, loss: 0.0504683, win_rate: [ 0.36322329 0.31585768 0.32091903]
episode: 1968000, epsilon: 0.9, loss: 0.0981208, win_rate: [ 0.36313791 0.31592716 0.32093493]
episode: 1970000, epsilon: 0.9, loss: 0.0804626, win_rate: [ 0.36313288 0.31585231 0.32101481]
episode: 1972000, epsilon: 0.9, loss: 0.165911, win_rate: [ 0.3631433 0.31580453 0.32105217]
episode: 1974000, epsilon: 0.9, loss: 0.219728, win_rate: [ 0.36316897 0.31583388 0.32099715]
episode: 1976000, epsilon: 0.9, loss: 0.185247, win_rate: [ 0.3632206 0.31581279 0.32096662]
episode: 1978000, epsilon: 0.9, loss: 0.175881, win_rate: [ 0.36318053 0.31586357 0.3209559 ]
episode: 1980000, epsilon: 0.9, loss: 0.11631, win_rate: [ 0.36325546 0.31582955 0.32091499]
episode: 1982000, epsilon: 0.9, loss: 0.130356, win_rate: [ 0.36328667 0.31580013 0.3209132 ]
episode: 1984000, epsilon: 0.9, loss: 0.0408855, win_rate: [ 0.36332404 0.31574737 0.32092859]
episode: 1986000, epsilon: 0.9, loss: 0.151832, win_rate: [ 0.36329063 0.31571429 0.32099509]
episode: 1988000, epsilon: 0.9, loss: 0.0768109, win_rate: [ 0.36329152 0.315707 0.32100149]
episode: 1990000, epsilon: 0.9, loss: 0.0918728, win_rate: [ 0.36323311 0.31576117 0.32100572]