diff --git a/.github/designs/blastoise/t1emu.json b/.github/designs/blastoise/t1emu.json index bb5b4fdda..c970c3f93 100644 --- a/.github/designs/blastoise/t1emu.json +++ b/.github/designs/blastoise/t1emu.json @@ -1,11 +1,11 @@ { "pytorch.demo": 77, - "pytorch.lenet": 1116581, + "pytorch.lenet": 1117417, "pytorch.matmul": 14556, - "mlir.rvv_vp_intrinsic_add": 448, + "mlir.rvv_vp_intrinsic_add": 445, "mlir.rvv_vp_intrinsic_add_scalable": 729, "mlir.hello": 137, - "mlir.stripmining": 8869, + "mlir.stripmining": 8871, "asm.mmm": 50444, "asm.smoke": 7991, "intrinsic.conv2d_less_m2": 2647, @@ -13,11 +13,11 @@ "intrinsic.softmax": 7032, "rvv_bench.ascii_to_utf16": 11282, "rvv_bench.ascii_to_utf32": 4694, - "rvv_bench.byteswap": 19955, - "rvv_bench.mandelbrot": 247231, + "rvv_bench.byteswap": 19954, + "rvv_bench.mandelbrot": 230851, "rvv_bench.memcpy": 34534, "rvv_bench.memset": 11501, - "rvv_bench.mergelines": 24911, + "rvv_bench.mergelines": 24842, "rvv_bench.strlen": 22697, - "rvv_bench.utf8_count": 151124 + "rvv_bench.utf8_count": 151155 } \ No newline at end of file diff --git a/.github/designs/blastoise/t1rocketemu.json b/.github/designs/blastoise/t1rocketemu.json index 5c41e56cb..4941279e3 100644 --- a/.github/designs/blastoise/t1rocketemu.json +++ b/.github/designs/blastoise/t1rocketemu.json @@ -5,30 +5,30 @@ "asm.strlen": 7986, "asm.utf8_count": 205, "codegen.vaadd_vv": 170452, - "codegen.vaadd_vx": 490423, + "codegen.vaadd_vx": 539363, "codegen.vaaddu_vv": 170452, - "codegen.vaaddu_vx": 490423, - "codegen.vadc_vim": 45186, + "codegen.vaaddu_vx": 539363, + "codegen.vadc_vim": 45919, "codegen.vadc_vvm": 21491, - "codegen.vadc_vxm": 61706, - "codegen.vadd_vi": 89522, + "codegen.vadc_vxm": 65655, + "codegen.vadd_vi": 92286, "codegen.vadd_vv": 43282, - "codegen.vadd_vx": 123663, - "codegen.vand_vi": 89612, + "codegen.vadd_vx": 135925, + "codegen.vand_vi": 92591, "codegen.vand_vv": 43282, - "codegen.vand_vx": 123814, + "codegen.vand_vx": 136518, "codegen.vasub_vv": 
170452, - "codegen.vasub_vx": 490423, + "codegen.vasub_vx": 539363, "codegen.vasubu_vv": 170452, - "codegen.vasubu_vx": 490423, - "codegen.vcompress_vm": 19341, + "codegen.vasubu_vx": 539363, + "codegen.vcompress_vm": 20612, "codegen.vcpop_m": 3603, "codegen.vdiv_vv": 70806, - "codegen.vdiv_vx": 595233, + "codegen.vdiv_vx": 600887, "codegen.vdivu_vv": 71250, - "codegen.vdivu_vx": 627278, - "codegen.vfadd_vf": 659445, - "codegen.vfadd_vv": 152058, + "codegen.vdivu_vx": 632468, + "codegen.vfadd_vf": 739578, + "codegen.vfadd_vv": 154009, "codegen.vfclass_v": 12627, "codegen.vfcvt_f_x_v": 12623, "codegen.vfcvt_f_xu_v": 12623, @@ -36,132 +36,132 @@ "codegen.vfcvt_rtz_xu_f_v": 12623, "codegen.vfcvt_x_f_v": 12623, "codegen.vfcvt_xu_f_v": 12623, - "codegen.vfdiv_vf": 2322335, - "codegen.vfdiv_vv": 302637, + "codegen.vfdiv_vf": 2361323, + "codegen.vfdiv_vv": 321742, "codegen.vfirst_m": 3474, - "codegen.vfmacc_vf": 697013, - "codegen.vfmacc_vv": 152821, - "codegen.vfmadd_vf": 697013, - "codegen.vfmadd_vv": 152821, - "codegen.vfmax_vf": 659445, - "codegen.vfmax_vv": 152058, - "codegen.vfmerge_vfm": 496779, - "codegen.vfmin_vf": 659445, - "codegen.vfmin_vv": 152058, - "codegen.vfmsac_vf": 697013, - "codegen.vfmsac_vv": 152821, - "codegen.vfmsub_vf": 697013, - "codegen.vfmsub_vv": 152821, - "codegen.vfmul_vf": 659445, - "codegen.vfmul_vv": 152058, + "codegen.vfmacc_vf": 722309, + "codegen.vfmacc_vv": 162059, + "codegen.vfmadd_vf": 864949, + "codegen.vfmadd_vv": 162059, + "codegen.vfmax_vf": 678972, + "codegen.vfmax_vv": 161707, + "codegen.vfmerge_vfm": 499920, + "codegen.vfmin_vf": 739578, + "codegen.vfmin_vv": 161707, + "codegen.vfmsac_vf": 864949, + "codegen.vfmsac_vv": 162059, + "codegen.vfmsub_vf": 864949, + "codegen.vfmsub_vv": 162059, + "codegen.vfmul_vf": 739578, + "codegen.vfmul_vv": 160038, "codegen.vfmv_f_s": 13054, - "codegen.vfmv_s_f": 1818, - "codegen.vfmv_v_f": 4141, - "codegen.vfnmacc_vf": 697013, - "codegen.vfnmacc_vv": 152821, - "codegen.vfnmadd_vf": 697013, - 
"codegen.vfnmadd_vv": 152821, - "codegen.vfnmsac_vf": 697013, - "codegen.vfnmsac_vv": 152821, - "codegen.vfnmsub_vf": 697078, - "codegen.vfnmsub_vv": 152821, - "codegen.vfrdiv_vf": 2322335, + "codegen.vfmv_s_f": 1920, + "codegen.vfmv_v_f": 4269, + "codegen.vfnmacc_vf": 808049, + "codegen.vfnmacc_vv": 162059, + "codegen.vfnmadd_vf": 779209, + "codegen.vfnmadd_vv": 162059, + "codegen.vfnmsac_vf": 779209, + "codegen.vfnmsac_vv": 162059, + "codegen.vfnmsub_vf": 779209, + "codegen.vfnmsub_vv": 162059, + "codegen.vfrdiv_vf": 2361323, "codegen.vfrec7_v": 12751, - "codegen.vfredmax_vs": 225887, - "codegen.vfredmin_vs": 225887, - "codegen.vfredosum_vs": 309223, - "codegen.vfredusum_vs": 225887, + "codegen.vfredmax_vs": 242694, + "codegen.vfredmin_vs": 237306, + "codegen.vfredosum_vs": 324079, + "codegen.vfredusum_vs": 242694, "codegen.vfrsqrt7_v": 12623, - "codegen.vfrsub_vf": 659445, - "codegen.vfsgnj_vf": 557860, - "codegen.vfsgnj_vv": 152058, - "codegen.vfsgnjn_vf": 557860, - "codegen.vfsgnjn_vv": 152058, - "codegen.vfsgnjx_vf": 557860, - "codegen.vfsgnjx_vv": 153048, - "codegen.vfslide1down_vf": 1030070, - "codegen.vfslide1up_vf": 1081477, + "codegen.vfrsub_vf": 678972, + "codegen.vfsgnj_vf": 626528, + "codegen.vfsgnj_vv": 161707, + "codegen.vfsgnjn_vf": 626528, + "codegen.vfsgnjn_vv": 154009, + "codegen.vfsgnjx_vf": 626528, + "codegen.vfsgnjx_vv": 154009, + "codegen.vfslide1down_vf": 1046438, + "codegen.vfslide1up_vf": 1087754, "codegen.vfsqrt_v": 29261, - "codegen.vfsub_vf": 659445, - "codegen.vfsub_vv": 152118, + "codegen.vfsub_vf": 739578, + "codegen.vfsub_vv": 152340, "codegen.vid_v": 27213, - "codegen.viota_m": 38960, - "codegen.vl1re16_v": 1844, - "codegen.vl1re32_v": 1844, - "codegen.vl1re8_v": 1844, + "codegen.viota_m": 39261, + "codegen.vl1re16_v": 1908, + "codegen.vl1re32_v": 1908, + "codegen.vl1re8_v": 1908, "codegen.vl2re16_v": 1973, - "codegen.vl2re32_v": 1973, - "codegen.vl2re8_v": 1973, - "codegen.vl4re16_v": 2229, - "codegen.vl4re32_v": 2229, + 
"codegen.vl2re32_v": 2035, + "codegen.vl2re8_v": 2035, + "codegen.vl4re16_v": 2289, + "codegen.vl4re32_v": 2289, "codegen.vl4re8_v": 2449, "codegen.vl8re16_v": 2803, "codegen.vl8re32_v": 2741, "codegen.vl8re8_v": 2741, - "codegen.vle16_v": 8861, - "codegen.vle16ff_v": 25407, + "codegen.vle16_v": 9141, + "codegen.vle16ff_v": 25724, "codegen.vle32_v": 7594, - "codegen.vle32ff_v": 15259, + "codegen.vle32ff_v": 15620, "codegen.vle8_v": 10366, - "codegen.vle8ff_v": 47015, - "codegen.vlm_v": 2027, - "codegen.vloxei16_v": 70166, - "codegen.vloxei32_v": 38066, - "codegen.vloxei8_v": 109654, - "codegen.vloxseg2ei16_v": 72886, - "codegen.vloxseg2ei32_v": 39913, - "codegen.vloxseg2ei8_v": 92475, - "codegen.vloxseg3ei16_v": 50116, - "codegen.vloxseg3ei32_v": 31720, - "codegen.vloxseg3ei8_v": 69073, - "codegen.vloxseg4ei16_v": 60795, - "codegen.vloxseg4ei32_v": 36872, - "codegen.vloxseg4ei8_v": 85693, - "codegen.vloxseg5ei16_v": 36868, - "codegen.vloxseg5ei32_v": 17437, - "codegen.vloxseg5ei8_v": 49894, - "codegen.vloxseg6ei16_v": 41843, - "codegen.vloxseg6ei32_v": 19149, - "codegen.vloxseg6ei8_v": 57242, - "codegen.vloxseg7ei16_v": 46818, - "codegen.vloxseg7ei32_v": 20861, - "codegen.vloxseg7ei8_v": 64590, - "codegen.vloxseg8ei16_v": 51793, - "codegen.vloxseg8ei32_v": 22573, - "codegen.vloxseg8ei8_v": 71938, - "codegen.vlse16_v": 117039, - "codegen.vlse32_v": 67298, - "codegen.vlse8_v": 224575, - "codegen.vlseg2e16_v": 7600, - "codegen.vlseg2e32_v": 6113, - "codegen.vlseg2e8_v": 8892, - "codegen.vlseg3e16_v": 5991, - "codegen.vlseg3e32_v": 4931, - "codegen.vlseg3e8_v": 7418, + "codegen.vle8ff_v": 47441, + "codegen.vlm_v": 2029, + "codegen.vloxei16_v": 70296, + "codegen.vloxei32_v": 38176, + "codegen.vloxei8_v": 109790, + "codegen.vloxseg2ei16_v": 72994, + "codegen.vloxseg2ei32_v": 40030, + "codegen.vloxseg2ei8_v": 92582, + "codegen.vloxseg3ei16_v": 50214, + "codegen.vloxseg3ei32_v": 31833, + "codegen.vloxseg3ei8_v": 69174, + "codegen.vloxseg4ei16_v": 60893, + 
"codegen.vloxseg4ei32_v": 36985, + "codegen.vloxseg4ei8_v": 85794, + "codegen.vloxseg5ei16_v": 36930, + "codegen.vloxseg5ei32_v": 17456, + "codegen.vloxseg5ei8_v": 49965, + "codegen.vloxseg6ei16_v": 41905, + "codegen.vloxseg6ei32_v": 19168, + "codegen.vloxseg6ei8_v": 57313, + "codegen.vloxseg7ei16_v": 46880, + "codegen.vloxseg7ei32_v": 20880, + "codegen.vloxseg7ei8_v": 64661, + "codegen.vloxseg8ei16_v": 51855, + "codegen.vloxseg8ei32_v": 22592, + "codegen.vloxseg8ei8_v": 72009, + "codegen.vlse16_v": 117195, + "codegen.vlse32_v": 67665, + "codegen.vlse8_v": 225387, + "codegen.vlseg2e16_v": 7961, + "codegen.vlseg2e32_v": 6643, + "codegen.vlseg2e8_v": 9291, + "codegen.vlseg3e16_v": 7101, + "codegen.vlseg3e32_v": 5437, + "codegen.vlseg3e8_v": 8720, "codegen.vlseg4e16_v": 6369, "codegen.vlseg4e32_v": 5246, - "codegen.vlseg4e8_v": 7626, - "codegen.vlseg5e16_v": 5492, - "codegen.vlseg5e32_v": 3650, - "codegen.vlseg5e8_v": 7135, - "codegen.vlseg6e16_v": 5677, - "codegen.vlseg6e32_v": 3762, - "codegen.vlseg6e8_v": 7350, - "codegen.vlseg7e16_v": 5865, - "codegen.vlseg7e32_v": 3874, - "codegen.vlseg7e8_v": 7574, - "codegen.vlseg8e16_v": 6056, - "codegen.vlseg8e32_v": 3986, - "codegen.vlseg8e8_v": 7811, - "codegen.vlsseg2e16_v": 111747, - "codegen.vlsseg2e32_v": 56705, + "codegen.vlseg4e8_v": 9110, + "codegen.vlseg5e16_v": 5528, + "codegen.vlseg5e32_v": 3676, + "codegen.vlseg5e8_v": 7161, + "codegen.vlseg6e16_v": 5719, + "codegen.vlseg6e32_v": 3788, + "codegen.vlseg6e8_v": 7376, + "codegen.vlseg7e16_v": 5908, + "codegen.vlseg7e32_v": 3900, + "codegen.vlseg7e8_v": 7600, + "codegen.vlseg8e16_v": 6082, + "codegen.vlseg8e32_v": 4012, + "codegen.vlseg8e8_v": 7849, + "codegen.vlsseg2e16_v": 112253, + "codegen.vlsseg2e32_v": 57056, "codegen.vlsseg2e8_v": 204285, - "codegen.vlsseg3e16_v": 80332, - "codegen.vlsseg3e32_v": 39679, + "codegen.vlsseg3e16_v": 80712, + "codegen.vlsseg3e32_v": 39870, "codegen.vlsseg3e8_v": 162954, - "codegen.vlsseg4e16_v": 100437, + "codegen.vlsseg4e16_v": 
100817, "codegen.vlsseg4e32_v": 48259, - "codegen.vlsseg4e8_v": 207563, + "codegen.vlsseg4e8_v": 208148, "codegen.vlsseg5e16_v": 64763, "codegen.vlsseg5e32_v": 23189, "codegen.vlsseg5e8_v": 134835, @@ -174,186 +174,186 @@ "codegen.vlsseg8e16_v": 93473, "codegen.vlsseg8e32_v": 31739, "codegen.vlsseg8e8_v": 200085, - "codegen.vluxei16_v": 70166, - "codegen.vluxei32_v": 38066, - "codegen.vluxei8_v": 109654, - "codegen.vluxseg2ei16_v": 72886, - "codegen.vluxseg2ei32_v": 39913, - "codegen.vluxseg2ei8_v": 92475, - "codegen.vluxseg3ei16_v": 50116, - "codegen.vluxseg3ei32_v": 31720, - "codegen.vluxseg3ei8_v": 69073, - "codegen.vluxseg4ei16_v": 60795, - "codegen.vluxseg4ei32_v": 36872, - "codegen.vluxseg4ei8_v": 85693, - "codegen.vluxseg5ei16_v": 36868, - "codegen.vluxseg5ei32_v": 17437, - "codegen.vluxseg5ei8_v": 49894, - "codegen.vluxseg6ei16_v": 41843, - "codegen.vluxseg6ei32_v": 19149, - "codegen.vluxseg6ei8_v": 57242, - "codegen.vluxseg7ei16_v": 46818, - "codegen.vluxseg7ei32_v": 20861, - "codegen.vluxseg7ei8_v": 64590, - "codegen.vluxseg8ei16_v": 51793, - "codegen.vluxseg8ei32_v": 22573, - "codegen.vluxseg8ei8_v": 71938, + "codegen.vluxei16_v": 70296, + "codegen.vluxei32_v": 38176, + "codegen.vluxei8_v": 109790, + "codegen.vluxseg2ei16_v": 72994, + "codegen.vluxseg2ei32_v": 40030, + "codegen.vluxseg2ei8_v": 92582, + "codegen.vluxseg3ei16_v": 50214, + "codegen.vluxseg3ei32_v": 31833, + "codegen.vluxseg3ei8_v": 69174, + "codegen.vluxseg4ei16_v": 60893, + "codegen.vluxseg4ei32_v": 36985, + "codegen.vluxseg4ei8_v": 85794, + "codegen.vluxseg5ei16_v": 36930, + "codegen.vluxseg5ei32_v": 17456, + "codegen.vluxseg5ei8_v": 49965, + "codegen.vluxseg6ei16_v": 41905, + "codegen.vluxseg6ei32_v": 19168, + "codegen.vluxseg6ei8_v": 57313, + "codegen.vluxseg7ei16_v": 46880, + "codegen.vluxseg7ei32_v": 20880, + "codegen.vluxseg7ei8_v": 64661, + "codegen.vluxseg8ei16_v": 51855, + "codegen.vluxseg8ei32_v": 22592, + "codegen.vluxseg8ei8_v": 72009, "codegen.vmacc_vv": 43698, - 
"codegen.vmacc_vx": 132377, - "codegen.vmadc_vi": 52961, - "codegen.vmadc_vim": 55596, - "codegen.vmadc_vv": 17232, - "codegen.vmadc_vvm": 20130, - "codegen.vmadc_vx": 74385, - "codegen.vmadc_vxm": 76284, + "codegen.vmacc_vx": 160470, + "codegen.vmadc_vi": 55485, + "codegen.vmadc_vim": 56097, + "codegen.vmadc_vv": 18266, + "codegen.vmadc_vvm": 20641, + "codegen.vmadc_vx": 76456, + "codegen.vmadc_vxm": 76845, "codegen.vmadd_vv": 43698, - "codegen.vmadd_vx": 132377, - "codegen.vmand_mm": 16488, - "codegen.vmandn_mm": 16488, + "codegen.vmadd_vx": 147017, + "codegen.vmand_mm": 17003, + "codegen.vmandn_mm": 17003, "codegen.vmax_vv": 43282, - "codegen.vmax_vx": 123663, + "codegen.vmax_vx": 135925, "codegen.vmaxu_vv": 43282, - "codegen.vmaxu_vx": 123663, - "codegen.vmerge_vim": 81882, - "codegen.vmerge_vvm": 26334, - "codegen.vmerge_vxm": 113256, - "codegen.vmfeq_vf": 941663, - "codegen.vmfeq_vv": 155759, - "codegen.vmfge_vf": 941663, - "codegen.vmfgt_vf": 941663, - "codegen.vmfle_vf": 941663, - "codegen.vmfle_vv": 155759, - "codegen.vmflt_vf": 941663, - "codegen.vmflt_vv": 155759, - "codegen.vmfne_vf": 941663, - "codegen.vmfne_vv": 155759, + "codegen.vmaxu_vx": 135925, + "codegen.vmerge_vim": 82193, + "codegen.vmerge_vvm": 26341, + "codegen.vmerge_vxm": 115847, + "codegen.vmfeq_vf": 955379, + "codegen.vmfeq_vv": 169709, + "codegen.vmfge_vf": 964197, + "codegen.vmfgt_vf": 960809, + "codegen.vmfle_vf": 950549, + "codegen.vmfle_vv": 169709, + "codegen.vmflt_vf": 958767, + "codegen.vmflt_vv": 160277, + "codegen.vmfne_vf": 960809, + "codegen.vmfne_vv": 155775, "codegen.vmin_vv": 43282, - "codegen.vmin_vx": 123663, + "codegen.vmin_vx": 135925, "codegen.vminu_vv": 43282, - "codegen.vminu_vx": 123663, - "codegen.vmnand_mm": 16488, - "codegen.vmnor_mm": 16488, - "codegen.vmor_mm": 16488, - "codegen.vmorn_mm": 16488, - "codegen.vmsbc_vv": 17232, - "codegen.vmsbc_vvm": 20130, - "codegen.vmsbc_vx": 74021, - "codegen.vmsbc_vxm": 76284, + "codegen.vminu_vx": 135925, + 
"codegen.vmnand_mm": 17003, + "codegen.vmnor_mm": 17003, + "codegen.vmor_mm": 17003, + "codegen.vmorn_mm": 17003, + "codegen.vmsbc_vv": 18266, + "codegen.vmsbc_vvm": 20641, + "codegen.vmsbc_vx": 76696, + "codegen.vmsbc_vxm": 77484, "codegen.vmsbf_m": 2675, - "codegen.vmseq_vi": 109496, - "codegen.vmseq_vv": 39180, - "codegen.vmseq_vx": 150872, - "codegen.vmsgt_vi": 109496, - "codegen.vmsgt_vv": 39180, - "codegen.vmsgt_vx": 150872, - "codegen.vmsgtu_vi": 109496, - "codegen.vmsgtu_vv": 39180, - "codegen.vmsgtu_vx": 150872, - "codegen.vmsif_m": 2675, - "codegen.vmsle_vi": 109496, - "codegen.vmsle_vv": 39180, - "codegen.vmsle_vx": 150872, - "codegen.vmsleu_vi": 109496, + "codegen.vmseq_vi": 112226, + "codegen.vmseq_vv": 40186, + "codegen.vmseq_vx": 153920, + "codegen.vmsgt_vi": 109902, + "codegen.vmsgt_vv": 40184, + "codegen.vmsgt_vx": 153920, + "codegen.vmsgtu_vi": 109902, + "codegen.vmsgtu_vv": 40184, + "codegen.vmsgtu_vx": 153920, + "codegen.vmsif_m": 2736, + "codegen.vmsle_vi": 111628, + "codegen.vmsle_vv": 40186, + "codegen.vmsle_vx": 153920, + "codegen.vmsleu_vi": 110500, "codegen.vmsleu_vv": 39180, - "codegen.vmsleu_vx": 150872, + "codegen.vmsleu_vx": 153920, "codegen.vmslt_vv": 39180, - "codegen.vmslt_vx": 150872, - "codegen.vmsltu_vv": 39180, - "codegen.vmsltu_vx": 150872, - "codegen.vmsne_vi": 109496, + "codegen.vmslt_vx": 153920, + "codegen.vmsltu_vv": 40186, + "codegen.vmsltu_vx": 153920, + "codegen.vmsne_vi": 111628, "codegen.vmsne_vv": 39180, - "codegen.vmsne_vx": 150872, - "codegen.vmsof_m": 2675, + "codegen.vmsne_vx": 153920, + "codegen.vmsof_m": 2736, "codegen.vmul_vv": 43454, - "codegen.vmul_vx": 167668, + "codegen.vmul_vx": 185727, "codegen.vmulh_vv": 43454, - "codegen.vmulh_vx": 167668, + "codegen.vmulh_vx": 185727, "codegen.vmulhsu_vv": 43454, - "codegen.vmulhsu_vx": 167668, + "codegen.vmulhsu_vx": 185727, "codegen.vmulhu_vv": 43454, - "codegen.vmulhu_vx": 167668, - "codegen.vmv1r_v": 2863, + "codegen.vmulhu_vx": 185727, + "codegen.vmv1r_v": 2891, 
"codegen.vmv2r_v": 2951, - "codegen.vmv4r_v": 4236, + "codegen.vmv4r_v": 4678, "codegen.vmv8r_v": 6838, "codegen.vmv_s_x": 2512, - "codegen.vmv_v_i": 42927, - "codegen.vmv_v_v": 16446, - "codegen.vmv_v_x": 13951, + "codegen.vmv_v_i": 43019, + "codegen.vmv_v_v": 18120, + "codegen.vmv_v_x": 14633, "codegen.vmv_x_s": 3835, - "codegen.vmxnor_mm": 16488, - "codegen.vmxor_mm": 16488, - "codegen.vnclip_wi": 327597, - "codegen.vnclip_wv": 106245, - "codegen.vnclip_wx": 459101, - "codegen.vnclipu_wi": 327597, - "codegen.vnclipu_wv": 106245, - "codegen.vnclipu_wx": 459101, + "codegen.vmxnor_mm": 17003, + "codegen.vmxor_mm": 17003, + "codegen.vnclip_wi": 336781, + "codegen.vnclip_wv": 106276, + "codegen.vnclip_wx": 464072, + "codegen.vnclipu_wi": 336781, + "codegen.vnclipu_wv": 106276, + "codegen.vnclipu_wx": 464072, "codegen.vnmsac_vv": 43698, - "codegen.vnmsac_vx": 132377, + "codegen.vnmsac_vx": 147017, "codegen.vnmsub_vv": 43698, - "codegen.vnmsub_vx": 132377, - "codegen.vnsra_wi": 82390, - "codegen.vnsra_wv": 26974, - "codegen.vnsra_wx": 114953, - "codegen.vnsrl_wi": 82390, - "codegen.vnsrl_wv": 26974, - "codegen.vnsrl_wx": 114953, - "codegen.vor_vi": 89612, + "codegen.vnmsub_vx": 137471, + "codegen.vnsra_wi": 85095, + "codegen.vnsra_wv": 26982, + "codegen.vnsra_wx": 116362, + "codegen.vnsrl_wi": 85095, + "codegen.vnsrl_wv": 26982, + "codegen.vnsrl_wx": 116362, + "codegen.vor_vi": 99404, "codegen.vor_vv": 43282, - "codegen.vor_vx": 123814, - "codegen.vredand_vs": 46038, - "codegen.vredmax_vs": 46038, - "codegen.vredmaxu_vs": 46038, - "codegen.vredmin_vs": 46038, - "codegen.vredminu_vs": 46038, + "codegen.vor_vx": 136518, + "codegen.vredand_vs": 46305, + "codegen.vredmax_vs": 46305, + "codegen.vredmaxu_vs": 46305, + "codegen.vredmin_vs": 47323, + "codegen.vredminu_vs": 47323, "codegen.vredor_vs": 46038, - "codegen.vredsum_vs": 46038, - "codegen.vredxor_vs": 46038, + "codegen.vredsum_vs": 46305, + "codegen.vredxor_vs": 47056, "codegen.vrem_vv": 70806, - "codegen.vrem_vx": 
595233, + "codegen.vrem_vx": 600887, "codegen.vremu_vv": 71250, - "codegen.vremu_vx": 627278, - "codegen.vrgather_vi": 154310, - "codegen.vrgather_vv": 59125, - "codegen.vrgather_vx": 208229, - "codegen.vrgatherei16_vv": 43906, - "codegen.vrsub_vi": 89522, - "codegen.vrsub_vx": 123663, - "codegen.vs1r_v": 1872, - "codegen.vs2r_v": 1960, + "codegen.vremu_vx": 632468, + "codegen.vrgather_vi": 155978, + "codegen.vrgather_vv": 59135, + "codegen.vrgather_vx": 212658, + "codegen.vrgatherei16_vv": 45073, + "codegen.vrsub_vi": 92286, + "codegen.vrsub_vx": 135925, + "codegen.vs1r_v": 1900, + "codegen.vs2r_v": 2019, "codegen.vs4r_v": 2216, "codegen.vs8r_v": 3018, - "codegen.vsadd_vi": 90482, + "codegen.vsadd_vi": 97115, "codegen.vsadd_vv": 44237, - "codegen.vsadd_vx": 124243, - "codegen.vsaddu_vi": 90482, + "codegen.vsadd_vx": 136290, + "codegen.vsaddu_vi": 92957, "codegen.vsaddu_vv": 44237, - "codegen.vsaddu_vx": 124243, + "codegen.vsaddu_vx": 136290, "codegen.vsbc_vvm": 21491, - "codegen.vsbc_vxm": 91809, + "codegen.vsbc_vxm": 98421, "codegen.vse16_v": 8888, "codegen.vse32_v": 7681, "codegen.vse8_v": 10145, "codegen.vsetivli": 6005, "codegen.vsetvl": 1649, "codegen.vsetvli": 18626, - "codegen.vsext_vf2": 13454, + "codegen.vsext_vf2": 14020, "codegen.vsext_vf4": 4260, - "codegen.vslide1down_vx": 379997, - "codegen.vslide1up_vx": 379961, - "codegen.vslidedown_vi": 266862, - "codegen.vslidedown_vx": 313411, - "codegen.vslideup_vi": 268568, - "codegen.vslideup_vx": 296567, - "codegen.vsll_vi": 143160, - "codegen.vsll_vv": 50559, - "codegen.vsll_vx": 201308, + "codegen.vslide1down_vx": 381813, + "codegen.vslide1up_vx": 381808, + "codegen.vslidedown_vi": 268775, + "codegen.vslidedown_vx": 318470, + "codegen.vslideup_vi": 271550, + "codegen.vslideup_vx": 299359, + "codegen.vsll_vi": 149697, + "codegen.vsll_vv": 50551, + "codegen.vsll_vx": 208708, "codegen.vsm_v": 2062, "codegen.vsmul_vv": 174750, - "codegen.vsmul_vx": 502614, + "codegen.vsmul_vx": 552247, "codegen.vsoxei16_v": 
78491, - "codegen.vsoxei32_v": 43319, + "codegen.vsoxei32_v": 43330, "codegen.vsoxei8_v": 120521, "codegen.vsoxseg2ei16_v": 80978, "codegen.vsoxseg2ei32_v": 45428, @@ -376,24 +376,24 @@ "codegen.vsoxseg8ei16_v": 58689, "codegen.vsoxseg8ei32_v": 25934, "codegen.vsoxseg8ei8_v": 80910, - "codegen.vsra_vi": 143160, - "codegen.vsra_vv": 50551, - "codegen.vsra_vx": 201308, - "codegen.vsrl_vi": 143160, - "codegen.vsrl_vv": 50551, - "codegen.vsrl_vx": 201308, + "codegen.vsra_vi": 149697, + "codegen.vsra_vv": 50625, + "codegen.vsra_vx": 208708, + "codegen.vsrl_vi": 149697, + "codegen.vsrl_vv": 50625, + "codegen.vsrl_vx": 208708, "codegen.vsse16_v": 137067, "codegen.vsse32_v": 92941, "codegen.vsse8_v": 228853, - "codegen.vsseg2e16_v": 7664, - "codegen.vsseg2e32_v": 6672, + "codegen.vsseg2e16_v": 7899, + "codegen.vsseg2e32_v": 6936, "codegen.vsseg2e8_v": 8870, - "codegen.vsseg3e16_v": 7136, - "codegen.vsseg3e32_v": 5678, + "codegen.vsseg3e16_v": 7310, + "codegen.vsseg3e32_v": 5819, "codegen.vsseg3e8_v": 8594, "codegen.vsseg4e16_v": 7721, "codegen.vsseg4e32_v": 6154, - "codegen.vsseg4e8_v": 9334, + "codegen.vsseg4e8_v": 9622, "codegen.vsseg5e16_v": 6821, "codegen.vsseg5e32_v": 4193, "codegen.vsseg5e8_v": 9431, @@ -406,12 +406,12 @@ "codegen.vsseg8e16_v": 7660, "codegen.vsseg8e32_v": 4735, "codegen.vsseg8e8_v": 10420, - "codegen.vssra_vi": 570699, + "codegen.vssra_vi": 597088, "codegen.vssra_vv": 199512, - "codegen.vssra_vx": 1238496, - "codegen.vssrl_vi": 570699, - "codegen.vssrl_vv": 199512, - "codegen.vssrl_vx": 1238496, + "codegen.vssra_vx": 1284209, + "codegen.vssrl_vi": 597088, + "codegen.vssrl_vv": 199756, + "codegen.vssrl_vx": 1284209, "codegen.vssseg2e16_v": 130577, "codegen.vssseg2e32_v": 80467, "codegen.vssseg2e8_v": 211152, @@ -434,13 +434,13 @@ "codegen.vssseg8e32_v": 44831, "codegen.vssseg8e8_v": 218268, "codegen.vssub_vv": 43282, - "codegen.vssub_vx": 184911, + "codegen.vssub_vx": 205043, "codegen.vssubu_vv": 43282, - "codegen.vssubu_vx": 184911, + 
"codegen.vssubu_vx": 205043, "codegen.vsub_vv": 43282, - "codegen.vsub_vx": 184911, + "codegen.vsub_vx": 205043, "codegen.vsuxei16_v": 78491, - "codegen.vsuxei32_v": 43319, + "codegen.vsuxei32_v": 43330, "codegen.vsuxei8_v": 120521, "codegen.vsuxseg2ei16_v": 80978, "codegen.vsuxseg2ei32_v": 45428, @@ -463,65 +463,65 @@ "codegen.vsuxseg8ei16_v": 58689, "codegen.vsuxseg8ei32_v": 25934, "codegen.vsuxseg8ei8_v": 80910, - "codegen.vwadd_vv": 24676, - "codegen.vwadd_vx": 75676, + "codegen.vwadd_vv": 24656, + "codegen.vwadd_vx": 84125, "codegen.vwadd_wv": 27204, - "codegen.vwadd_wx": 91440, - "codegen.vwaddu_vv": 24676, - "codegen.vwaddu_vx": 75676, - "codegen.vwaddu_wv": 27204, - "codegen.vwaddu_wx": 91440, + "codegen.vwadd_wx": 96683, + "codegen.vwaddu_vv": 24656, + "codegen.vwaddu_vx": 84125, + "codegen.vwaddu_wv": 27237, + "codegen.vwaddu_wx": 96683, "codegen.vwmacc_vv": 26382, - "codegen.vwmacc_vx": 99430, + "codegen.vwmacc_vx": 116645, "codegen.vwmaccsu_vv": 26382, - "codegen.vwmaccsu_vx": 99430, + "codegen.vwmaccsu_vx": 116645, "codegen.vwmaccu_vv": 26382, - "codegen.vwmaccu_vx": 99430, - "codegen.vwmaccus_vx": 99430, - "codegen.vwmul_vv": 24726, - "codegen.vwmul_vx": 101274, - "codegen.vwmulsu_vv": 24726, - "codegen.vwmulsu_vx": 101274, - "codegen.vwmulu_vv": 24726, - "codegen.vwmulu_vx": 101274, - "codegen.vwredsum_vs": 26838, - "codegen.vwredsumu_vs": 26838, - "codegen.vwsub_vv": 24676, - "codegen.vwsub_vx": 75676, + "codegen.vwmaccu_vx": 112918, + "codegen.vwmaccus_vx": 112917, + "codegen.vwmul_vv": 24703, + "codegen.vwmul_vx": 113307, + "codegen.vwmulsu_vv": 24703, + "codegen.vwmulsu_vx": 113307, + "codegen.vwmulu_vv": 24703, + "codegen.vwmulu_vx": 113307, + "codegen.vwredsum_vs": 27746, + "codegen.vwredsumu_vs": 27746, + "codegen.vwsub_vv": 24656, + "codegen.vwsub_vx": 84125, "codegen.vwsub_wv": 27204, - "codegen.vwsub_wx": 91440, - "codegen.vwsubu_vv": 24676, - "codegen.vwsubu_vx": 75676, - "codegen.vwsubu_wv": 27204, - "codegen.vwsubu_wx": 91440, - 
"codegen.vxor_vi": 89612, + "codegen.vwsub_wx": 96683, + "codegen.vwsubu_vv": 24656, + "codegen.vwsubu_vx": 84125, + "codegen.vwsubu_wv": 27237, + "codegen.vwsubu_wx": 96683, + "codegen.vxor_vi": 96668, "codegen.vxor_vv": 43282, - "codegen.vxor_vx": 123814, - "codegen.vzext_vf2": 13454, - "codegen.vzext_vf4": 4260, + "codegen.vxor_vx": 136518, + "codegen.vzext_vf2": 14020, + "codegen.vzext_vf4": 4333, "intrinsic.conv2d_less_m2": 2498, "intrinsic.linear_normalization": 3350, - "intrinsic.matmul": 61748, - "intrinsic.softmax": 6793, + "intrinsic.matmul": 65866, + "intrinsic.softmax": 6795, "mlir.axpy_masked": 4048, "mlir.conv": 125859, "mlir.hello": 131, "mlir.matmul": 56059, "mlir.maxvl_tail_setvl_front": 700, - "mlir.rvv_vp_intrinsic_add": 469, + "mlir.rvv_vp_intrinsic_add": 466, "mlir.rvv_vp_intrinsic_add_scalable": 807, "mlir.stripmining": 8882, - "mlir.vectoradd": 15530, + "mlir.vectoradd": 13236, "pytorch.demo": 31521, "pytorch.matmul": 69793, "rvv_bench.ascii_to_utf16": 677090, - "rvv_bench.ascii_to_utf32": 226910, + "rvv_bench.ascii_to_utf32": 226918, "rvv_bench.byteswap": 399524, "rvv_bench.chacha20": 39957, - "rvv_bench.mandelbrot": 529063, + "rvv_bench.mandelbrot": 512683, "rvv_bench.memcpy": 671955, "rvv_bench.memset": 290725, - "rvv_bench.mergelines": 564228, + "rvv_bench.mergelines": 564159, "rvv_bench.poly1305": 39957, "rvv_bench.strlen": 219139, "rvv_bench.utf8_count": 2283382 diff --git a/.github/designs/rookidee/t1emu.json b/.github/designs/rookidee/t1emu.json index 8267f8936..cbb3b39f9 100644 --- a/.github/designs/rookidee/t1emu.json +++ b/.github/designs/rookidee/t1emu.json @@ -1,17 +1,17 @@ { - "asm.mmm": 54905, + "asm.mmm": 54912, "asm.smoke": 7770, "intrinsic.conv2d_less_m2": 2710, "mlir.hello": 136, - "mlir.rvv_vp_intrinsic_add": 445, + "mlir.rvv_vp_intrinsic_add": 442, "mlir.rvv_vp_intrinsic_add_scalable": 637, "mlir.stripmining": 27810, "rvv_bench.ascii_to_utf16": 16070, "rvv_bench.ascii_to_utf32": 6057, - "rvv_bench.byteswap": 43275, + 
"rvv_bench.byteswap": 43274, "rvv_bench.memcpy": 46320, "rvv_bench.memset": 19350, - "rvv_bench.mergelines": 38578, + "rvv_bench.mergelines": 38541, "rvv_bench.strlen": 34517, - "rvv_bench.utf8_count": 206393 + "rvv_bench.utf8_count": 206417 } \ No newline at end of file diff --git a/.github/designs/rookidee/t1rocketemu.json b/.github/designs/rookidee/t1rocketemu.json index 043fc72ca..fe76769f3 100644 --- a/.github/designs/rookidee/t1rocketemu.json +++ b/.github/designs/rookidee/t1rocketemu.json @@ -2,9 +2,9 @@ "asm.mmm": 51749, "asm.smoke": 4564, "codegen.vaadd_vv": 118067, - "codegen.vaadd_vx": 354504, + "codegen.vaadd_vx": 354288, "codegen.vaaddu_vv": 118067, - "codegen.vaaddu_vx": 354504, + "codegen.vaaddu_vx": 354288, "codegen.vadc_vim": 31478, "codegen.vadc_vvm": 14555, "codegen.vadc_vxm": 44308, @@ -15,64 +15,64 @@ "codegen.vand_vv": 29207, "codegen.vand_vx": 88767, "codegen.vasub_vv": 118067, - "codegen.vasub_vx": 354504, + "codegen.vasub_vx": 354288, "codegen.vasubu_vv": 118067, - "codegen.vasubu_vx": 354504, - "codegen.vcompress_vm": 11918, + "codegen.vasubu_vx": 354288, + "codegen.vcompress_vm": 11933, "codegen.vcpop_m": 2479, - "codegen.vdiv_vv": 33501, - "codegen.vdiv_vx": 200221, + "codegen.vdiv_vv": 33503, + "codegen.vdiv_vx": 200038, "codegen.vdivu_vv": 33544, - "codegen.vdivu_vx": 207584, + "codegen.vdivu_vx": 207151, "codegen.vfirst_m": 2350, "codegen.vid_v": 18586, "codegen.viota_m": 22745, - "codegen.vl1re16_v": 641, - "codegen.vl1re32_v": 634, - "codegen.vl1re8_v": 634, + "codegen.vl1re16_v": 705, + "codegen.vl1re32_v": 690, + "codegen.vl1re8_v": 690, "codegen.vl2re16_v": 636, - "codegen.vl2re32_v": 636, - "codegen.vl2re8_v": 636, - "codegen.vl4re16_v": 692, - "codegen.vl4re32_v": 692, + "codegen.vl2re32_v": 706, + "codegen.vl2re8_v": 706, + "codegen.vl4re16_v": 756, + "codegen.vl4re32_v": 756, "codegen.vl4re8_v": 874, "codegen.vl8re16_v": 824, "codegen.vl8re32_v": 821, "codegen.vl8re8_v": 821, "codegen.vle16_v": 7129, - "codegen.vle16ff_v": 
8995, + "codegen.vle16ff_v": 8996, "codegen.vle32_v": 5697, - "codegen.vle32ff_v": 6158, + "codegen.vle32ff_v": 6159, "codegen.vle8_v": 8556, - "codegen.vle8ff_v": 14696, - "codegen.vlm_v": 699, - "codegen.vloxei16_v": 27373, - "codegen.vloxei32_v": 16937, - "codegen.vloxei8_v": 37917, - "codegen.vloxseg2ei16_v": 24428, + "codegen.vle8ff_v": 14793, + "codegen.vlm_v": 719, + "codegen.vloxei16_v": 27447, + "codegen.vloxei32_v": 16960, + "codegen.vloxei8_v": 38062, + "codegen.vloxseg2ei16_v": 24579, "codegen.vloxseg2ei32_v": 15505, - "codegen.vloxseg2ei8_v": 29869, + "codegen.vloxseg2ei8_v": 30081, "codegen.vloxseg3ei16_v": 16575, "codegen.vloxseg3ei32_v": 10757, - "codegen.vloxseg3ei8_v": 21016, - "codegen.vloxseg4ei16_v": 18111, + "codegen.vloxseg3ei8_v": 21181, + "codegen.vloxseg4ei16_v": 18221, "codegen.vloxseg4ei32_v": 11200, - "codegen.vloxseg4ei8_v": 24271, - "codegen.vloxseg5ei16_v": 9690, - "codegen.vloxseg5ei32_v": 5490, - "codegen.vloxseg5ei8_v": 14631, - "codegen.vloxseg6ei16_v": 10242, - "codegen.vloxseg6ei32_v": 5877, - "codegen.vloxseg6ei8_v": 16435, - "codegen.vloxseg7ei16_v": 10967, - "codegen.vloxseg7ei32_v": 6277, - "codegen.vloxseg7ei8_v": 18239, - "codegen.vloxseg8ei16_v": 11692, - "codegen.vloxseg8ei32_v": 6677, - "codegen.vloxseg8ei8_v": 20043, + "codegen.vloxseg4ei8_v": 24600, + "codegen.vloxseg5ei16_v": 9730, + "codegen.vloxseg5ei32_v": 5492, + "codegen.vloxseg5ei8_v": 14860, + "codegen.vloxseg6ei16_v": 10282, + "codegen.vloxseg6ei32_v": 5879, + "codegen.vloxseg6ei8_v": 16664, + "codegen.vloxseg7ei16_v": 11007, + "codegen.vloxseg7ei32_v": 6279, + "codegen.vloxseg7ei8_v": 18468, + "codegen.vloxseg8ei16_v": 11817, + "codegen.vloxseg8ei32_v": 6679, + "codegen.vloxseg8ei8_v": 20272, "codegen.vlse16_v": 39602, "codegen.vlse32_v": 25930, - "codegen.vlse8_v": 68022, + "codegen.vlse8_v": 68294, "codegen.vlseg2e16_v": 5739, "codegen.vlseg2e32_v": 4294, "codegen.vlseg2e8_v": 7151, @@ -94,15 +94,15 @@ "codegen.vlseg8e16_v": 2930, "codegen.vlseg8e32_v": 
1504, "codegen.vlseg8e8_v": 4365, - "codegen.vlsseg2e16_v": 33260, + "codegen.vlsseg2e16_v": 33383, "codegen.vlsseg2e32_v": 19897, "codegen.vlsseg2e8_v": 56876, "codegen.vlsseg3e16_v": 23073, "codegen.vlsseg3e32_v": 11971, "codegen.vlsseg3e8_v": 43076, - "codegen.vlsseg4e16_v": 26562, + "codegen.vlsseg4e16_v": 26593, "codegen.vlsseg4e32_v": 12588, - "codegen.vlsseg4e8_v": 53340, + "codegen.vlsseg4e8_v": 53655, "codegen.vlsseg5e16_v": 14443, "codegen.vlsseg5e32_v": 6441, "codegen.vlsseg5e8_v": 35770, @@ -115,49 +115,49 @@ "codegen.vlsseg8e16_v": 18919, "codegen.vlsseg8e32_v": 8421, "codegen.vlsseg8e8_v": 51955, - "codegen.vluxei16_v": 27373, - "codegen.vluxei32_v": 16937, - "codegen.vluxei8_v": 37917, - "codegen.vluxseg2ei16_v": 24428, + "codegen.vluxei16_v": 27447, + "codegen.vluxei32_v": 16960, + "codegen.vluxei8_v": 38062, + "codegen.vluxseg2ei16_v": 24579, "codegen.vluxseg2ei32_v": 15505, - "codegen.vluxseg2ei8_v": 29869, + "codegen.vluxseg2ei8_v": 30081, "codegen.vluxseg3ei16_v": 16575, "codegen.vluxseg3ei32_v": 10757, - "codegen.vluxseg3ei8_v": 21016, - "codegen.vluxseg4ei16_v": 18111, + "codegen.vluxseg3ei8_v": 21181, + "codegen.vluxseg4ei16_v": 18221, "codegen.vluxseg4ei32_v": 11200, - "codegen.vluxseg4ei8_v": 24271, - "codegen.vluxseg5ei16_v": 9690, - "codegen.vluxseg5ei32_v": 5490, - "codegen.vluxseg5ei8_v": 14631, - "codegen.vluxseg6ei16_v": 10242, - "codegen.vluxseg6ei32_v": 5877, - "codegen.vluxseg6ei8_v": 16435, - "codegen.vluxseg7ei16_v": 10967, - "codegen.vluxseg7ei32_v": 6277, - "codegen.vluxseg7ei8_v": 18239, - "codegen.vluxseg8ei16_v": 11692, - "codegen.vluxseg8ei32_v": 6677, - "codegen.vluxseg8ei8_v": 20043, + "codegen.vluxseg4ei8_v": 24600, + "codegen.vluxseg5ei16_v": 9730, + "codegen.vluxseg5ei32_v": 5492, + "codegen.vluxseg5ei8_v": 14860, + "codegen.vluxseg6ei16_v": 10282, + "codegen.vluxseg6ei32_v": 5879, + "codegen.vluxseg6ei8_v": 16664, + "codegen.vluxseg7ei16_v": 11007, + "codegen.vluxseg7ei32_v": 6279, + "codegen.vluxseg7ei8_v": 18468, + 
"codegen.vluxseg8ei16_v": 11817, + "codegen.vluxseg8ei32_v": 6679, + "codegen.vluxseg8ei8_v": 20272, "codegen.vmacc_vv": 29209, - "codegen.vmacc_vx": 89389, - "codegen.vmadc_vi": 33249, - "codegen.vmadc_vim": 35158, + "codegen.vmacc_vx": 91585, + "codegen.vmadc_vi": 34276, + "codegen.vmadc_vim": 35248, "codegen.vmadc_vv": 11916, "codegen.vmadc_vvm": 14566, - "codegen.vmadc_vx": 47863, - "codegen.vmadc_vxm": 49236, + "codegen.vmadc_vx": 48502, + "codegen.vmadc_vxm": 49312, "codegen.vmadd_vv": 29209, - "codegen.vmadd_vx": 89389, + "codegen.vmadd_vx": 89397, "codegen.vmand_mm": 11907, "codegen.vmandn_mm": 11907, "codegen.vmax_vv": 29207, "codegen.vmax_vx": 88767, "codegen.vmaxu_vv": 29207, "codegen.vmaxu_vx": 88767, - "codegen.vmerge_vim": 36304, + "codegen.vmerge_vim": 36335, "codegen.vmerge_vvm": 15126, - "codegen.vmerge_vxm": 50678, + "codegen.vmerge_vxm": 50731, "codegen.vmin_vv": 29207, "codegen.vmin_vx": 88767, "codegen.vminu_vv": 29207, @@ -168,32 +168,32 @@ "codegen.vmorn_mm": 11907, "codegen.vmsbc_vv": 11916, "codegen.vmsbc_vvm": 14566, - "codegen.vmsbc_vx": 47474, - "codegen.vmsbc_vxm": 49236, + "codegen.vmsbc_vx": 48387, + "codegen.vmsbc_vxm": 49351, "codegen.vmsbf_m": 1583, - "codegen.vmseq_vi": 70004, + "codegen.vmseq_vi": 70351, "codegen.vmseq_vv": 29212, - "codegen.vmseq_vx": 98083, - "codegen.vmsgt_vi": 70004, + "codegen.vmseq_vx": 98456, + "codegen.vmsgt_vi": 70050, "codegen.vmsgt_vv": 29212, - "codegen.vmsgt_vx": 98083, - "codegen.vmsgtu_vi": 70004, + "codegen.vmsgt_vx": 98456, + "codegen.vmsgtu_vi": 70050, "codegen.vmsgtu_vv": 29212, - "codegen.vmsgtu_vx": 98083, + "codegen.vmsgtu_vx": 98456, "codegen.vmsif_m": 1583, - "codegen.vmsle_vi": 70004, + "codegen.vmsle_vi": 70222, "codegen.vmsle_vv": 29212, - "codegen.vmsle_vx": 98083, - "codegen.vmsleu_vi": 70004, + "codegen.vmsle_vx": 98456, + "codegen.vmsleu_vi": 70179, "codegen.vmsleu_vv": 29212, - "codegen.vmsleu_vx": 98083, + "codegen.vmsleu_vx": 98456, "codegen.vmslt_vv": 29212, - 
"codegen.vmslt_vx": 98083, + "codegen.vmslt_vx": 98456, "codegen.vmsltu_vv": 29212, - "codegen.vmsltu_vx": 98083, - "codegen.vmsne_vi": 70004, + "codegen.vmsltu_vx": 98456, + "codegen.vmsne_vi": 70222, "codegen.vmsne_vv": 29212, - "codegen.vmsne_vx": 98083, + "codegen.vmsne_vx": 98456, "codegen.vmsof_m": 1583, "codegen.vmul_vv": 29207, "codegen.vmul_vx": 117138, @@ -210,26 +210,26 @@ "codegen.vmv_s_x": 1446, "codegen.vmv_v_i": 18573, "codegen.vmv_v_v": 9593, - "codegen.vmv_v_x": 6885, + "codegen.vmv_v_x": 6893, "codegen.vmv_x_s": 2721, "codegen.vmxnor_mm": 11907, "codegen.vmxor_mm": 11907, - "codegen.vnclip_wi": 165634, + "codegen.vnclip_wi": 166662, "codegen.vnclip_wv": 74314, - "codegen.vnclip_wx": 232622, - "codegen.vnclipu_wi": 165634, + "codegen.vnclip_wx": 232911, + "codegen.vnclipu_wi": 166662, "codegen.vnclipu_wv": 74314, - "codegen.vnclipu_wx": 232622, + "codegen.vnclipu_wx": 232911, "codegen.vnmsac_vv": 29209, - "codegen.vnmsac_vx": 89389, + "codegen.vnmsac_vx": 89397, "codegen.vnmsub_vv": 29209, - "codegen.vnmsub_vx": 89389, - "codegen.vnsra_wi": 40554, - "codegen.vnsra_wv": 17487, - "codegen.vnsra_wx": 57159, - "codegen.vnsrl_wi": 40554, - "codegen.vnsrl_wv": 17487, - "codegen.vnsrl_wx": 57159, + "codegen.vnmsub_vx": 89397, + "codegen.vnsra_wi": 40865, + "codegen.vnsra_wv": 17493, + "codegen.vnsra_wx": 57303, + "codegen.vnsrl_wi": 40865, + "codegen.vnsrl_wv": 17493, + "codegen.vnsrl_wx": 57303, "codegen.vor_vi": 62877, "codegen.vor_vv": 29207, "codegen.vor_vx": 88767, @@ -241,18 +241,18 @@ "codegen.vredor_vs": 29601, "codegen.vredsum_vs": 29601, "codegen.vredxor_vs": 29601, - "codegen.vrem_vv": 33501, - "codegen.vrem_vx": 200221, - "codegen.vremu_vv": 33544, - "codegen.vremu_vx": 207584, - "codegen.vrgather_vi": 73225, + "codegen.vrem_vv": 33503, + "codegen.vrem_vx": 200038, + "codegen.vremu_vv": 33545, + "codegen.vremu_vx": 207151, + "codegen.vrgather_vi": 73165, "codegen.vrgather_vv": 31781, - "codegen.vrgather_vx": 100955, + "codegen.vrgather_vx": 
100619, "codegen.vrgatherei16_vv": 27635, "codegen.vrsub_vi": 62877, "codegen.vrsub_vx": 88767, "codegen.vs1r_v": 686, - "codegen.vs2r_v": 688, + "codegen.vs2r_v": 702, "codegen.vs4r_v": 720, "codegen.vs8r_v": 1011, "codegen.vsadd_vi": 64453, @@ -271,18 +271,18 @@ "codegen.vsetvli": 18626, "codegen.vsext_vf2": 11223, "codegen.vsext_vf4": 3241, - "codegen.vslide1down_vx": 137587, - "codegen.vslide1up_vx": 139331, - "codegen.vslidedown_vi": 96987, - "codegen.vslidedown_vx": 118424, - "codegen.vslideup_vi": 96319, - "codegen.vslideup_vx": 111346, - "codegen.vsll_vi": 69759, + "codegen.vslide1down_vx": 137746, + "codegen.vslide1up_vx": 139516, + "codegen.vslidedown_vi": 97123, + "codegen.vslidedown_vx": 118461, + "codegen.vslideup_vi": 96628, + "codegen.vslideup_vx": 111442, + "codegen.vsll_vi": 69826, "codegen.vsll_vv": 30002, - "codegen.vsll_vx": 98144, + "codegen.vsll_vx": 98267, "codegen.vsm_v": 734, "codegen.vsmul_vv": 124295, - "codegen.vsmul_vx": 359954, + "codegen.vsmul_vx": 359943, "codegen.vsoxei16_v": 27188, "codegen.vsoxei32_v": 16473, "codegen.vsoxei8_v": 38355, @@ -307,12 +307,12 @@ "codegen.vsoxseg8ei16_v": 13236, "codegen.vsoxseg8ei32_v": 7488, "codegen.vsoxseg8ei8_v": 22200, - "codegen.vsra_vi": 69759, + "codegen.vsra_vi": 69826, "codegen.vsra_vv": 30002, - "codegen.vsra_vx": 98144, - "codegen.vsrl_vi": 69759, + "codegen.vsra_vx": 98267, + "codegen.vsrl_vi": 69826, "codegen.vsrl_vv": 30002, - "codegen.vsrl_vx": 98144, + "codegen.vsrl_vx": 98267, "codegen.vsse16_v": 51557, "codegen.vsse32_v": 37244, "codegen.vsse8_v": 77684, @@ -337,12 +337,12 @@ "codegen.vsseg8e16_v": 2604, "codegen.vsseg8e32_v": 1542, "codegen.vsseg8e8_v": 3827, - "codegen.vssra_vi": 280647, + "codegen.vssra_vi": 281075, "codegen.vssra_vv": 121087, - "codegen.vssra_vx": 588914, - "codegen.vssrl_vi": 280647, + "codegen.vssra_vx": 589169, + "codegen.vssrl_vi": 281075, "codegen.vssrl_vv": 121087, - "codegen.vssrl_vx": 588914, + "codegen.vssrl_vx": 589169, "codegen.vssseg2e16_v": 42589, 
"codegen.vssseg2e32_v": 28551, "codegen.vssseg2e8_v": 64477, @@ -397,18 +397,18 @@ "codegen.vwadd_vv": 17508, "codegen.vwadd_vx": 53494, "codegen.vwadd_wv": 17438, - "codegen.vwadd_wx": 54403, + "codegen.vwadd_wx": 54415, "codegen.vwaddu_vv": 17508, "codegen.vwaddu_vx": 53494, "codegen.vwaddu_wv": 17438, - "codegen.vwaddu_wx": 54403, + "codegen.vwaddu_wx": 54415, "codegen.vwmacc_vv": 17518, - "codegen.vwmacc_vx": 57596, + "codegen.vwmacc_vx": 62168, "codegen.vwmaccsu_vv": 17518, - "codegen.vwmaccsu_vx": 57596, + "codegen.vwmaccsu_vx": 62168, "codegen.vwmaccu_vv": 17518, - "codegen.vwmaccu_vx": 57596, - "codegen.vwmaccus_vx": 57596, + "codegen.vwmaccu_vx": 59962, + "codegen.vwmaccus_vx": 59962, "codegen.vwmul_vv": 17508, "codegen.vwmul_vx": 70303, "codegen.vwmulsu_vv": 17508, @@ -420,11 +420,11 @@ "codegen.vwsub_vv": 17508, "codegen.vwsub_vx": 53494, "codegen.vwsub_wv": 17438, - "codegen.vwsub_wx": 54403, + "codegen.vwsub_wx": 54415, "codegen.vwsubu_vv": 17508, "codegen.vwsubu_vx": 53494, "codegen.vwsubu_wv": 17438, - "codegen.vwsubu_wx": 54403, + "codegen.vwsubu_wx": 54415, "codegen.vxor_vi": 62877, "codegen.vxor_vv": 29207, "codegen.vxor_vx": 88767, @@ -432,16 +432,16 @@ "codegen.vzext_vf4": 3241, "intrinsic.conv2d_less_m2": 2498, "mlir.hello": 130, - "mlir.rvv_vp_intrinsic_add": 442, + "mlir.rvv_vp_intrinsic_add": 439, "mlir.rvv_vp_intrinsic_add_scalable": 640, "mlir.stripmining": 27798, - "rvv_bench.ascii_to_utf16": 676092, - "rvv_bench.ascii_to_utf32": 225585, + "rvv_bench.ascii_to_utf16": 676284, + "rvv_bench.ascii_to_utf32": 225713, "rvv_bench.byteswap": 408359, "rvv_bench.chacha20": 39957, "rvv_bench.memcpy": 675881, "rvv_bench.memset": 291438, - "rvv_bench.mergelines": 575240, + "rvv_bench.mergelines": 575219, "rvv_bench.poly1305": 39957, "rvv_bench.strlen": 231237, "rvv_bench.utf8_count": 2350031 diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index f0ec3ad29..7af7f3800 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -1142,7 +1142,7 @@ class 
Lane(val parameter: LaneParameter) extends Module with SerializableModule[ lastWriteOH ) - val selectMask: UInt = Mux( + val selectMask: UInt = Mux( segmentLS, segmentMask, Mux( @@ -1151,13 +1151,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ lastWriteOH ) ) - // 8 register - val paddingSize: Int = elementSizeForOneRegister * 8 - val shifterMask: UInt = (((selectMask ## Fill(paddingSize, true.B)) - << laneRequest.bits.vd(2, 0) ## 0.U(log2Ceil(elementSizeForOneRegister).W)) - >> paddingSize).asUInt - vrf.instructionWriteReport.bits.elementMask := shifterMask + vrf.instructionWriteReport.bits.elementMask := selectMask // clear record by instructionFinished vrf.instructionLastReport := instructionFinished diff --git a/t1/src/vrf/ChainingCheck.scala b/t1/src/vrf/ChainingCheck.scala index 45873e738..28624e583 100644 --- a/t1/src/vrf/ChainingCheck.scala +++ b/t1/src/vrf/ChainingCheck.scala @@ -30,9 +30,26 @@ class ChainingCheck(val parameter: VRFParam) extends Module { val sameInst: Bool = read.instructionIndex === record.bits.instIndex // 3: 8 register - val readOH: UInt = UIntToOH((read.vs ## read.offset)(parameter.vrfOffsetBits + 3 - 1, 0)) - val hitElement: Bool = (readOH & record.bits.elementMask) === 0.U + val readOH: UInt = UIntToOH((read.vs ## read.offset)(parameter.vrfOffsetBits + 3 - 1, 0)) - val raw: Bool = record.bits.vd.valid && (read.vs(4, 3) === record.bits.vd.bits(4, 3)) && hitElement + // todo: def + val elementSizeForOneRegister: Int = parameter.vLen / parameter.datapathWidth / parameter.laneNumber + val paddingSize: Int = elementSizeForOneRegister * 8 + + // elementMask records the relative position of the relative instruction. + // Let's calculate the absolute position. 
+ val maskShifter: UInt = (((Fill(paddingSize, true.B) ## record.bits.elementMask ## Fill(paddingSize, true.B)) + << record.bits.vd.bits(2, 0) ## 0.U(log2Ceil(elementSizeForOneRegister).W)) + >> paddingSize).asUInt(2 * paddingSize - 1, 0) + // mask for vd's group + val maskForVD: UInt = cutUIntBySize(maskShifter, 2)(0) + // Due to the existence of segment load, writes may cross register groups + // So we also need the mask of the next register group (vd(4, 3) + 1) + val maskForVD1: UInt = cutUIntBySize(maskShifter, 2)(1) + + val hitVd: Bool = (readOH & maskForVD) === 0.U && read.vs(4, 3) === record.bits.vd.bits(4, 3) + val hitVd1: Bool = (readOH & maskForVD1) === 0.U && read.vs(4, 3) === (record.bits.vd.bits(4, 3) + 1.U) + + val raw: Bool = record.bits.vd.valid && (hitVd || hitVd1) checkResult := !(!older && raw && !sameInst && recordValid) } diff --git a/t1/src/vrf/VRF.scala b/t1/src/vrf/VRF.scala index 13ab43401..cafaeb5e4 100644 --- a/t1/src/vrf/VRF.scala +++ b/t1/src/vrf/VRF.scala @@ -504,27 +504,30 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar val freeRecord: UInt = VecInit(chainingRecord.map(!_.valid)).asUInt val recordFFO: UInt = ffo(freeRecord) val recordEnq: UInt = Wire(UInt((parameter.chainingSize + 1).W)) - val olderCheck = chainingRecord - .map(re => !re.valid || instIndexL(re.bits.instIndex, instructionWriteReport.bits.instIndex)) - .reduce(_ && _) + val olderCheck = chainingRecord.map { re => + // The same lsb will make it difficult to distinguish between the new and the old + val notSameLSB: Bool = re.bits.instIndex(parameter.instructionIndexBits - 2, 0) =/= + instructionWriteReport.bits.instIndex(parameter.instructionIndexBits - 2, 0) + !re.valid || (instIndexL(re.bits.instIndex, instructionWriteReport.bits.instIndex) && notSameLSB) + }.reduce(_ && _) // handle VRF hazard // @todo @Clo91eaf VRF ready signal for performance. 
instructionWriteReport.ready := freeRecord.orR && olderCheck - recordEnq := Mux( + recordEnq := Mux( // 纯粹的lsu指令的记录不需要ready instructionWriteReport.valid, recordFFO, 0.U((parameter.chainingSize + 1).W) ) - vrfAllocateIssue := freeRecord.orR && olderCheck + vrfAllocateIssue := freeRecord.orR && olderCheck - val writePort: Seq[ValidIO[VRFWriteRequest]] = Seq(writePipe) - val writeOH = writePort.map(p => UIntToOH((p.bits.vd ## p.bits.offset)(parameter.vrfOffsetBits + 3 - 1, 0))) + val writePort: Seq[ValidIO[VRFWriteRequest]] = Seq(writePipe) val loadUnitReadPorts: Seq[DecoupledIO[VRFReadRequest]] = Seq(readRequests.last) - val loadReadOH: Seq[UInt] = - loadUnitReadPorts.map(p => UIntToOH((p.bits.vs ## p.bits.offset)(parameter.vrfOffsetBits + 3 - 1, 0))) Seq(chainingRecord, chainingRecordCopy).foreach { recordVec => recordVec.zipWithIndex.foreach { case (record, i) => + // read write one hot base on base address + val writeOH = writePort.map(p => UIntToOH((p.bits.vd - record.bits.vd.bits)(2, 0) ## p.bits.offset)) + val loadReadOH = loadUnitReadPorts.map(p => UIntToOH((p.bits.vs - record.bits.vs2)(2, 0) ## p.bits.offset)) val dataInLsuQueue = ohCheck(loadDataInLSUWriteQueue, record.bits.instIndex, parameter.chainingSize) // elementMask update by write val writeUpdateValidVec: Seq[Bool] = diff --git a/t1/src/vrf/WriteCheck.scala b/t1/src/vrf/WriteCheck.scala index b454782b7..321fa16de 100644 --- a/t1/src/vrf/WriteCheck.scala +++ b/t1/src/vrf/WriteCheck.scala @@ -32,35 +32,41 @@ class WriteCheck(val parameter: VRFParam) extends Module { val sameInst: Bool = check.instructionIndex === record.bits.instIndex val checkOH: UInt = UIntToOH((check.vd ## check.offset)(parameter.vrfOffsetBits + 3 - 1, 0)) - // this element in record not execute - val notHitMaskVd: Bool = (checkOH & record.bits.elementMask) === 0.U - val waw: Bool = record.bits.vd.valid && check.vd(4, 3) === record.bits.vd.bits(4, 3) && notHitMaskVd - // inst eg: vadd v0, v1, v1 (lmul = 1) - // We only recorded 
vd-related masks. - // 0 base: 11111111111111xx eg vs = 0 off=2 - // As above, using vd as the perspective, - // we will access the lowest two elements of the register group where vd is located. - // But from the perspective of vs1: - // 1 base: 111111111111xx11 eg vs = 1 off=2 - // Apparently. Our mask has shifted - // 0 base => 1 base << (1 * off) - // we need vd%8 base => vs1%8 base => vd base mask << (vs1 - vd) * off - // => vd base mask >> 8 * off << (8 + vs1 - vd) * off - // => vd base mask << (8 + vs1 - vd) * off >> 8 * off - val vs1Mask: UInt = (((-1.S(parameter.elementSize.W)).asUInt ## record.bits.elementMask) << - ((8.U + record.bits.vs1.bits(2, 0) - record.bits.vd.bits(2, 0)) << parameter.vrfOffsetBits).asUInt).asUInt( - 2 * 8 * parameter.singleGroupSize - 1, - 8 * parameter.singleGroupSize - ) - val notHitVs1: Bool = (checkOH & vs1Mask) === 0.U - val war1: Bool = record.bits.vs1.valid && check.vd(4, 3) === record.bits.vs1.bits(4, 3) && notHitVs1 - val maskForVs2: UInt = record.bits.elementMask & Fill(parameter.elementSize, !record.bits.onlyRead) - val vs2Mask: UInt = (((-1.S(parameter.elementSize.W)).asUInt ## maskForVs2) << - ((8.U + record.bits.vs2(2, 0) - record.bits.vd.bits(2, 0)) << parameter.vrfOffsetBits).asUInt).asUInt( - 2 * 8 * parameter.singleGroupSize - 1, - 8 * parameter.singleGroupSize - ) - val notHitVs2: Bool = (checkOH & vs2Mask) === 0.U - val war2: Bool = check.vd(4, 3) === record.bits.vs2(4, 3) && notHitVs2 + val elementSizeForOneRegister: Int = parameter.vLen / parameter.datapathWidth / parameter.laneNumber + val paddingSize: Int = elementSizeForOneRegister * 8 + + // elementMask records the relative position of the relative instruction. + // Let's calculate the absolute position. 
+ val maskShifter: UInt = (((Fill(paddingSize, true.B) ## record.bits.elementMask ## Fill(paddingSize, true.B)) + << record.bits.vd.bits(2, 0) ## 0.U(log2Ceil(elementSizeForOneRegister).W)) + >> paddingSize).asUInt(2 * paddingSize - 1, 0) + // mask for vd's group + val maskForVD: UInt = cutUIntBySize(maskShifter, 2)(0) + // Due to the existence of segment load, writes may cross register groups + // So we also need the mask of the next register group (vd(4, 3) + 1) + val maskForVD1: UInt = cutUIntBySize(maskShifter, 2)(1) + + val hitVd: Bool = (checkOH & maskForVD) === 0.U && check.vd(4, 3) === record.bits.vd.bits(4, 3) + val hitVd1: Bool = (checkOH & maskForVD1) === 0.U && check.vd(4, 3) === (record.bits.vd.bits(4, 3) + 1.U) + val waw: Bool = record.bits.vd.valid && (hitVd || hitVd1) + + // calculate the absolute position for vs1 + val vs1Mask: UInt = (((record.bits.elementMask ## Fill(paddingSize, true.B)) + << record.bits.vs1.bits(2, 0) ## 0.U(log2Ceil(elementSizeForOneRegister).W)) + >> paddingSize).asUInt + val notHitVs1: Bool = (checkOH & vs1Mask) === 0.U + val war1: Bool = record.bits.vs1.valid && check.vd(4, 3) === record.bits.vs1.bits(4, 3) && notHitVs1 + + // calculate the absolute position for vs2 + val maskShifterForVs2: UInt = (((Fill(paddingSize, true.B) ## record.bits.elementMask ## Fill(paddingSize, true.B)) + << record.bits.vs2(2, 0) ## 0.U(log2Ceil(elementSizeForOneRegister).W)) + >> paddingSize).asUInt(2 * paddingSize - 1, 0) + + val maskForVs2: UInt = cutUIntBySize(maskShifterForVs2, 2)(0) & Fill(parameter.elementSize, !record.bits.onlyRead) + val maskForVs21: UInt = cutUIntBySize(maskShifterForVs2, 2)(1) + val hitVs2: Bool = (checkOH & maskForVs2) === 0.U && check.vd(4, 3) === record.bits.vs2(4, 3) + val hitVs21: Bool = (checkOH & maskForVs21) === 0.U && check.vd(4, 3) === (record.bits.vs2(4, 3) + 1.U) + val war2: Bool = hitVs2 || hitVs21 + checkResult := !((!older && (waw || war1 || war2)) && !sameInst && record.valid) }