Improve Partial Evaluation of Floating Point Conversion Operations #46

l-kent · 2024-02-19T05:58:38Z

The instructions fcvt, ucvtf, and scvtf are all floating point conversion instructions, and the currently produced semantics are not ideal for BASIL, containing a number of unusual constructs compared to most of the output. These instructions appear in cntlm.

Here are the gtirb-semantics output for them, since that's what I have on hand.

"ucvtf d0, w2": [
  "Stmt_ConstDecl(Type_Register(\"32\",([ Slice_HiLo(Expr_LitInt(\"26\"),Expr_LitInt(\"26\")) ], AHP),\n([ Slice_HiLo(Expr_LitInt(\"25\"),Expr_LitInt(\"25\")) ], DN),\n([ Slice_HiLo(Expr_LitInt(\"24\"),Expr_LitInt(\"24\")) ], FZ),\n([ Slice_HiLo(Expr_LitInt(\"23\"),Expr_LitInt(\"22\")) ], RMode),\n([ Slice_HiLo(Expr_LitInt(\"21\"),Expr_LitInt(\"20\")) ], Stride),\n([ Slice_HiLo(Expr_LitInt(\"19\"),Expr_LitInt(\"19\")) ], FZ16),\n([ Slice_HiLo(Expr_LitInt(\"18\"),Expr_LitInt(\"16\")) ], Len),\n([ Slice_HiLo(Expr_LitInt(\"15\"),Expr_LitInt(\"15\")) ], IDE),\n([ Slice_HiLo(Expr_LitInt(\"12\"),Expr_LitInt(\"12\")) ], IXE),\n([ Slice_HiLo(Expr_LitInt(\"11\"),Expr_LitInt(\"11\")) ], UFE),\n([ Slice_HiLo(Expr_LitInt(\"10\"),Expr_LitInt(\"10\")) ], OFE),\n([ Slice_HiLo(Expr_LitInt(\"9\"),Expr_LitInt(\"9\")) ], DZE),\n([ Slice_HiLo(Expr_LitInt(\"8\"),Expr_LitInt(\"8\")) ], IOE)),Exp3__3,Expr_Var(FPCR))",
  "Stmt_VarDeclsNoInit(Type_Constructor(FPRounding),[(FPDecodeRounding5__5)])",
  "Stmt_If(Expr_TApply(eq_bits.0,[(Expr_LitInt(\"2\"))],[(Expr_Slices(Expr_Var(FPCR),[(Slice_LoWd(Expr_LitInt(\"22\"),Expr_LitInt(\"2\")))]));(Expr_LitBits(\"00\"))]),[\nStmt_Assign(LExpr_Var(FPDecodeRounding5__5),Expr_LitInt(\"0\"))\n],[],(else Stmt_If(Expr_TApply(eq_bits.0,[(Expr_LitInt(\"2\"))],[(Expr_Slices(Expr_Var(FPCR),[(Slice_LoWd(Expr_LitInt(\"22\"),Expr_LitInt(\"2\")))]));(Expr_LitBits(\"01\"))]),[\nStmt_Assign(LExpr_Var(FPDecodeRounding5__5),Expr_LitInt(\"1\"))\n],[],(else Stmt_If(Expr_TApply(eq_bits.0,[(Expr_LitInt(\"2\"))],[(Expr_Slices(Expr_Var(FPCR),[(Slice_LoWd(Expr_LitInt(\"22\"),Expr_LitInt(\"2\")))]));(Expr_LitBits(\"10\"))]),[\nStmt_Assign(LExpr_Var(FPDecodeRounding5__5),Expr_LitInt(\"2\"))\n],[],(else Stmt_If(Expr_TApply(eq_bits.0,[(Expr_LitInt(\"2\"))],[(Expr_Slices(Expr_Var(FPCR),[(Slice_LoWd(Expr_LitInt(\"22\"),Expr_LitInt(\"2\")))]));(Expr_LitBits(\"11\"))]),[\nStmt_Assign(LExpr_Var(FPDecodeRounding5__5),Expr_LitInt(\"3\"))\n],[],(else Stmt_Throw(UNMATCHED CASE)))))))))",
  "Stmt_ConstDecl(Type_Bits(Expr_LitInt(\"64\")),Exp9__5,Expr_TApply(FixedToFP.0,[(Expr_LitInt(\"32\"));(Expr_LitInt(\"64\"))],[(Expr_Slices(Expr_Array(Expr_Var(_R),Expr_LitInt(\"2\")),[(Slice_LoWd(Expr_LitInt(\"0\"),Expr_LitInt(\"32\")))]));(Expr_LitInt(\"0\"));(Expr_Var(TRUE));(Expr_Var(Exp3__3));(Expr_Var(FPDecodeRounding5__5))]))",
  "Stmt_Assign(LExpr_Array(LExpr_Var(_Z),Expr_LitInt(\"0\")),Expr_TApply(ZeroExtend.0,[(Expr_LitInt(\"64\"));(Expr_LitInt(\"128\"))],[(Expr_Var(Exp9__5));(Expr_LitInt(\"128\"))]))"
]

"fcvt d0, s0": [
  "Stmt_ConstDecl(Type_Bits(Expr_LitInt(\"128\")),Exp4__5,Expr_Array(Expr_Var(_Z),Expr_LitInt(\"0\")))",
  "Stmt_ConstDecl(Type_Register(\"32\",([ Slice_HiLo(Expr_LitInt(\"26\"),Expr_LitInt(\"26\")) ], AHP),\n([ Slice_HiLo(Expr_LitInt(\"25\"),Expr_LitInt(\"25\")) ], DN),\n([ Slice_HiLo(Expr_LitInt(\"24\"),Expr_LitInt(\"24\")) ], FZ),\n([ Slice_HiLo(Expr_LitInt(\"23\"),Expr_LitInt(\"22\")) ], RMode),\n([ Slice_HiLo(Expr_LitInt(\"21\"),Expr_LitInt(\"20\")) ], Stride),\n([ Slice_HiLo(Expr_LitInt(\"19\"),Expr_LitInt(\"19\")) ], FZ16),\n([ Slice_HiLo(Expr_LitInt(\"18\"),Expr_LitInt(\"16\")) ], Len),\n([ Slice_HiLo(Expr_LitInt(\"15\"),Expr_LitInt(\"15\")) ], IDE),\n([ Slice_HiLo(Expr_LitInt(\"12\"),Expr_LitInt(\"12\")) ], IXE),\n([ Slice_HiLo(Expr_LitInt(\"11\"),Expr_LitInt(\"11\")) ], UFE),\n([ Slice_HiLo(Expr_LitInt(\"10\"),Expr_LitInt(\"10\")) ], OFE),\n([ Slice_HiLo(Expr_LitInt(\"9\"),Expr_LitInt(\"9\")) ], DZE),\n([ Slice_HiLo(Expr_LitInt(\"8\"),Expr_LitInt(\"8\")) ], IOE)),Exp5__4,Expr_Var(FPCR))",
  "Stmt_VarDeclsNoInit(Type_Constructor(FPRounding),[(FPDecodeRounding8__7)])",
  "Stmt_If(Expr_TApply(eq_bits.0,[(Expr_LitInt(\"2\"))],[(Expr_Slices(Expr_Var(FPCR),[(Slice_LoWd(Expr_LitInt(\"22\"),Expr_LitInt(\"2\")))]));(Expr_LitBits(\"00\"))]),[\nStmt_Assign(LExpr_Var(FPDecodeRounding8__7),Expr_LitInt(\"0\"))\n],[],(else Stmt_If(Expr_TApply(eq_bits.0,[(Expr_LitInt(\"2\"))],[(Expr_Slices(Expr_Var(FPCR),[(Slice_LoWd(Expr_LitInt(\"22\"),Expr_LitInt(\"2\")))]));(Expr_LitBits(\"01\"))]),[\nStmt_Assign(LExpr_Var(FPDecodeRounding8__7),Expr_LitInt(\"1\"))\n],[],(else Stmt_If(Expr_TApply(eq_bits.0,[(Expr_LitInt(\"2\"))],[(Expr_Slices(Expr_Var(FPCR),[(Slice_LoWd(Expr_LitInt(\"22\"),Expr_LitInt(\"2\")))]));(Expr_LitBits(\"10\"))]),[\nStmt_Assign(LExpr_Var(FPDecodeRounding8__7),Expr_LitInt(\"2\"))\n],[],(else Stmt_If(Expr_TApply(eq_bits.0,[(Expr_LitInt(\"2\"))],[(Expr_Slices(Expr_Var(FPCR),[(Slice_LoWd(Expr_LitInt(\"22\"),Expr_LitInt(\"2\")))]));(Expr_LitBits(\"11\"))]),[\nStmt_Assign(LExpr_Var(FPDecodeRounding8__7),Expr_LitInt(\"3\"))\n],[],(else Stmt_Throw(UNMATCHED CASE)))))))))",
  "Stmt_ConstDecl(Type_Bits(Expr_LitInt(\"64\")),Exp10__6,Expr_TApply(FPConvert.0,[(Expr_LitInt(\"64\"));(Expr_LitInt(\"32\"))],[(Expr_Slices(Expr_Var(Exp4__5),[(Slice_LoWd(Expr_LitInt(\"0\"),Expr_LitInt(\"32\")))]));(Expr_Var(Exp5__4));(Expr_Var(FPDecodeRounding8__7))]))",
  "Stmt_Assign(LExpr_Array(LExpr_Var(_Z),Expr_LitInt(\"0\")),Expr_TApply(ZeroExtend.0,[(Expr_LitInt(\"64\"));(Expr_LitInt(\"128\"))],[(Expr_Var(Exp10__6));(Expr_LitInt(\"128\"))]))"
]

"scvtf d0, w21": [
  "Stmt_ConstDecl(Type_Register(\"32\",([ Slice_HiLo(Expr_LitInt(\"26\"),Expr_LitInt(\"26\")) ], AHP),\n([ Slice_HiLo(Expr_LitInt(\"25\"),Expr_LitInt(\"25\")) ], DN),\n([ Slice_HiLo(Expr_LitInt(\"24\"),Expr_LitInt(\"24\")) ], FZ),\n([ Slice_HiLo(Expr_LitInt(\"23\"),Expr_LitInt(\"22\")) ], RMode),\n([ Slice_HiLo(Expr_LitInt(\"21\"),Expr_LitInt(\"20\")) ], Stride),\n([ Slice_HiLo(Expr_LitInt(\"19\"),Expr_LitInt(\"19\")) ], FZ16),\n([ Slice_HiLo(Expr_LitInt(\"18\"),Expr_LitInt(\"16\")) ], Len),\n([ Slice_HiLo(Expr_LitInt(\"15\"),Expr_LitInt(\"15\")) ], IDE),\n([ Slice_HiLo(Expr_LitInt(\"12\"),Expr_LitInt(\"12\")) ], IXE),\n([ Slice_HiLo(Expr_LitInt(\"11\"),Expr_LitInt(\"11\")) ], UFE),\n([ Slice_HiLo(Expr_LitInt(\"10\"),Expr_LitInt(\"10\")) ], OFE),\n([ Slice_HiLo(Expr_LitInt(\"9\"),Expr_LitInt(\"9\")) ], DZE),\n([ Slice_HiLo(Expr_LitInt(\"8\"),Expr_LitInt(\"8\")) ], IOE)),Exp3__3,Expr_Var(FPCR))",
  "Stmt_VarDeclsNoInit(Type_Constructor(FPRounding),[(FPDecodeRounding5__5)])",
  "Stmt_If(Expr_TApply(eq_bits.0,[(Expr_LitInt(\"2\"))],[(Expr_Slices(Expr_Var(FPCR),[(Slice_LoWd(Expr_LitInt(\"22\"),Expr_LitInt(\"2\")))]));(Expr_LitBits(\"00\"))]),[\nStmt_Assign(LExpr_Var(FPDecodeRounding5__5),Expr_LitInt(\"0\"))\n],[],(else Stmt_If(Expr_TApply(eq_bits.0,[(Expr_LitInt(\"2\"))],[(Expr_Slices(Expr_Var(FPCR),[(Slice_LoWd(Expr_LitInt(\"22\"),Expr_LitInt(\"2\")))]));(Expr_LitBits(\"01\"))]),[\nStmt_Assign(LExpr_Var(FPDecodeRounding5__5),Expr_LitInt(\"1\"))\n],[],(else Stmt_If(Expr_TApply(eq_bits.0,[(Expr_LitInt(\"2\"))],[(Expr_Slices(Expr_Var(FPCR),[(Slice_LoWd(Expr_LitInt(\"22\"),Expr_LitInt(\"2\")))]));(Expr_LitBits(\"10\"))]),[\nStmt_Assign(LExpr_Var(FPDecodeRounding5__5),Expr_LitInt(\"2\"))\n],[],(else Stmt_If(Expr_TApply(eq_bits.0,[(Expr_LitInt(\"2\"))],[(Expr_Slices(Expr_Var(FPCR),[(Slice_LoWd(Expr_LitInt(\"22\"),Expr_LitInt(\"2\")))]));(Expr_LitBits(\"11\"))]),[\nStmt_Assign(LExpr_Var(FPDecodeRounding5__5),Expr_LitInt(\"3\"))\n],[],(else Stmt_Throw(UNMATCHED CASE)))))))))",
  "Stmt_ConstDecl(Type_Bits(Expr_LitInt(\"64\")),Exp9__5,Expr_TApply(FixedToFP.0,[(Expr_LitInt(\"32\"));(Expr_LitInt(\"64\"))],[(Expr_Slices(Expr_Array(Expr_Var(_R),Expr_LitInt(\"21\")),[(Slice_LoWd(Expr_LitInt(\"0\"),Expr_LitInt(\"32\")))]));(Expr_LitInt(\"0\"));(Expr_Var(FALSE));(Expr_Var(Exp3__3));(Expr_Var(FPDecodeRounding5__5))]))",
  "Stmt_Assign(LExpr_Array(LExpr_Var(_Z),Expr_LitInt(\"0\")),Expr_TApply(ZeroExtend.0,[(Expr_LitInt(\"64\"));(Expr_LitInt(\"128\"))],[(Expr_Var(Exp9__5));(Expr_LitInt(\"128\"))]))"
]

The call to the fixed point functions are not really an issue - we can just treat those as uninterpreted functions for the time being. The register declaration, variable declaration of type FPRounding (an enum in ASLi) and conversion from the FPCR bytes to the enum value are the real issue.

The text was updated successfully, but these errors were encountered:

katrinafyi · 2024-02-19T09:48:17Z

Hm yes, coincidentally (again!) I have also been looking at these instructions.

I don't think we can safely eliminate the FPCR reads in general. Are you asking for the types to be replaced with concrete bitvectors of the same size? This could be possible.

In my project, I took the different approach of updating the grammar to support this syntax. This is a bit nicer, I think, as the type constructor is useful to track down the enum's meaning, especially when its values are replaced by bits. You can find the g4 file here, if you would like to try this. The grammar is not too nice to work with and I will fix this in the not-too-far future.

l-kent · 2024-02-20T00:02:58Z

Types replaced with concrete bitvectors of the same size would be ideal - I can look up (and have already done so) what the type actually is in ASLi, but having to manually handle every type declaration like that is not really ideal at all. I'm not asking for FPCR reads to removed, just for them to be cleaned up.

I've already actually rewritten the grammar here to support these elements, as well as more extensively cleaning it up and fixing a number of errors in it. You may find that helpful. This is enough to parse cntlm.

Here's a pseudocode simplification of the fcvt example:

Exp4__5 := Z0
Exp5__4 := FPCR
if FPCR[24:22] == 0b00 then FPDecodeRounding8__7 := 0
elseif FPCR[24:22] == 0b01 then FPDecodeRounding8__7 := 1
elseif FPCR[24:22] == 0b10 then FPDecodeRounding8__7 := 2
elseif FPCR[24:22] == 0b11 then FPDecodeRounding8__7 := 3
else throw Exception(UNMATCHED CASE)
Exp10__6 := FPConvert[64,32](Exp4__5[32:0], Exp5__4, FPDecodeRounding8__7)
Z0 := ZeroExtend[64,128](Exp10__6, 128)

Most of these temporary variables are redundant - Exp4__5 and Exp5__4 don't really serve any purpose and you even could propagate Exp10__6 into the final assignment. FPDecodeRounding8__7 is just a conversion from bitvector to enum, with an impossible to reach exception in that conversion.

The biggest issue for BASIL is FPDecodeRounding8__7 not being a bitvector - this more difficult to work around and will require special treatment. Everything else can be managed (and mostly is at present, though I haven't yet added a way to handle the floating point functions but it is not difficult to do so - they can just be uninterpreted functions for now). I guess the issue with just treating it as a bitvector of size 2 is that there are two other FPRounding modes which are not represented in FPCR and only ever used explicitly?

All the issues with FPRounding could probably be avoided if the FPConvert that only takes two parameters wasn't inlined? That version just calls the three-parameter version, taking the FPRounding value from FPCR's RMode field, but if we could abstract away that version too that would be simplest.

l-kent · 2024-02-20T00:11:14Z

It is simplest at present if BASIL can deal with everything in terms of bitvectors. An alternate version of ASLp that keeps the original typing of everything as much as possible may have some real advantages (with that following through to a significantly overhauled BASIL) and could be worthy of investigation, but that would need to be an explicit design choice, not just having one or two stray non-bitvectors here and there.

ncough · 2024-02-22T01:37:32Z

This should be partially addressed by #48, but the question of floating point function primitives remains open.
With this change set 0x1e22c000 produces:

bits ( 3 ) FPDecodeRounding8__7 ;
FPDecodeRounding8__7 = ZeroExtend.0 {{ 2,3 }} ( FPCR [ 22 +: 2 ],3 ) ;
constant bits ( 64 ) Exp10__6 = FPConvert.0 {{ 64,32 }} ( __array _Z [ 0 ] [ 0 +: 32 ],FPCR,cvt_bits_uint.0 {{ 3 }} ( FPDecodeRounding8__7 ) ) ;
__array _Z [ 0 ] = ZeroExtend.0 {{ 64,128 }} ( Exp10__6,128 ) ;

Note the conversion of FPDecodeRounding8__7 back to an integer via cvt_bits_uint.0, so that the produced ASL types continue to make sense.
Once primitives are worked out, this conversion shouldn't be necessary.

l-kent · 2024-02-22T01:44:03Z

Thanks, that seems like a significant improvement. That should solve all the existing issues.

l-kent · 2024-02-26T00:34:38Z

The fcvt and ucvtf instructions are good now, but scvtf now has exposed integers in some cases which is not really ideal.

"scvtf v3.2d, v3.2d": [
  "Stmt_VarDeclsNoInit(Type_Bits(Expr_LitInt(\"3\")),[(FPDecodeRounding9__6)])",
  "Stmt_Assign(LExpr_Var(FPDecodeRounding9__6),Expr_TApply(ZeroExtend.0,[(Expr_LitInt(\"2\"));(Expr_LitInt(\"3\"))],[(Expr_Slices(Expr_Var(FPCR),[(Slice_LoWd(Expr_LitInt(\"22\"),Expr_LitInt(\"2\")))]));(Expr_LitInt(\"3\"))]))",
  "Stmt_ConstDecl(Type_Constructor(integer),Cse0__5,Expr_TApply(cvt_bits_uint.0,[(Expr_LitInt(\"3\"))],[(Expr_Var(FPDecodeRounding9__6))]))",
  "Stmt_ConstDecl(Type_Bits(Expr_LitInt(\"64\")),Exp13__5,Expr_TApply(FixedToFP.0,[(Expr_LitInt(\"64\"));(Expr_LitInt(\"64\"))],[(Expr_Slices(Expr_Array(Expr_Var(_Z),Expr_LitInt(\"3\")),[(Slice_LoWd(Expr_LitInt(\"0\"),Expr_LitInt(\"64\")))]));(Expr_LitInt(\"0\"));(Expr_Var(FALSE));(Expr_Var(FPCR));(Expr_Var(Cse0__5))]))",
  "Stmt_ConstDecl(Type_Bits(Expr_LitInt(\"64\")),Exp16__5,Expr_TApply(FixedToFP.0,[(Expr_LitInt(\"64\"));(Expr_LitInt(\"64\"))],[(Expr_Slices(Expr_Array(Expr_Var(_Z),Expr_LitInt(\"3\")),[(Slice_LoWd(Expr_LitInt(\"64\"),Expr_LitInt(\"64\")))]));(Expr_LitInt(\"0\"));(Expr_Var(FALSE));(Expr_Var(FPCR));(Expr_Var(Cse0__5))]))",
  "Stmt_Assign(LExpr_Array(LExpr_Var(_Z),Expr_LitInt(\"3\")),Expr_TApply(append_bits.0,[(Expr_LitInt(\"64\"));(Expr_LitInt(\"64\"))],[(Expr_Var(Exp16__5));(Expr_Var(Exp13__5))]))"
]

I understand it is just creating the temporary variable Cse0__5 to hold the integer because it is used multiple times, but is it possible to disable the creation of those temporary variables for cvt_bits_uint.0 operations?

ncough · 2024-03-22T00:50:05Z

CSE should no longer create these temporaries with #58.
@l-kent Please let us know if there are any further issues with these operations, otherwise happy to close this one out.

katrinafyi mentioned this issue Feb 22, 2024

Floating Point Cleanup #48

Merged

l-kent mentioned this issue Mar 15, 2024

Semantics.g4 grammar update UQ-PAC/BASIL#197

Merged

ncough mentioned this issue Mar 18, 2024

Limit CSE to generating bitvec temps #58

Merged

l-kent closed this as completed Sep 16, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Improve Partial Evaluation of Floating Point Conversion Operations #46

Improve Partial Evaluation of Floating Point Conversion Operations #46

l-kent commented Feb 19, 2024

katrinafyi commented Feb 19, 2024

l-kent commented Feb 20, 2024 •

edited

Loading

l-kent commented Feb 20, 2024

ncough commented Feb 22, 2024

l-kent commented Feb 22, 2024 •

edited

Loading

l-kent commented Feb 26, 2024

ncough commented Mar 22, 2024

Improve Partial Evaluation of Floating Point Conversion Operations #46

Improve Partial Evaluation of Floating Point Conversion Operations #46

Comments

l-kent commented Feb 19, 2024

katrinafyi commented Feb 19, 2024

l-kent commented Feb 20, 2024 • edited Loading

l-kent commented Feb 20, 2024

ncough commented Feb 22, 2024

l-kent commented Feb 22, 2024 • edited Loading

l-kent commented Feb 26, 2024

ncough commented Mar 22, 2024

l-kent commented Feb 20, 2024 •

edited

Loading

l-kent commented Feb 22, 2024 •

edited

Loading