From fdd98d88d4559b80b127502e15aa1bb7ca0bf9ff Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 14 Jan 2026 08:57:07 +1100 Subject: [PATCH 1/2] FPU: Fix zero result detection in fmadd-family instructions With the multiply-add instructions, it is possible to get into state FMADD_6 with R containing a value >= 8.0. If the value is exactly 8.0, the logic will incorrectly conclude that the result is zero because it only tests bits up to UNIT_BIT + 2. Fix this by testing up to UNIT_BIT + 3, and add a test case to the FPU test that triggers this situation. Signed-off-by: Paul Mackerras --- fpu.vhdl | 2 +- tests/fpu/fpu.c | 2 ++ tests/test_fpu.bin | Bin 34680 -> 34744 bytes 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fpu.vhdl b/fpu.vhdl index 190f4a3..d8b95f2 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -2015,7 +2015,7 @@ begin set_r := '0'; opsel_r <= RES_SHIFT; re_sel2 <= REXP2_NE; - if (r.r(UNIT_BIT + 2) or r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then + if (or (r.r(UNIT_BIT + 3 downto 0))) = '0' then -- R is all zeroes but there may be non-zero bits in S -- so shift them into R and set S to 0 set_r := '1'; diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index a123f62..cdbc7a1 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -1629,6 +1629,8 @@ struct fmavals { 0x41fffffffff00000, 0xc3ffffffffe00000, 0xc1fffffffff00000, 0x43ffffffffe00000 }, { 0x3ff0000000000000, 0x000060fbffffefc1, 0x000060fbffffefc1, FPS_RN_NEAR, 0x0000c1f7ffffdf82, 0x0000000000000000, 0x8000c1f7ffffdf82, 0x8000000000000000 }, + { 0x80000000fffffffd, 0x3ff0000000000000, 0x8000000300000003, FPS_UE | FPS_RN_NEAR, + 0xdef0000000000000, 0x5ee0000000300000, 0x5ef0000000000000, 0xdee0000000300000 }, }; int test23(long arg) diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index cd1d6476befa3ae556c49f6fea75b975b3fb7b09..d48077e277f22a941005c42462339a84971c02d3 100755 GIT binary patch delta 775 zcmZuvO=uHA6n-;_WD`mnj6YRNx)zeOwMcBVcB#QN9$Ht7f(OwusifGBAXpTxL>)TDWQ5<;l-uHcP=6yTtkIPhy z(=dRV0MOE}JX#L(5R7SQfI;N4$Yqhs4(V6IaiUM3{N7l2QfgHI4w(Sv47T;ipr-%} zN!Ywu{%To=ZET0#D~TlXud(+{d9F??u4<1{w+jG9`RRYOZz&zMOXVADn#URB7VK?c zC#mq2z&!Cvf}$N9$3Cn-K_$VugB>B9ZBq1y^G)dQ792l3ND9Z=Yo-~Mk#5c zZAYG5mEAS@c`)QFmXhb~Yw^ORV3*e|GP(_meEWvuK2l+4LSLvf!7?pVL~tT6ue1Gx z8wqWD!2QuBLYwY0e>B3I+mBH)$4~Ew+Z}geeT0m0b5}2+(mfuDpCuF~`P6P7q4G6u z?4g7Vvq#;tq@U&Y&Jw|n^vvMCMZukaR^|zbvS+UOLq*?PG;YY64)Cv@~gH($Zrv5ey^Dp7`FDKdX1j0EY|!V}WcD2?}wb zqHOC%?W<`XwtDNayk$K`{#WFEU3=TaQmgE7{9YbFt3CM-`xY+{yHcB6V@`WeN#}2T zyKqMQ@-3pftTT#3%{Cb2qxl95_l&uEw1 z+s1cSv7>>X`vabo9=m8AO9w9b``slyS+Lra*uG)JgERbG;0v3d;K`0D%m|cL)>(Oi zZOE22;q6*N_U-}qc7;TH_c4YQab{=Oa@_A5LL3&x?m=Yp(;^f;k8JLanA+n(R=**% zy$mtJ9}QH|#dG_rnBhkv3$$*LX%(O4MZ^$)HdteIE^hDlA}i=Zi>k~~)RW7+zDe3> zg`CtoFI#)XzI}IZAD>nHTv0|nYeZ Date: Sat, 17 Jan 2026 22:47:14 +1100 Subject: [PATCH 2/2] dcache: Fix stalls that occurred occasionally with dcbt followed by ld This fixes a race condition that causes a hang in a situation where the program does a dcbt to a cache line, then hits a TLB miss causing some requests to come in to the dcache from the MMU while the cache line requested by the dcbt has not yet started to come in, then does a load to an address in the same cache line requested by the dcbt. If it happens that the data for the load arrives in the same cycle that the load is doing the cache tag and TLB lookups, the dcache_slow process correctly recognizes that the request can be satisfied immediately but incorrectly sends the done signal to the MMU rather than loadstore1, because the logic looks at r1.mmu_req not req.mmu_req. Fix it to use req.mmu_req. Also make sure that RELOAD_WAIT_ACK state only completes a touch that was the one that caused entry to RELOAD_WAIT_ACK state, not a subsequent touch, which will have r1.req.hit_reload = 0. (A touch to the same line that is already being reloaded would be treated as a hit.) Signed-off-by: Paul Mackerras --- dcache.vhdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dcache.vhdl b/dcache.vhdl index 58c464b..0d8d354 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -1704,7 +1704,7 @@ begin end if; -- If this is a touch, complete the instruction - if r1.full = '1' and r1.req.touch = '1' then + if r1.full = '1' and r1.req.touch = '1' and r1.req.hit_reload = '1' then r1.full <= '0'; r1.slow_valid <= '1'; r1.ls_valid <= '1'; @@ -1730,7 +1730,7 @@ begin get_row_of_line(r1.store_row) = get_row_of_line(get_row(req.real_addr)) then r1.full <= '0'; r1.slow_valid <= '1'; - if r1.mmu_req = '0' then + if req.mmu_req = '0' then r1.ls_valid <= '1'; else r1.mmu_done <= '1'; @@ -1891,7 +1891,7 @@ begin r1.wb.stb & r1.wb.cyc & d_out.error & d_out.valid & - req_op_load_miss & req_op_store & req_op_bad & + req_op_load_miss & req_op_store & req_hit_reload & stall_out & std_ulogic_vector(resize(tlb_hit_way, 3)) & valid_ra &