diff --git a/rtl/core/neorv32_cache.vhd b/rtl/core/neorv32_cache.vhd
index b7a0f75b4..36170aed3 100644
--- a/rtl/core/neorv32_cache.vhd
+++ b/rtl/core/neorv32_cache.vhd
@@ -8,11 +8,6 @@
 -- the 4 most significant address bits, well as all atomic (reservation set)        --
 -- operations will always **bypass** the cache resulting in "direct accesses".      --
 --                                                                                  --
--- A fence request will first flush the data cache (write back modified blocks to   --
--- main memory before invalidating all cache blocks to force a re-fetch from main   --
--- memory. After this, the fence request is forwarded to the downstream memory      --
--- system.                                                                          --
---                                                                                  --
 -- Simplified cache architecture ("-->" = direction of access requests):            --
 --                                                                                  --
 --               Direct Access        +----------+                                  --
@@ -946,9 +941,10 @@ begin
 
       when S_FLUSH_START => -- start checking for dirty blocks
       -- ------------------------------------------------------------
-        addr_nxt.idx <= (others => '0'); -- start with index 0
-        upret_nxt    <= S_FLUSH_CHECK; -- come back to S_FLUSH_CHECK after block upload
-        state_nxt    <= S_FLUSH_READ;
+        addr_nxt.idx    <= (others => '0'); -- start with index 0
+        bus_req_o.fence <= bool_to_ulogic_f(READ_ONLY); -- forward fence request
+        upret_nxt       <= S_FLUSH_CHECK; -- come back to S_FLUSH_CHECK after block upload
+        state_nxt       <= S_FLUSH_READ;
 
       when S_FLUSH_READ => -- cache read access latency cycle
       -- ------------------------------------------------------------
@@ -963,7 +959,7 @@ begin
         else -- move on to next block
           addr_nxt.idx <= std_ulogic_vector(unsigned(addr.idx) + 1);
           if (and_reduce_f(addr.idx) = '1') then -- all blocks done?
-            bus_req_o.fence <= '1'; -- forward fence request to downstream memories
+            bus_req_o.fence <= not bool_to_ulogic_f(READ_ONLY); -- forward fence request
             state_nxt       <= S_IDLE;
           else -- go to next block
             state_nxt <= S_FLUSH_READ;