Skip to content

Commit 7c4aa24

Browse files
committed
Add XIP cache maintenance API (fixes #2005)
Also add a cache clean to hardware_flash implementations, to avoid losing pending writes on the subsequent invalidate.
1 parent 3708588 commit 7c4aa24

File tree

7 files changed

+335
-0
lines changed

7 files changed

+335
-0
lines changed

src/cmake/rp2_common.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ pico_add_subdirectory(rp2_common/hardware_timer)
6060
pico_add_subdirectory(rp2_common/hardware_uart)
6161
pico_add_subdirectory(rp2_common/hardware_vreg)
6262
pico_add_subdirectory(rp2_common/hardware_watchdog)
63+
pico_add_subdirectory(rp2_common/hardware_xip_cache)
6364
pico_add_subdirectory(rp2_common/hardware_xosc)
6465

6566
if (PICO_RP2350 OR PICO_COMBINED_DOCS)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
pico_simple_hardware_target(flash)
22
pico_mirrored_target_link_libraries(hardware_flash INTERFACE pico_bootrom)
3+
pico_mirrored_target_link_libraries(hardware_flash INTERFACE hardware_xip_cache)

src/rp2_common/hardware_flash/flash.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#else
1414
#include "hardware/structs/qmi.h"
1515
#endif
16+
#include "hardware/xip_cache.h"
1617

1718
#define FLASH_BLOCK_ERASE_CMD 0xd8
1819

@@ -84,6 +85,8 @@ void __no_inline_not_in_flash_func(flash_range_erase)(uint32_t flash_offs, size_
8485
rom_flash_flush_cache_fn flash_flush_cache_func = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE);
8586
assert(connect_internal_flash_func && flash_exit_xip_func && flash_range_erase_func && flash_flush_cache_func);
8687
flash_init_boot2_copyout();
88+
// Commit any pending writes to external RAM, to avoid losing them in the subsequent flush:
89+
xip_cache_clean_all();
8790

8891
// No flash accesses after this point
8992
__compiler_memory_barrier();
@@ -112,6 +115,7 @@ void __no_inline_not_in_flash_func(flash_range_program)(uint32_t flash_offs, con
112115
rom_flash_flush_cache_fn flash_flush_cache_func = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE);
113116
assert(connect_internal_flash_func && flash_exit_xip_func && flash_range_program_func && flash_flush_cache_func);
114117
flash_init_boot2_copyout();
118+
xip_cache_clean_all();
115119

116120
__compiler_memory_barrier();
117121

@@ -152,6 +156,8 @@ void __no_inline_not_in_flash_func(flash_do_cmd)(const uint8_t *txbuf, uint8_t *
152156
rom_flash_flush_cache_fn flash_flush_cache_func = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE);
153157
assert(connect_internal_flash_func && flash_exit_xip_func && flash_flush_cache_func);
154158
flash_init_boot2_copyout();
159+
xip_cache_clean_all();
160+
155161
__compiler_memory_barrier();
156162
connect_internal_flash_func();
157163
flash_exit_xip_func();
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
pico_simple_hardware_target(xip_cache)
2+
3+
pico_mirrored_target_link_libraries(hardware_xip_cache INTERFACE hardware_sync)
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
/*
2+
* Copyright (c) 2024 Raspberry Pi Ltd.
3+
*
4+
* SPDX-License-Identifier: BSD-3-Clause
5+
*/
6+
7+
#ifndef _HARDWARE_XIP_CACHE_H
8+
#define _HARDWARE_XIP_CACHE_H
9+
10+
#include "pico.h"
11+
#include "hardware/regs/addressmap.h"
12+
13+
/** \file xip_cache.h
14+
* \defgroup hardware_xip_cache hardware_xip_cache
15+
*
16+
* \brief Low-level cache maintenance operations for the XIP cache
17+
*
18+
* These functions apply some maintenance operation to either the entire cache contents, or a range
19+
* of offsets within the downstream address space. Offsets start from 0 (indicating the first byte
20+
* of flash), so pointers should have XIP_BASE subtracted before passing into one of these
21+
* functions.
22+
*
23+
* \if rp2040-specific
24+
* The only valid cache maintenance operation on RP2040 is "invalidate", which tells the cache to
25+
* forget everything it knows about some address. This is necessary after a programming operation,
26+
* because the cache does not automatically know about any serial programming operations performed
27+
* on the external flash device, and could return stale data.
28+
* \endif
29+
*
30+
* \if rp2350-specific
31+
* On RP2350, the three types of operation are:
32+
*
33+
* * Invalidate: tell the cache to forget everything it knows about some address. The next access to
34+
* that address will fetch from downstream memory.
35+
*
36+
* * Clean: if the addressed cache line contains data not yet written to external memory, then write
37+
* that data out now, and mark the line as "clean" (i.e. not containing uncommitted write data)
38+
*
39+
* * Pin: mark an address as always being resident in the cache. This persists until the line is
40+
* invalidated, and can be used to allocate part of the cache for cache-as-SRAM use.
41+
*
42+
* When using both external flash and external RAM (e.g. PSRAM), a simple way to maintain coherence
43+
* over flash programming operations is to:
44+
*
45+
* 1. Clean the entire cache (e.g. using xip_cache_clean_all())
46+
*
47+
* 2. Erase + program the flash using serial SPI commands
48+
*
49+
* 3. Invalidate ("flush") the entire cache (e.g. using xip_cache_invalidate_all())
50+
*
51+
* The invalidate ensures the programming is visible to subsequent reads. The clean ensures that the
52+
* invalidate does not discard any cached PSRAM write data.
53+
*
54+
* \endif
55+
*
56+
*/
57+
58+
// PICO_CONFIG: PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE, Enable/disable assertions in the hardware_xip_cache module, type=bool, default=0, group=hardware_xip_cache
59+
#ifndef PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE
60+
#ifdef PARAM_ASSERTIONS_ENABLED_FLASH // backwards compatibility with SDK < 2.0.0
61+
#define PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE PARAM_ASSERTIONS_ENABLED_FLASH
62+
#else
63+
#define PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE 0
64+
#endif
65+
#endif
66+
67+
#define XIP_CACHE_LINE_SIZE _u(8)
68+
69+
#define XIP_CACHE_SIZE (_u(16) * _u(1024))
70+
71+
#if PICO_RP2040
72+
#define XIP_CACHE_ADDRESS_SPACE_SIZE (_u(16) * _u(1024) * _u(1024))
73+
#else
74+
#define XIP_CACHE_ADDRESS_SPACE_SIZE (XIP_END - XIP_BASE)
75+
#endif
76+
77+
// A read-only cache never requires cleaning (you can still call the functions, they are just no-ops)
78+
#if PICO_RP2040
79+
#define XIP_CACHE_IS_READ_ONLY 1
80+
#else
81+
#define XIP_CACHE_IS_READ_ONLY 0
82+
#endif
83+
84+
#ifndef __ASSEMBLER__
85+
86+
#ifdef __cplusplus
87+
extern "C" {
88+
#endif
89+
90+
/*! \brief Invalidate the cache for the entire XIP address space
91+
* \ingroup hardware_xip_cache
92+
*
93+
* Invalidation ensures that subsequent reads will fetch data from the downstream memory, rather
94+
* than using (potentially stale) cached data.
95+
*
96+
* This function is faster than calling xip_cache_invalidate_range() for the entire address space,
97+
* because it iterates over cachelines instead of addresses.
98+
*
99+
* @note Any pending write data held in the cache is lost: you can force the cache to commit these
100+
* writes first, by calling xip_cache_clean_all()
101+
*
102+
* @note Unlike flash_flush_cache(), this function affects *only* the cache line state.
103+
* flash_flush_cache() calls a ROM API which can have other effects on some platforms, like
104+
* cleaning up the bootrom's QSPI GPIO setup on RP2040. Prefer this function for general cache
105+
* maintenance use, and prefer flash_flush_cache in sequences of ROM flash API calls.
106+
*/
107+
void xip_cache_invalidate_all(void);
108+
109+
/*! \brief Invalidate a range of offsets within the XIP address space
110+
* \ingroup hardware_xip_cache
111+
*
112+
* \param start_offset The first offset to be invalidated. Offset 0 means the first byte of XIP
113+
* memory (e.g. flash). Pointers must have XIP_BASE subtracted before passing into this function.
114+
* Must be 4-byte-aligned on RP2040. Must be aligned to the start of a cache line
115+
* (XIP_CACHE_LINE_SIZE) on other platforms.
116+
*
117+
* \param size_bytes The number of bytes to invalidate. Must be a multiple of 4 bytes on RP2040.
118+
* Must be a multiple of XIP_CACHE_LINE_SIZE on other platforms.
119+
*
120+
* Invalidation ensures that subsequent reads will fetch data from the downstream memory, rather
121+
* than using (potentially stale) cached data.
122+
123+
* @note Any pending write data held in the cache is lost: you can force the cache to commit these
124+
* writes first, by calling xip_cache_clean_range() with the same parameters. Generally this is
125+
* not necessary because invalidation is used with flash (write-behind via programming), and
126+
* cleaning is used with PSRAM (writing through the cache).
127+
*
128+
*/
129+
void xip_cache_invalidate_range(uintptr_t start_offset, uintptr_t size_bytes);
130+
131+
#if !XIP_CACHE_IS_READ_ONLY
132+
133+
/*! \brief Clean the cache for the entire XIP address space
134+
* \ingroup hardware_xip_cache
135+
*
136+
* This causes the cache to write out all pending write data to the downstream memory. For example,
137+
* when suspending the system with state retained in external PSRAM, this ensures all data has made
138+
* it out to external PSRAM before powering down.
139+
*
140+
* This function is faster than calling xip_cache_clean_range() for the entire address space,
141+
* because it iterates over cachelines instead of addresses.
142+
*
143+
* \if rp2040-specific
144+
* On RP2040 this is a no-op, as the XIP cache is read-only. This is indicated by the
145+
* XIP_CACHE_IS_READ_ONLY macro.
146+
* \endif
147+
*
148+
* \if rp2350-specific
149+
* On RP2350, due to the workaround applied for RP2350-E11, this function also effectively
150+
* invalidates all cache lines after cleaning them. The next access to each line will miss. Avoid
151+
* this by calling xip_cache_clean_range() which does not suffer this issue.
152+
* \endif
153+
*
154+
*/
155+
156+
void xip_cache_clean_all(void);
157+
158+
/*! \brief Clean a range of offsets within the XIP address space
159+
* \ingroup hardware_xip_cache
160+
*
161+
* This causes the cache to write out pending write data at these offsets to the downstream memory.
162+
*
163+
* \if rp2040-specific
164+
* On RP2040 this is a no-op, as the XIP cache is read-only. This is indicated by the
165+
* XIP_CACHE_IS_READ_ONLY macro.
166+
* \endif
167+
*
168+
* \param start_offset The first offset to be cleaned. Offset 0 means the first byte of XIP
169+
* memory (e.g. flash). Pointers must have XIP_BASE subtracted before passing into this function.
170+
* Must be aligned to the start of a cache line (XIP_CACHE_LINE_SIZE).
171+
*
172+
* \param size_bytes The number of bytes to clean. Must be a multiple of XIP_CACHE_LINE_SIZE.
173+
*/
174+
void xip_cache_clean_range(uintptr_t start_offset, uintptr_t size_bytes);
175+
176+
#else
177+
// Stub these out inline to avoid generating a call to an empty function when they are no-ops
178+
static inline void xip_cache_clean_all(void) {
    // Nothing to do: a read-only cache never requires cleaning
}
179+
static inline void xip_cache_clean_range(uintptr_t start_offset, uintptr_t size_bytes) {
    // No-op on a read-only cache; the casts only mark both parameters as deliberately unused
    (void)size_bytes;
    (void)start_offset;
}
183+
#endif
184+
185+
#if !PICO_RP2040
186+
187+
/*! \brief Pin a range of offsets within the XIP address space
188+
* \ingroup hardware_xip_cache
189+
*
190+
* Pinning a line at an address allocates the line exclusively for use at that address. This means
191+
* that all subsequent accesses to that address will hit the cache, and will not go to downstream
192+
* memory. This persists until one of two things happens:
193+
*
194+
* * The line is invalidated, e.g. via xip_cache_invalidate_all()
195+
*
196+
* * The same line is pinned at a different address (note lines are selected by address modulo
197+
* XIP_CACHE_SIZE)
198+
*
199+
* \param start_offset The first offset to be pinned. Offset 0 means the first byte of XIP
200+
* memory (e.g. flash). Pointers must have XIP_BASE subtracted before passing into this function.
201+
* Must be aligned to the start of a cache line (XIP_CACHE_LINE_SIZE).
202+
*
203+
* \param size_bytes The number of bytes to pin. Must be a multiple of XIP_CACHE_LINE_SIZE.
204+
*
205+
*/
206+
void xip_cache_pin_range(uintptr_t start_offset, uintptr_t size_bytes);
207+
#endif
208+
209+
#ifdef __cplusplus
210+
}
211+
#endif
212+
213+
#endif // !__ASSEMBLER__
214+
215+
#endif // !_HARDWARE_XIP_CACHE_H
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#include "hardware/xip_cache.h"
2+
#include "hardware/structs/xip.h"
3+
// For barrier macros:
4+
#include "hardware/sync.h"
5+
6+
// Implementation-private constants (exporting these would create a compatibility headache as they
7+
// don't exist on all platforms; all of these operations are exposed through APIs anyways)
8+
9+
#if !PICO_RP2040
10+
// Maintenance operation codes. xip_cache_maintain() adds the selected code to the offset it
// writes within the XIP_MAINTENANCE_BASE window.
typedef enum {
    // Operations applied by set/way (used by the *_all() functions)
    XIP_CACHE_INVALIDATE_BY_SET_WAY = 0,
    XIP_CACHE_CLEAN_BY_SET_WAY      = 1,
    // Operations applied by address (used by the *_range() functions)
    XIP_CACHE_INVALIDATE_BY_ADDRESS = 2,
    XIP_CACHE_CLEAN_BY_ADDRESS      = 3,
    XIP_CACHE_PIN_AT_ADDRESS        = 7,
    // Largest op code accepted by the parameter check in xip_cache_maintain()
    XIP_CACHE_OP_MAX                = XIP_CACHE_PIN_AT_ADDRESS
} cache_op_t;
18+
#endif
19+
20+
// Used to ensure subsequent accesses observe the new state of the maintained cache lines
21+
#define __post_maintenance_barrier() do {__dsb(); __isb();} while (0)
22+
23+
// All functions in this file are marked non-flash, even though they themselves may be executed
24+
// safely from flash, because they are likely to be called during a flash programming operation
25+
// (which makes flash execution momentarily unsafe)
26+
27+
// Sanity-check a (start_offset, size_bytes) pair describing a range of XIP offsets.
//
// We use offsets, not addresses, for consistency with the flash API. This means the range of
// valid inputs starts at 0.
//
// start_offset: first offset of the range (0 is the first byte of XIP memory)
// size_bytes:   length of the range in bytes
//
// The checks go through valid_params_if(), gated on the HARDWARE_XIP_CACHE assertion setting
// (PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE).
__always_inline static void check_xip_offset_range(uintptr_t start_offset, uintptr_t size_bytes) {
    // Presumably needed so the parameters still count as used when the checks compile away.
    (void)start_offset;
    (void)size_bytes;
    // The range must start within the XIP address space...
    valid_params_if(HARDWARE_XIP_CACHE, start_offset <= XIP_CACHE_ADDRESS_SPACE_SIZE);
    // ...and must not run past its end.
    valid_params_if(HARDWARE_XIP_CACHE, start_offset + size_bytes <= XIP_CACHE_ADDRESS_SPACE_SIZE);
    // Check for unsigned wrapping too: a wrapped sum is smaller than start_offset and could
    // otherwise slip past the end-of-range check above.
    valid_params_if(HARDWARE_XIP_CACHE, start_offset + size_bytes >= start_offset);
}
36+
37+
#if !PICO_RP2040
38+
// Generic code for RP2350-style caches: apply a maintenance operation to a range of offsets
39+
// Apply maintenance operation `op` to every cache line in [start_offset, start_offset + size_bytes).
// Both start_offset and size_bytes must be multiples of XIP_CACHE_LINE_SIZE.
static void __no_inline_not_in_flash_func(xip_cache_maintain)(uintptr_t start_offset, uintptr_t size_bytes, cache_op_t op) {
    check_xip_offset_range(start_offset, size_bytes);
    valid_params_if(HARDWARE_XIP_CACHE, (start_offset & (XIP_CACHE_LINE_SIZE - 1u)) == 0);
    valid_params_if(HARDWARE_XIP_CACHE, (size_bytes & (XIP_CACHE_LINE_SIZE - 1u)) == 0);
    valid_params_if(HARDWARE_XIP_CACHE, (uint)op <= (uint)XIP_CACHE_OP_MAX);

    const uintptr_t limit = start_offset + size_bytes;
    uintptr_t offset = start_offset;
    while (offset < limit) {
        // One byte write per line: the op code is added to the line's offset in the
        // maintenance window.
        io_wo_8 *maintenance_reg = (io_wo_8 *) (XIP_MAINTENANCE_BASE + offset + (uintptr_t)op);
        *maintenance_reg = 0;
        offset += XIP_CACHE_LINE_SIZE;
    }
    __post_maintenance_barrier();
}
51+
#endif
52+
53+
// Invalidate the whole cache (see the declaration in hardware/xip_cache.h for the full contract)
void __no_inline_not_in_flash_func(xip_cache_invalidate_all)(void) {
#if !PICO_RP2040
    // Invalidate by set/way over a XIP_CACHE_SIZE-sized window at the top of the address space
    xip_cache_maintain(XIP_CACHE_ADDRESS_SPACE_SIZE - XIP_CACHE_SIZE, XIP_CACHE_SIZE, XIP_CACHE_INVALIDATE_BY_SET_WAY);
#else
    // Kick off the flush
    xip_ctrl_hw->flush = 1;
    // Read back to wait for completion
    (void)xip_ctrl_hw->flush;
    __post_maintenance_barrier();
#endif
}
63+
64+
// Invalidate the cache for a range of XIP offsets (see hardware/xip_cache.h for the full contract)
void __no_inline_not_in_flash_func(xip_cache_invalidate_range)(uintptr_t start_offset, uintptr_t size_bytes) {
#if !PICO_RP2040
    xip_cache_maintain(start_offset, size_bytes, XIP_CACHE_INVALIDATE_BY_ADDRESS);
#else
    // Accesses are at intervals of one half cache line (so 4 bytes) because RP2040's cache has two
    // valid flags per cache line, and we need to clear both.
    check_xip_offset_range(start_offset, size_bytes);
    valid_params_if(HARDWARE_XIP_CACHE, (start_offset & 3u) == 0);
    valid_params_if(HARDWARE_XIP_CACHE, (size_bytes & 3u) == 0);

    const uintptr_t limit = start_offset + size_bytes;
    uintptr_t offset = start_offset;
    while (offset < limit) {
        // On RP2040 you can invalidate a sector (half-line) by writing to its normal
        // cached+allocating address
        io_wo_32 *sector = (io_wo_32 *)(XIP_BASE + offset);
        *sector = 0;
        offset += 4u;
    }
    __post_maintenance_barrier();
#endif
}
85+
86+
#if !XIP_CACHE_IS_READ_ONLY
87+
// Clean the whole cache (see hardware/xip_cache.h for the full contract)
void __no_inline_not_in_flash_func(xip_cache_clean_all)(void) {
    // Use addresses outside of the downstream QMI address range to work around RP2350-E11; this
    // effectively performs a clean+invalidate (except being a no-op on pinned lines) due to the
    // erroneous update of the tag. Consequently you will take a miss on the next access to the
    // cleaned address.
    const uintptr_t window_start = XIP_END - XIP_BASE - XIP_CACHE_SIZE;
    xip_cache_maintain(window_start, XIP_CACHE_SIZE, XIP_CACHE_CLEAN_BY_SET_WAY);
}
94+
#endif
95+
96+
#if !XIP_CACHE_IS_READ_ONLY
97+
void __no_inline_not_in_flash_func(xip_cache_clean_range)(uintptr_t start_offset, uintptr_t size_bytes) {
98+
xip_cache_maintain(start_offset, size_bytes, XIP_CACHE_CLEAN_BY_ADDRESS);
99+
}
100+
#endif
101+
102+
#if !PICO_RP2040
103+
void __no_inline_not_in_flash_func(xip_cache_pin_range)(uintptr_t start_offset, uintptr_t size_bytes) {
104+
valid_params_if(HARDWARE_XIP_CACHE, size_bytes <= XIP_CACHE_SIZE);
105+
xip_cache_maintain(start_offset, size_bytes, XIP_CACHE_PIN_AT_ADDRESS);
106+
}
107+
#endif
108+

test/kitchen_sink/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ set(KITCHEN_SINK_LIBS
2424
hardware_uart
2525
hardware_vreg
2626
hardware_watchdog
27+
hardware_xip_cache
2728
hardware_xosc
2829
pico_aon_timer
2930
pico_bit_ops

0 commit comments

Comments
 (0)