// Copyright 2018 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "esp_osal/esp_osal.h"
#include "esp_osal/task.h"
#include "esp32/spiram.h"
#include "esp32/rom/cache.h"
#include "sdkconfig.h"
#include "esp32/himem.h"
#include "soc/soc.h"
#include "esp_log.h"

/*
So, why does the API look this way, and why is it so inflexible that it doesn't allow any mappings other than full
32K chunks? Most of it has to do with the fact that the cache works on *virtual* addresses. What this comes down to
is that while it's allowed to map a range of physical memory into the address space twice, there's no cache
consistency between the two regions.

This means that a write to region A may or may not show up, perhaps delayed, in region B, as it depends on
the time that the writeback to SPI RAM is done on A and the time before the corresponding cache line is invalidated
on B. Note that this goes for every 32-byte cache line: this implies that if a program writes to address X and Y within
A, the write to Y may show up before the write to X does.

It gets even worse when both A and B are written: theoretically, a write to a 32-byte cache line in A can be entirely
undone because of a write to a different address in B that happens to be in the same 32-byte cache line.

Because of these reasons, we do not allow double mappings at all. This, however, has other implications that make
supporting arbitrary ranges not really useful. Because of the lack of double mappings, applications will need to do
their own management of mapped regions, meaning they will normally map blocks in and out one at a time anyway, as
mapping more freely-sized regions would introduce the chance of accidentally mapping two overlapping regions. As this
is the case, to keep the code simple, at the moment we just force these blocks to be equal to the 32K MMU page size.
The API itself does allow for more granular allocations, so if there's a pressing need for a more complex solution in
the future, we can do this.

Note: In the future, we can expand on this API to do a memcpy() between SPI RAM and (internal) memory using the SPI1
peripheral. This needs support for SPI1 to be in the SPI driver, however.
*/
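
/*
Illustrative usage sketch (not part of this file's logic and not compiled; it only shows how the public API
declared in esp32/himem.h is meant to be used, with the 32K block size written out as 32*1024): an application
allocates physical himem, reserves a window of address space, and then maps 32K blocks of the physical memory
in and out of that window as needed.

    esp_himem_handle_t mem;
    esp_himem_rangehandle_t range;
    ESP_ERROR_CHECK(esp_himem_alloc(32 * 1024, &mem));              //one physical 32K block
    ESP_ERROR_CHECK(esp_himem_alloc_map_range(32 * 1024, &range));  //one 32K window of address space
    uint32_t *ptr;
    ESP_ERROR_CHECK(esp_himem_map(mem, range, 0, 0, 32 * 1024, 0, (void **)&ptr));
    ptr[0] = 0x12345678;                                            //use like normal memory while mapped
    ESP_ERROR_CHECK(esp_himem_unmap(range, ptr, 32 * 1024));
    ESP_ERROR_CHECK(esp_himem_free(mem));
    ESP_ERROR_CHECK(esp_himem_free_map_range(range));
*/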

#if CONFIG_SPIRAM_BANKSWITCH_ENABLE
#define SPIRAM_BANKSWITCH_RESERVE CONFIG_SPIRAM_BANKSWITCH_RESERVE
#else
#define SPIRAM_BANKSWITCH_RESERVE 0
#endif

#define CACHE_BLOCKSIZE (32*1024)

//Start of the virtual address range reserved for himem use
#define VIRT_HIMEM_RANGE_START (SOC_EXTRAM_DATA_LOW+(128-SPIRAM_BANKSWITCH_RESERVE)*CACHE_BLOCKSIZE)
//First MMU block of the virtual address range reserved for himem use
#define VIRT_HIMEM_RANGE_BLOCKSTART (128-SPIRAM_BANKSWITCH_RESERVE)
//First physical block reserved for himem use
#define PHYS_HIMEM_BLOCKSTART (128-SPIRAM_BANKSWITCH_RESERVE)
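
/*
Worked example (assuming CONFIG_SPIRAM_BANKSWITCH_RESERVE = 8 and 8 MiB of SPI RAM; other configurations scale
the same way). The 4 MiB cache window for external RAM on the ESP32 spans 128 blocks of 32 KiB starting at
SOC_EXTRAM_DATA_LOW (0x3F800000). Reserving 8 blocks for bank switching gives:

    VIRT_HIMEM_RANGE_BLOCKSTART = 128 - 8 = 120
    VIRT_HIMEM_RANGE_START      = 0x3F800000 + 120 * 32K = 0x3FBC0000   (the last 256 KiB of the window)
    PHYS_HIMEM_BLOCKSTART       = 120                                   (physical pool starts at 3840 KiB)

The physical himem pool then runs from 3840 KiB up to the end of SPI RAM, i.e. 8192 - 3840 = 4352 KiB
(136 blocks of 32 KiB), which is what esp_himem_get_phys_size() reports in this configuration.
*/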

#define TAG "esp_himem"

#define HIMEM_CHECK(cond, str, err) if (cond) do {ESP_LOGE(TAG, "%s: %s", __FUNCTION__, str); return err; } while(0)

//Metadata for a block of physical RAM
typedef struct {
    unsigned int is_alloced: 1;
    unsigned int is_mapped: 1;
} ramblock_t;

//Metadata for a 32K block of the virtual himem address range
typedef struct {
    unsigned int is_alloced: 1;
    unsigned int is_mapped: 1;
    unsigned int ram_block: 16;
} rangeblock_t;

static ramblock_t *s_ram_descriptor = NULL;
static rangeblock_t *s_range_descriptor = NULL;
static int s_ramblockcnt = 0;
static const int s_rangeblockcnt = SPIRAM_BANKSWITCH_RESERVE;

//Handle for a window of address space
typedef struct esp_himem_rangedata_t {
    int block_ct;
    int block_start;
} esp_himem_rangedata_t;

//Handle for a range of physical memory
typedef struct esp_himem_ramdata_t {
    int block_ct;
    uint16_t *block;
} esp_himem_ramdata_t;

static portMUX_TYPE spinlock = portMUX_INITIALIZER_UNLOCKED;

static inline int ramblock_idx_valid(int ramblock_idx)
{
    return (ramblock_idx >= 0 && ramblock_idx < s_ramblockcnt);
}

static inline int rangeblock_idx_valid(int rangeblock_idx)
{
    return (rangeblock_idx >= 0 && rangeblock_idx < s_rangeblockcnt);
}

//Point 'ct' consecutive 32K MMU banks, starting at virtual bank 'virt_bank', at the physical SPI RAM starting
//at bank 'phys_bank'. Both CPUs' cache MMUs are set up identically so the mapping is the same on either core.
static void set_bank(int virt_bank, int phys_bank, int ct)
{
    int r;
    r = cache_sram_mmu_set( 0, 0, SOC_EXTRAM_DATA_LOW + CACHE_BLOCKSIZE * virt_bank, phys_bank * CACHE_BLOCKSIZE, 32, ct );
    assert(r == 0);
    r = cache_sram_mmu_set( 1, 0, SOC_EXTRAM_DATA_LOW + CACHE_BLOCKSIZE * virt_bank, phys_bank * CACHE_BLOCKSIZE, 32, ct );
    assert(r == 0);
}

size_t esp_himem_get_phys_size(void)
{
    int paddr_start = (4096 * 1024) - (CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE);
    return esp_spiram_get_size() - paddr_start;
}

size_t esp_himem_get_free_size(void)
{
    size_t ret = 0;
    for (int i = 0; i < s_ramblockcnt; i++) {
        if (!s_ram_descriptor[i].is_alloced) ret += CACHE_BLOCKSIZE;
    }
    return ret;
}

size_t esp_himem_reserved_area_size(void)
{
    return CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE;
}


void __attribute__((constructor)) esp_himem_init(void)
{
    if (SPIRAM_BANKSWITCH_RESERVE == 0) return;
    int maxram = esp_spiram_get_size();
    //catch double init
    HIMEM_CHECK(s_ram_descriptor != NULL, "already initialized", ); //Looks weird; last arg is empty so it expands to 'return ;'
    HIMEM_CHECK(s_range_descriptor != NULL, "already initialized", );
    //need to have some reserved banks
    HIMEM_CHECK(SPIRAM_BANKSWITCH_RESERVE == 0, "No banks reserved for himem", );
    //Start and end of physical reserved memory. Note it starts slightly under
    //the 4MiB mark as the reserved banks can't have a unity mapping to be used by malloc
    //anymore; we treat them as himem instead.
    int paddr_start = (4096 * 1024) - (CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE);
    int paddr_end = maxram;
    s_ramblockcnt = ((paddr_end - paddr_start) / CACHE_BLOCKSIZE);
    //Allocate data structures
    s_ram_descriptor = calloc(s_ramblockcnt, sizeof(ramblock_t));
    s_range_descriptor = calloc(SPIRAM_BANKSWITCH_RESERVE, sizeof(rangeblock_t));
    if (s_ram_descriptor == NULL || s_range_descriptor == NULL) {
        ESP_EARLY_LOGE(TAG, "Cannot allocate memory for meta info. Not initializing!");
        free(s_ram_descriptor);
        free(s_range_descriptor);
        return;
    }
    ESP_EARLY_LOGI(TAG, "Initialized. Using last %d 32KB address blocks for bank switching on %d KB of physical memory.",
                SPIRAM_BANKSWITCH_RESERVE, (paddr_end - paddr_start) / 1024);
}


//Allocate 'count' (not necessarily consecutive) physical RAM blocks and return their block numbers in
//blocks_out[]. Returns true if the blocks could be allocated, false if not.
static bool allocate_blocks(int count, uint16_t *blocks_out)
{
    int n = 0;
    for (int i = 0; i < s_ramblockcnt && n != count; i++) {
        if (!s_ram_descriptor[i].is_alloced) {
            blocks_out[n] = i;
            n++;
        }
    }
    if (n == count) {
        //All blocks could be allocated. Mark as in use.
        for (int i = 0; i < count; i++) {
            s_ram_descriptor[blocks_out[i]].is_alloced = true;
            assert(s_ram_descriptor[blocks_out[i]].is_mapped == false);
        }
        return true;
    } else {
        //Error allocating blocks
        return false;
    }
}
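
/*
Illustrative note (the example values are hypothetical): allocate_blocks() simply takes the lowest-numbered
free blocks, so the result need not be physically contiguous. E.g. if physical blocks 0 and 2 are free but
block 1 is already allocated, a request for two blocks yields blocks_out = {0, 2}. That is fine, because each
32K block later gets its own MMU entry via set_bank(), so contiguity in physical SPI RAM is never required.
*/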


esp_err_t esp_himem_alloc(size_t size, esp_himem_handle_t *handle_out)
{
    if (size % CACHE_BLOCKSIZE != 0) {
        return ESP_ERR_INVALID_SIZE;
    }
    int blocks = size / CACHE_BLOCKSIZE;
    esp_himem_ramdata_t *r = calloc(1, sizeof(esp_himem_ramdata_t));
    if (!r) {
        goto nomem;
    }
    r->block = calloc(blocks, sizeof(uint16_t));
    if (!r->block) {
        goto nomem;
    }
    portENTER_CRITICAL(&spinlock);
    int ok = allocate_blocks(blocks, r->block);
    portEXIT_CRITICAL(&spinlock);
    if (!ok) {
        goto nomem;
    }
    r->block_ct = blocks;
    *handle_out = r;
    return ESP_OK;
nomem:
    if (r) {
        free(r->block);
    }
    free(r);
    return ESP_ERR_NO_MEM;
}

esp_err_t esp_himem_free(esp_himem_handle_t handle)
{
    //Check if any of the blocks is still mapped; fail if this is the case.
    for (int i = 0; i < handle->block_ct; i++) {
        assert(ramblock_idx_valid(handle->block[i]));
        HIMEM_CHECK(s_ram_descriptor[handle->block[i]].is_mapped, "block in range still mapped", ESP_ERR_INVALID_ARG);
    }
    //Mark blocks as free
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < handle->block_ct; i++) {
        s_ram_descriptor[handle->block[i]].is_alloced = false;
    }
    portEXIT_CRITICAL(&spinlock);

    //Free handle
    free(handle->block);
    free(handle);
    return ESP_OK;
}


esp_err_t esp_himem_alloc_map_range(size_t size, esp_himem_rangehandle_t *handle_out)
{
    HIMEM_CHECK(s_ram_descriptor == NULL, "Himem not available!", ESP_ERR_INVALID_STATE);
    HIMEM_CHECK(size % CACHE_BLOCKSIZE != 0, "requested size not aligned to blocksize", ESP_ERR_INVALID_SIZE);
    int blocks = size / CACHE_BLOCKSIZE;
    esp_himem_rangedata_t *r = calloc(1, sizeof(esp_himem_rangedata_t));
    if (!r) {
        return ESP_ERR_NO_MEM;
    }
    r->block_ct = blocks;
    r->block_start = -1;
    int start_free = 0;
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < s_rangeblockcnt; i++) {
        if (s_range_descriptor[i].is_alloced) {
            start_free = i + 1; //optimistically assume the next block is free...
        } else if (i - start_free == blocks - 1) {
            //We found a span of free blocks that's big enough to allocate the requested range in.
            r->block_start = start_free;
            break;
        }
    }

    if (r->block_start == -1) {
        //Couldn't find enough free blocks
        free(r);
        portEXIT_CRITICAL(&spinlock);
        return ESP_ERR_NO_MEM;
    }
    //Range is found. Mark the blocks as in use.
    for (int i = 0; i < blocks; i++) {
        s_range_descriptor[r->block_start + i].is_alloced = 1;
    }
    portEXIT_CRITICAL(&spinlock);
    //All done.
    *handle_out = r;
    return ESP_OK;
}
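
/*
Worked example of the free-span search above (hypothetical state, assuming SPIRAM_BANKSWITCH_RESERVE = 8):
range blocks 0..7 exist and only block 2 is already allocated. A request for 96 KiB (3 blocks) scans as
follows: blocks 0 and 1 are free but the run is cut short at block 2, which bumps start_free to 3; blocks 3,
4 and 5 are free, and at i = 5 the run length reaches 3, so r->block_start = 3 and the new range occupies
blocks 3..5.
*/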

esp_err_t esp_himem_free_map_range(esp_himem_rangehandle_t handle)
{
    //Check if any of the blocks in the range have a mapping
    for (int i = 0; i < handle->block_ct; i++) {
        assert(rangeblock_idx_valid(handle->block_start + i));
        assert(s_range_descriptor[i + handle->block_start].is_alloced == 1); //should be, if handle is valid
        HIMEM_CHECK(s_range_descriptor[i + handle->block_start].is_mapped, "memory still mapped to range", ESP_ERR_INVALID_ARG);
    }
    //We should be good to free this. Mark blocks as free.
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < handle->block_ct; i++) {
        s_range_descriptor[i + handle->block_start].is_alloced = 0;
    }
    portEXIT_CRITICAL(&spinlock);
    free(handle);
    return ESP_OK;
}


esp_err_t esp_himem_map(esp_himem_handle_t handle, esp_himem_rangehandle_t range, size_t ram_offset, size_t range_offset, size_t len, int flags, void **out_ptr)
{
    int ram_block = ram_offset / CACHE_BLOCKSIZE;
    int range_block = range_offset / CACHE_BLOCKSIZE;
    int blockcount = len / CACHE_BLOCKSIZE;
    HIMEM_CHECK(s_ram_descriptor == NULL, "Himem not available!", ESP_ERR_INVALID_STATE);
    //Offsets and length must be block-aligned
    HIMEM_CHECK(ram_offset % CACHE_BLOCKSIZE != 0, "ram offset not aligned to blocksize", ESP_ERR_INVALID_ARG);
    HIMEM_CHECK(range_offset % CACHE_BLOCKSIZE != 0, "range offset not aligned to blocksize", ESP_ERR_INVALID_ARG);
    HIMEM_CHECK(len % CACHE_BLOCKSIZE != 0, "length not aligned to blocksize", ESP_ERR_INVALID_ARG);
    //ram and range should be within allocated range
    HIMEM_CHECK(ram_block + blockcount > handle->block_ct, "args not in range of phys ram handle", ESP_ERR_INVALID_SIZE);
    HIMEM_CHECK(range_block + blockcount > range->block_ct, "args not in range of range handle", ESP_ERR_INVALID_SIZE);

    //Check if the ram blocks aren't already mapped, and if the memory range is unmapped
    for (int i = 0; i < blockcount; i++) {
        HIMEM_CHECK(s_ram_descriptor[handle->block[i + ram_block]].is_mapped, "ram already mapped", ESP_ERR_INVALID_STATE);
        HIMEM_CHECK(s_range_descriptor[range->block_start + i + range_block].is_mapped, "range already mapped", ESP_ERR_INVALID_STATE);
    }

    //Map and mark as mapped
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < blockcount; i++) {
        assert(ramblock_idx_valid(handle->block[i + ram_block]));
        s_ram_descriptor[handle->block[i + ram_block]].is_mapped = 1;
        s_range_descriptor[range->block_start + i + range_block].is_mapped = 1;
        s_range_descriptor[range->block_start + i + range_block].ram_block = handle->block[i + ram_block];
    }
    portEXIT_CRITICAL(&spinlock);
    for (int i = 0; i < blockcount; i++) {
        set_bank(VIRT_HIMEM_RANGE_BLOCKSTART + range->block_start + i + range_block, handle->block[i + ram_block] + PHYS_HIMEM_BLOCKSTART, 1);
    }

    //Set out pointer. range_offset is given in bytes but is block-aligned, so range_block covers it exactly.
    *out_ptr = (void *)(VIRT_HIMEM_RANGE_START + (range->block_start + range_block) * CACHE_BLOCKSIZE);
    return ESP_OK;
}
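
/*
Worked example of the returned pointer (hypothetical values, continuing the SPIRAM_BANKSWITCH_RESERVE = 8
example above): with the range handle occupying range blocks 3..5 and range_offset = 0, the pointer is
VIRT_HIMEM_RANGE_START + 3 * 32K = 0x3FBC0000 + 0x18000 = 0x3FBD8000, i.e. the fourth 32K window of the
reserved address range.
*/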

esp_err_t esp_himem_unmap(esp_himem_rangehandle_t range, void *ptr, size_t len)
{
    //Note: doesn't actually unmap, just clears cache and marks blocks as unmapped.
    //Future optimization: could actually lazy-unmap here: essentially, do nothing and only clear the cache when we re-use
    //the block for a different physical address.
    int range_offset = (uint32_t)ptr - VIRT_HIMEM_RANGE_START;
    int range_block = (range_offset / CACHE_BLOCKSIZE) - range->block_start;
    int blockcount = len / CACHE_BLOCKSIZE;
    HIMEM_CHECK(range_offset % CACHE_BLOCKSIZE != 0, "range offset not block-aligned", ESP_ERR_INVALID_ARG);
    HIMEM_CHECK(len % CACHE_BLOCKSIZE != 0, "map length not block-aligned", ESP_ERR_INVALID_ARG);
    HIMEM_CHECK(range_block + blockcount > range->block_ct, "range out of bounds for handle", ESP_ERR_INVALID_ARG);

    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < blockcount; i++) {
        int ramblock = s_range_descriptor[range->block_start + i + range_block].ram_block;
        assert(ramblock_idx_valid(ramblock));
        s_ram_descriptor[ramblock].is_mapped = 0;
        s_range_descriptor[range->block_start + i + range_block].is_mapped = 0;
    }
    esp_spiram_writeback_cache();
    portEXIT_CRITICAL(&spinlock);
    return ESP_OK;
}
365