1 /* Copyright 2024 Advanced Micro Devices, Inc. 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a 4 * copy of this software and associated documentation files (the "Software"), 5 * to deal in the Software without restriction, including without limitation 6 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 * and/or sell copies of the Software, and to permit persons to whom the 8 * Software is furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 17 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 19 * OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * Authors: AMD 22 * 23 */ 24 #pragma once 25 26 #include <stdint.h> 27 #include <string.h> 28 #include "config_writer.h" 29 30 /** To use this config caching helper, there are pre-requisites: 31 * The object that passes to the hw programming layer must have the following members in its 32 * structure 33 * 1. struct config_cache config_cache; 34 * 2. bool dirty; 35 * 36 * e.g. 37 * struct transfer_function { 38 * bool dirty; 39 * struct config_cache config_cache; 40 * }; 41 * 42 * The upper layer has to indicate this object is dirty or not for the hw programming layer to 43 * determine i. re-use the config cache? ii. cache the new settings? 44 * 45 * Before using the CONFIG_CACHE(), make sure the function has these local variables visible in the 46 * same code block: 47 * 1. struct config_writer *config_writer 48 * - usually been declared with PROGRAM_ENTRY() 49 * 2. a debug option that want to disable caching or not 50 * 3. an input object that has the config_cache member 51 * 4. the hw programming function that would generate command buffer content 52 * 5. the input/output context that has configs vector which stores the generated configs 53 * 54 * Inside this CONFIG_CACHE macro it will clear the dirty bit after consuming the settings 55 * 56 * Make sure to free up this cache object when the parent object is destroyed using 57 * CONFIG_CACHE_FREE() 58 * 59 */ 60 61 #ifdef __cplusplus 62 extern "C" { 63 #endif 64 65 struct vpe_priv; 66 struct vpe_vector; 67 68 /* a common config cache structure to be included in the object that is for program hardware API 69 * layer 70 */ 71 struct config_cache { 72 uint8_t *p_buffer; 73 uint64_t size; 74 bool cached; 75 }; 76 77 /* A macro that helps cache the config packet, it won't cache if it is in bypass mode 78 * as bypass mode is not heavy lifting programming. 79 * 80 * /param obj_cache an object that has the config cache member 81 * /param ctx an input/output context that contains the configs vector 82 * /param disable_cache a flag that controls a caching is needed 83 * /param is_bypass if it is in bypass, it doesn't cache the bypass config 84 * /param program_func_call the program call that generate config packet content 85 * /param inst index to address the config_cache array 86 */ 87 #define CONFIG_CACHE(obj_cache, ctx, disable_cache, is_bypass, program_func_call, inst) \ 88 { \ 89 bool use_cache = false; \ 90 \ 91 if ((obj_cache) && !disable_cache && (obj_cache)->config_cache[inst].p_buffer && \ 92 (obj_cache)->config_cache[inst].cached && !((obj_cache)->dirty[inst]) && !is_bypass) { \ 93 /* make sure it opens a new config packet */ \ 94 config_writer_force_new_with_type(config_writer, CONFIG_TYPE_DIRECT); \ 95 \ 96 /* reuse the cache */ \ 97 if (config_writer->buf->size >= (obj_cache)->config_cache[inst].size) { \ 98 memcpy((void *)(uintptr_t)config_writer->base_cpu_va, \ 99 (obj_cache)->config_cache[inst].p_buffer, \ 100 (size_t)(obj_cache)->config_cache[inst].size); \ 101 config_writer->buf->cpu_va = \ 102 config_writer->base_cpu_va + (obj_cache)->config_cache[inst].size; \ 103 config_writer->buf->gpu_va = \ 104 config_writer->base_gpu_va + (obj_cache)->config_cache[inst].size; \ 105 config_writer->buf->size -= \ 106 ((obj_cache)->config_cache[inst].size - sizeof(uint32_t)); \ 107 use_cache = true; \ 108 } \ 109 } \ 110 \ 111 if (!use_cache) { \ 112 uint64_t start, end; \ 113 uint16_t num_config = (uint16_t)(ctx)->configs[inst]->num_elements; \ 114 \ 115 if (!is_bypass) { \ 116 /* make sure it opens a new config packet so we can cache a complete new config */ \ 117 /* for bypass we don't do caching, so no need to open a new desc */ \ 118 config_writer_force_new_with_type(config_writer, CONFIG_TYPE_DIRECT); \ 119 } \ 120 \ 121 start = config_writer->base_cpu_va; \ 122 program_func_call; \ 123 end = config_writer->buf->cpu_va; \ 124 \ 125 if (!disable_cache && !is_bypass) { \ 126 /* only cache when it is not crossing config packets */ \ 127 if (num_config == (ctx)->configs[inst]->num_elements) { \ 128 if ((obj_cache)->dirty[inst]) { \ 129 uint64_t size = end - start; \ 130 \ 131 if ((obj_cache)->config_cache[inst].size < size) { \ 132 if ((obj_cache)->config_cache[inst].p_buffer) \ 133 vpe_free((obj_cache)->config_cache[inst].p_buffer); \ 134 \ 135 (obj_cache)->config_cache[inst].p_buffer = vpe_zalloc((size_t)size); \ 136 if ((obj_cache)->config_cache[inst].p_buffer) { \ 137 memcpy((obj_cache)->config_cache[inst].p_buffer, \ 138 (void *)(uintptr_t)start, (size_t)size); \ 139 (obj_cache)->config_cache[inst].size = size; \ 140 (obj_cache)->config_cache[inst].cached = true; \ 141 } else { \ 142 (obj_cache)->config_cache[inst].size = 0; \ 143 } \ 144 } \ 145 } \ 146 } \ 147 } \ 148 } \ 149 if ((obj_cache)) \ 150 (obj_cache)->dirty[inst] = false; \ 151 } 152 153 /* the following macro requires a local variable vpr_priv to be present */ 154 #define CONFIG_CACHE_FREE(cache) \ 155 { \ 156 if (cache.p_buffer) \ 157 vpe_free(cache.p_buffer); \ 158 } 159 160 #ifdef __cplusplus 161 } 162 #endif 163