1 /* -*- c++ -*- */ 2 /* 3 * Copyright © 2020 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25 #pragma once 26 27 #include "brw_ir_analysis.h" 28 29 struct fs_visitor; 30 31 namespace brw { 32 /** 33 * Various estimates of the performance of a shader based on static 34 * analysis. 35 */ 36 struct performance { 37 performance(const fs_visitor *v); 38 ~performance(); 39 40 analysis_dependency_class dependency_classperformance41 dependency_class() const 42 { 43 return (DEPENDENCY_INSTRUCTIONS | 44 DEPENDENCY_BLOCKS); 45 } 46 47 bool validateperformance48 validate(const fs_visitor *) const 49 { 50 return true; 51 } 52 53 /** 54 * Array containing estimates of the runtime of each basic block of the 55 * program in cycle units. 56 */ 57 unsigned *block_latency; 58 59 /** 60 * Estimate of the runtime of the whole program in cycle units assuming 61 * uncontended execution. 62 */ 63 unsigned latency; 64 65 /** 66 * Estimate of the throughput of the whole program in 67 * invocations-per-cycle units. 68 * 69 * Note that this might be lower than the ratio between the dispatch 70 * width of the program and its latency estimate in cases where 71 * performance doesn't scale without limits as a function of its thread 72 * parallelism, e.g. due to the existence of a bottleneck in a shared 73 * function. 74 */ 75 float throughput; 76 77 private: 78 performance(const performance &perf); 79 performance & 80 operator=(performance u); 81 }; 82 } 83