#!/bin/sh
#
# This is a utility script to manage Intel GPU frequencies.
# It can be used for debugging performance problems or trying to obtain a stable
# frequency while benchmarking.
#
# Note the Intel i915 GPU driver allows changing the minimum, maximum and boost
# frequencies in steps of 50 MHz via:
#
# /sys/class/drm/card<n>/<freq_info>
#
# Where <n> is the DRM card index and <freq_info> one of the following:
#
# - gt_max_freq_mhz (enforced maximum freq)
# - gt_min_freq_mhz (enforced minimum freq)
# - gt_boost_freq_mhz (enforced boost freq)
#
# The hardware capabilities can be accessed via:
#
# - gt_RP0_freq_mhz (supported maximum freq)
# - gt_RPn_freq_mhz (supported minimum freq)
# - gt_RP1_freq_mhz (most efficient freq)
#
# The current frequency can be read from:
# - gt_act_freq_mhz (the actual GPU freq)
# - gt_cur_freq_mhz (the last requested freq)
#
# Also note that in addition to GPU management, the script offers the
# possibility to adjust CPU operating frequencies. However, this is currently
# limited to just setting the maximum scaling frequency as a percentage of the
# maximum frequency allowed by the hardware.
#
# Copyright (C) 2022 Collabora Ltd.
# Author: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
#
# SPDX-License-Identifier: MIT
#

#
# Constants
#

# GPU
# printf pattern: %d is the DRM card index, %s the frequency info name.
DRM_FREQ_SYSFS_PATTERN="/sys/class/drm/card%d/gt_%s_freq_mhz"
# Frequency info names, grouped as described in the header above.
ENF_FREQ_INFO="max min boost"
CAP_FREQ_INFO="RP0 RPn RP1"
ACT_FREQ_INFO="act cur"
# Polling interval of the throttling detector loop.
THROTT_DETECT_SLEEP_SEC=2
# File holding the PID of the background throttling detector.
THROTT_DETECT_PID_FILE_PATH="/tmp/thrott-detect.pid"

# CPU
CPU_SYSFS_PREFIX=/sys/devices/system/cpu
# printf pattern: %s is the intel_pstate attribute name.
CPU_PSTATE_SYSFS_PATTERN="${CPU_SYSFS_PREFIX}/intel_pstate/%s"
# printf pattern: first %s is the CPU index, second %s the frequency info name.
CPU_FREQ_SYSFS_PATTERN="${CPU_SYSFS_PREFIX}/cpu%s/cpufreq/%s_freq"
CAP_CPU_FREQ_INFO="cpuinfo_max cpuinfo_min"
ENF_CPU_FREQ_INFO="scaling_max scaling_min"
ACT_CPU_FREQ_INFO="scaling_cur"

#
# Global variables.
#
# These are populated by the argument parser and by identify_intel_gpu();
# clearing them first prevents inheriting same-named environment variables.
unset INTEL_DRM_CARD_INDEX
unset GET_ACT_FREQ GET_ENF_FREQ GET_CAP_FREQ
unset SET_MIN_FREQ SET_MAX_FREQ
unset MONITOR_FREQ
unset CPU_SET_MAX_FREQ
unset DETECT_THROTT
unset DRY_RUN

#
# Simple printf based stderr logger.
#
# arg1: Message type, printed as prefix (e.g. INFO, WARN, ERROR)
# arg2: printf format string of the message
# arg3...: Arguments consumed by the format string
#
# A call lacking the format string just prints the prefix instead of
# triggering a printf usage error.
#
log() {
    local msg_type=$1

    # Guarded: shifting an empty argument list aborts some shells (e.g. dash).
    [ $# -eq 0 ] || shift
    printf "%s: %s: " "${msg_type}" "${0##*/}" >&2
    [ $# -eq 0 ] || printf "$@" >&2
    printf "\n" >&2
}

#
# Helper to print sysfs path for the given card index and freq info.
#
# arg1: Frequency info sysfs name, one of *_FREQ_INFO constants above
# arg2: Video card index, defaults to INTEL_DRM_CARD_INDEX
#
print_freq_sysfs_path() {
    printf "${DRM_FREQ_SYSFS_PATTERN}" "${2:-${INTEL_DRM_CARD_INDEX}}" "$1"
}

#
# Helper to set INTEL_DRM_CARD_INDEX for the first identified Intel video card.
#
# Probes card indexes 0..15 and selects the first one which has a DRM character
# device and whose sysfs 'device/vendor' attribute reads 0x8086.
#
# return: 0 if a card has been found, otherwise 1
#
identify_intel_gpu() {
    local i=0 vendor path

    while [ ${i} -lt 16 ]; do
        if [ -c "/dev/dri/card${i}" ]; then
            # Derive the card's sysfs directory from the freq path pattern.
            path=$(print_freq_sysfs_path "" "${i}")
            path=${path%/*}/device/vendor

            if [ -r "${path}" ] && read -r vendor < "${path}" &&
                [ "${vendor}" = "0x8086" ]; then
                INTEL_DRM_CARD_INDEX=${i}
                return 0
            fi
        fi

        i=$((i + 1))
    done

    return 1
}

#
# Read the specified freq info from sysfs.
#
# arg1: Flag (y/n) to also enable printing the freq info.
# arg2...: Frequency info sysfs name(s), see *_FREQ_INFO constants above
# return: Global variable(s) FREQ_${arg} containing the requested information
#
# The exit status is 0 only if every requested info has been read and is
# non-empty; failures are logged and the remaining names are still processed.
#
read_freq_info() {
    local var val info path print=0 ret=0

    [ "$1" = "y" ] && print=1
    shift

    while [ $# -gt 0 ]; do
        info=$1
        shift
        var=FREQ_${info}
        path=$(print_freq_sysfs_path "${info}")

        if ! { [ -r "${path}" ] && read -r "${var}" < "${path}"; }; then
            log ERROR "Failed to read freq info from: %s" "${path}"
            ret=1
            continue
        fi

        # Check the value which has been read, not the variable name.
        eval "val=\${${var}}"
        if [ -z "${val}" ]; then
            log ERROR "Got empty freq info from: %s" "${path}"
            ret=1
            continue
        fi

        if [ ${print} -eq 1 ]; then
            printf "%6s: %4s MHz\n" "${info}" "${val}"
        fi
    done

    return ${ret}
}

#
# Display requested info.
#
# Prints one section for each of the GET_CAP_FREQ, GET_ENF_FREQ and
# GET_ACT_FREQ flags which is set.
#
print_freq_info() {
    if [ -n "${GET_CAP_FREQ}" ]; then
        printf "* Hardware capabilities\n"
        read_freq_info y ${CAP_FREQ_INFO}
        printf "\n"
    fi

    if [ -n "${GET_ENF_FREQ}" ]; then
        printf "* Enforcements\n"
        read_freq_info y ${ENF_FREQ_INFO}
        printf "\n"
    fi

    if [ -n "${GET_ACT_FREQ}" ]; then
        printf "* Actual\n"
        read_freq_info y ${ACT_FREQ_INFO}
        printf "\n"
    fi
}

#
# Helper to print frequency value as requested by user via '-s, --set' option.
# arg1: user requested freq value: '+' (FREQ_RP0), '-' (FREQ_RPn), 'N%'
#       (percentage of FREQ_RP0) or a plain number (MHz)
#
# Percentages and plain numbers are rounded down to a multiple of 50 MHz.
# Requires FREQ_RP0 and FREQ_RPn to be already set.
#
# return: 0 and the computed value on stdout, 1 on invalid input
#
compute_freq_set() {
    local val

    case "$1" in
    +)
        val=${FREQ_RP0}
        ;;
    -)
        val=${FREQ_RPn}
        ;;
    *%)
        val=${1%?}
        case "${val}" in
        ""|*[!0-9]*)
            log ERROR "Cannot set freq to invalid value: %s" "$1"
            return 1
            ;;
        esac
        # Drop leading zeros, otherwise the value would be parsed as octal.
        val=${val#"${val%%[!0]*}"}
        val=$((${val:-0} * ${FREQ_RP0} / 100))
        # Adjust freq to comply with 50 MHz increments
        val=$((val / 50 * 50))
        ;;
    *[!0-9]*)
        log ERROR "Cannot set freq to invalid value: %s" "$1"
        return 1
        ;;
    "")
        log ERROR "Cannot set freq to unspecified value"
        return 1
        ;;
    *)
        # Drop leading zeros, otherwise the value would be parsed as octal.
        val=${1#"${1%%[!0]*}"}
        # Adjust freq to comply with 50 MHz increments
        val=$((${val:-0} / 50 * 50))
        ;;
    esac

    printf "%s" "${val}"
}

#
# Helper for set_freq().
#
# Validate SET_MAX_FREQ against the hw limits (FREQ_RP0, FREQ_RPn) and the
# current min freq, then write it to both the 'max' and 'boost' sysfs entries.
# Nothing is written when DRY_RUN is set.
#
# Expects FREQ_RP0 and FREQ_RPn to be already set (set_freq() reads them).
#
set_freq_max() {
    log INFO "Setting GPU max freq to %s MHz" "${SET_MAX_FREQ}"

    read_freq_info n min || return $?

    if [ "${SET_MAX_FREQ}" -gt "${FREQ_RP0}" ]; then
        log ERROR "Cannot set GPU max freq (%s) to be greater than hw max freq (%s)" \
            "${SET_MAX_FREQ}" "${FREQ_RP0}"
        return 1
    fi

    if [ "${SET_MAX_FREQ}" -lt "${FREQ_RPn}" ]; then
        log ERROR "Cannot set GPU max freq (%s) to be less than hw min freq (%s)" \
            "${SET_MAX_FREQ}" "${FREQ_RPn}"
        return 1
    fi

    if [ "${SET_MAX_FREQ}" -lt "${FREQ_min}" ]; then
        log ERROR "Cannot set GPU max freq (%s) to be less than min freq (%s)" \
            "${SET_MAX_FREQ}" "${FREQ_min}"
        return 1
    fi

    [ -z "${DRY_RUN}" ] || return 0

    # tee is used to update the max and boost entries with a single write.
    if ! printf "%s" "${SET_MAX_FREQ}" |
        tee "$(print_freq_sysfs_path max)" \
            "$(print_freq_sysfs_path boost)" > /dev/null; then
        log ERROR "Failed to set GPU max frequency"
        return 1
    fi
}

#
# Helper for set_freq().
#
# Validate SET_MIN_FREQ against the current max freq and the hw min freq
# (FREQ_RPn), then write it to the 'min' sysfs entry. Nothing is written when
# DRY_RUN is set.
#
# Expects FREQ_RPn to be already set (set_freq() reads it).
#
set_freq_min() {
    log INFO "Setting GPU min freq to %s MHz" "${SET_MIN_FREQ}"

    read_freq_info n max || return $?

    if [ "${SET_MIN_FREQ}" -gt "${FREQ_max}" ]; then
        log ERROR "Cannot set GPU min freq (%s) to be greater than max freq (%s)" \
            "${SET_MIN_FREQ}" "${FREQ_max}"
        return 1
    fi

    if [ "${SET_MIN_FREQ}" -lt "${FREQ_RPn}" ]; then
        log ERROR "Cannot set GPU min freq (%s) to be less than hw min freq (%s)" \
            "${SET_MIN_FREQ}" "${FREQ_RPn}"
        return 1
    fi

    [ -z "${DRY_RUN}" ] || return 0

    if ! printf "%s" "${SET_MIN_FREQ}" > "$(print_freq_sysfs_path min)"; then
        log ERROR "Failed to set GPU min frequency"
        return 1
    fi
}

#
# Set min or max or both GPU frequencies to the user indicated values.
#
# Reads the user request from SET_MIN_FREQ / SET_MAX_FREQ and replaces them
# with the values computed by compute_freq_set().
#
# return: 0 on success, non-zero on failure
#
set_freq() {
    # Get hw max & min frequencies
    read_freq_info n RP0 RPn || return $?

    if [ -n "${SET_MAX_FREQ}" ]; then
        SET_MAX_FREQ=$(compute_freq_set "${SET_MAX_FREQ}")
        [ -n "${SET_MAX_FREQ}" ] || return 1
    fi

    if [ -n "${SET_MIN_FREQ}" ]; then
        SET_MIN_FREQ=$(compute_freq_set "${SET_MIN_FREQ}")
        [ -n "${SET_MIN_FREQ}" ] || return 1
    fi

    #
    # Ensure correct operation order, to avoid setting min freq
    # to a value which is larger than max freq.
    #
    # E.g.:
    #   crt_min=crt_max=600; new_min=new_max=700
    #   > operation order: max=700; min=700
    #
    #   crt_min=crt_max=600; new_min=new_max=500
    #   > operation order: min=500; max=500
    #
    if [ -n "${SET_MAX_FREQ}" ] && [ -n "${SET_MIN_FREQ}" ]; then
        if [ "${SET_MAX_FREQ}" -lt "${SET_MIN_FREQ}" ]; then
            log ERROR "Cannot set GPU max freq to be less than min freq"
            return 1
        fi

        read_freq_info n min || return $?

        if [ "${SET_MAX_FREQ}" -lt "${FREQ_min}" ]; then
            set_freq_min || return $?
            set_freq_max
        else
            set_freq_max || return $?
            set_freq_min
        fi
    elif [ -n "${SET_MAX_FREQ}" ]; then
        set_freq_max
    elif [ -n "${SET_MIN_FREQ}" ]; then
        set_freq_min
    else
        log ERROR "Unexpected call to set_freq()"
        return 1
    fi
}

#
# Helper for detect_throttling().
#
# Prints the PID stored in THROTT_DETECT_PID_FILE_PATH, provided it belongs to
# a live process whose command line mentions this script's name. A PID file
# which does not match such a process is removed.
#
# return: 0 if there is no PID file or the PID has been printed, otherwise 1
#
get_thrott_detect_pid() {
    local pid proc_path

    [ -e "${THROTT_DETECT_PID_FILE_PATH}" ] || return 0

    read -r pid < "${THROTT_DETECT_PID_FILE_PATH}" || {
        log ERROR "Failed to read pid from: %s" "${THROTT_DETECT_PID_FILE_PATH}"
        return 1
    }

    # Only a plain number may be used to build the /proc lookup path.
    case "${pid}" in
    ""|*[!0-9]*)
        pid=invalid
        ;;
    esac

    proc_path=/proc/${pid}/cmdline
    if [ -r "${proc_path}" ] && grep -qs -- "${0##*/}" "${proc_path}"; then
        printf "%s" "${pid}"
        return 0
    fi

    # Remove orphaned PID file
    rm -f -- "${THROTT_DETECT_PID_FILE_PATH}"
    return 1
}

#
# Control detection and reporting of GPU throttling events.
# arg1: start - run throttle detector in background
#       stop - stop throttle detector process, if any
#       status - verify if throttle detector is running
#
# return: 1 for an unknown action, otherwise the status of the last command
#
detect_throttling() {
    local pid
    pid=$(get_thrott_detect_pid)

    case "$1" in
    status)
        printf "Throttling detector is "
        if [ -z "${pid}" ]; then
            printf "not running\n"
            return 0
        fi
        printf "running (pid=%s)\n" "${pid}"
        ;;

    stop)
        [ -z "${pid}" ] && return 0

        log INFO "Stopping throttling detector (pid=%s)" "${pid}"
        # Ask politely first; force kill only if the process is still alive.
        kill "${pid}"
        sleep 1
        kill -0 "${pid}" 2>/dev/null && kill -9 "${pid}"
        rm -f -- "${THROTT_DETECT_PID_FILE_PATH}"
        ;;

    start)
        if [ -n "${pid}" ]; then
            log WARN "Throttling detector is already running (pid=%s)" "${pid}"
            return 0
        fi

        (
            read_freq_info n RPn || exit $?

            while true; do
                sleep "${THROTT_DETECT_SLEEP_SEC}"
                read_freq_info n act min cur || exit $?

                #
                # The throttling seems to occur when act freq goes below min.
                # However, it's necessary to exclude the idle states, where
                # act freq normally reaches RPn and cur goes below min.
                #
                if [ "${FREQ_act}" -lt "${FREQ_min}" ] &&
                    [ "${FREQ_act}" -gt "${FREQ_RPn}" ] &&
                    [ "${FREQ_cur}" -ge "${FREQ_min}" ]; then
                    printf "GPU throttling detected: act=%s min=%s cur=%s RPn=%s\n" \
                        "${FREQ_act}" "${FREQ_min}" "${FREQ_cur}" "${FREQ_RPn}"
                fi
            done
        ) &

        pid=$!
        log INFO "Started GPU throttling detector (pid=%s)" "${pid}"

        printf "%s\n" "${pid}" > "${THROTT_DETECT_PID_FILE_PATH}" ||
            log WARN "Failed to write throttle detector PID file"
        ;;

    *)
        log ERROR "Invalid throttling detector action: %s" "$1"
        return 1
        ;;
    esac
}

#
# Retrieve the list of online CPUs.
#
# Prints a space separated list of CPU indexes. CPU 0 is always reported;
# any other CPU is reported if its sysfs 'online' attribute reads 1.
#
get_online_cpus() {
    local path cpu_index state

    printf "0"
    for path in "${CPU_SYSFS_PREFIX}"/cpu[0-9]*/online; do
        # Also skips the literal pattern left behind by an unmatched glob.
        [ -r "${path}" ] || continue
        read -r state < "${path}" || continue
        [ "${state}" = "1" ] || continue

        cpu_index=${path%/online}
        cpu_index=${cpu_index##*/cpu}
        # CPU 0 has already been printed above.
        [ "${cpu_index}" = "0" ] && continue

        printf " %s" "${cpu_index}"
    done

    return 0
}

#
# Helper to print sysfs path for the given CPU index and freq info.
#
# arg1: Frequency info sysfs name, one of *_CPU_FREQ_INFO constants above
# arg2: CPU index
#
print_cpu_freq_sysfs_path() {
    printf "${CPU_FREQ_SYSFS_PATTERN}" "$2" "$1"
}

#
# Read the specified CPU freq info from sysfs.
#
# arg1: CPU index
# arg2: Flag (y/n) to also enable printing the freq info.
# arg3...: Frequency info sysfs name(s), see *_CPU_FREQ_INFO constants above
# return: Global variable(s) CPU_FREQ_${arg} containing the requested information
#
# The exit status is 0 only if every requested info has been read and is
# non-empty; failures are logged and the remaining names are still processed.
#
read_cpu_freq_info() {
    local var val info path cpu_index print=0 ret=0

    cpu_index=$1
    [ "$2" = "y" ] && print=1
    shift 2

    while [ $# -gt 0 ]; do
        info=$1
        shift
        var=CPU_FREQ_${info}
        path=$(print_cpu_freq_sysfs_path "${info}" "${cpu_index}")

        if ! { [ -r "${path}" ] && read -r "${var}" < "${path}"; }; then
            log ERROR "Failed to read CPU freq info from: %s" "${path}"
            ret=1
            continue
        fi

        # Check the value which has been read, not the variable name.
        eval "val=\${${var}}"
        if [ -z "${val}" ]; then
            log ERROR "Got empty CPU freq info from: %s" "${path}"
            ret=1
            continue
        fi

        if [ ${print} -eq 1 ]; then
            printf "%6s: %4s Hz\n" "${info}" "${val}"
        fi
    done

    return ${ret}
}

#
# Helper to print freq. value as requested by user via '--cpu-set-max' option.
# arg1: user requested freq value: '+' (CPU_FREQ_cpuinfo_max),
#       '-' (CPU_FREQ_cpuinfo_min) or 'N%' (percentage of CPU_FREQ_cpuinfo_max)
#
# Requires CPU_FREQ_cpuinfo_max and CPU_FREQ_cpuinfo_min to be already set.
#
# return: 0 and the computed value on stdout, 1 on invalid input
#
compute_cpu_freq_set() {
    local val

    case "$1" in
    +)
        val=${CPU_FREQ_cpuinfo_max}
        ;;
    -)
        val=${CPU_FREQ_cpuinfo_min}
        ;;
    *%)
        val=${1%?}
        case "${val}" in
        ""|*[!0-9]*)
            log ERROR "Cannot set CPU freq to invalid value: %s" "$1"
            return 1
            ;;
        esac
        # Drop leading zeros, otherwise the value would be parsed as octal.
        val=${val#"${val%%[!0]*}"}
        val=$((${val:-0} * ${CPU_FREQ_cpuinfo_max} / 100))
        ;;
    *[!0-9]*)
        log ERROR "Cannot set CPU freq to invalid value: %s" "$1"
        return 1
        ;;
    "")
        log ERROR "Cannot set CPU freq to unspecified value"
        return 1
        ;;
    *)
        log ERROR "Cannot set CPU freq to custom value; use +, -, or % instead"
        return 1
        ;;
    esac

    printf "%s" "${val}"
}

#
# Adjust CPU max scaling frequency.
#
# Applies the CPU_SET_MAX_FREQ request ('+', '-' or 'N%') in two steps: first
# to intel_pstate's max_perf_pct, if that entry exists, then to the
# scaling_max freq of every online CPU. With DRY_RUN set, the actions are only
# logged and nothing is written.
#
# return: 0 on success, non-zero if any step failed
#
set_cpu_freq_max() {
    local target_freq pstate_info cpu_index res=0

    # Percentage to be written to intel_pstate max_perf_pct.
    case "${CPU_SET_MAX_FREQ}" in
    +)
        target_freq=100
        ;;
    -)
        target_freq=1
        ;;
    *%)
        target_freq=${CPU_SET_MAX_FREQ%?}
        ;;
    *)
        log ERROR "Invalid CPU freq"
        return 1
        ;;
    esac

    pstate_info=$(printf "${CPU_PSTATE_SYSFS_PATTERN}" max_perf_pct)
    if [ -e "${pstate_info}" ]; then
        log INFO "Setting intel_pstate max perf to %s" "${target_freq}%"
        if [ -z "${DRY_RUN}" ]; then
            printf "%s" "${target_freq}" > "${pstate_info}" || {
                log ERROR "Failed to set intel_pstate max perf"
                res=1
            }
        fi
    fi

    for cpu_index in $(get_online_cpus); do
        read_cpu_freq_info "${cpu_index}" n ${CAP_CPU_FREQ_INFO} || {
            res=$?
            continue
        }

        # From here on target_freq holds the absolute frequency for this CPU.
        target_freq=$(compute_cpu_freq_set "${CPU_SET_MAX_FREQ}")
        [ -n "${target_freq}" ] || {
            res=1
            continue
        }

        log INFO "Setting CPU%s max scaling freq to %s Hz" "${cpu_index}" "${target_freq}"
        [ -n "${DRY_RUN}" ] && continue

        printf "%s" "${target_freq}" > \
            "$(print_cpu_freq_sysfs_path scaling_max "${cpu_index}")" || {
            res=1
            log ERROR "Failed to set CPU%s max scaling frequency" "${cpu_index}"
        }
    done

    return ${res}
}

#
# Show help message.
#
print_usage() {
    cat <<EOF
Usage: ${0##*/} [OPTION]...

A script to manage Intel GPU frequencies. Can be used for debugging performance
problems or trying to obtain a stable frequency while benchmarking.

Note Intel GPUs only accept specific frequencies, usually multiples of 50 MHz.

Options:
  -g, --get [act|enf|cap|all]
                        Get frequency information: active (default), enforced,
                        hardware capabilities or all of them.

  -s, --set [{min|max}=]{FREQUENCY[%]|+|-}
                        Set min or max frequency to the given value (MHz).
                        Append '%' to interpret FREQUENCY as % of hw max.
                        Use '+' or '-' to set frequency to hardware max or min.
                        Omit min/max prefix to set both frequencies.

  -r, --reset           Reset frequencies to hardware defaults.

  -m, --monitor [act|enf|cap|all]
                        Monitor the indicated frequencies via 'watch' utility.
                        See '-g, --get' option for more details.

  -d, --detect-thrott [start|stop|status]
                        Start (default operation) the throttling detector
                        as a background process. Use 'stop' or 'status' to
                        terminate the detector process or verify its status.

  --cpu-set-max {FREQUENCY%|+|-}
                        Set CPU max scaling frequency as % of hw max.
                        Use '+' or '-' to set frequency to hardware max or min.

  --dry-run             See what the script will do without applying any
                        frequency changes.

  -h, --help            Display this help text and exit.
EOF
}

#
# Parse user input for '-g, --get' option.
# Returns 0 if a value has been provided, otherwise 1.
#
# arg1: candidate value (act|enf|cap|all); an empty string or another option
#       selects the default (act). Any other value prints the usage and
#       exits the script.
#
parse_option_get() {
    local ret=0

    case "$1" in
    act)
        GET_ACT_FREQ=1
        ;;
    enf)
        GET_ENF_FREQ=1
        ;;
    cap)
        GET_CAP_FREQ=1
        ;;
    all)
        GET_ACT_FREQ=1
        GET_ENF_FREQ=1
        GET_CAP_FREQ=1
        ;;
    -*|"")
        # No value provided, using default.
        GET_ACT_FREQ=1
        ret=1
        ;;
    *)
        print_usage
        exit 1
        ;;
    esac

    return ${ret}
}

#
# Validate user input for '-s, --set' option.
# arg1: input value to be validated
# arg2: optional flag indicating input is restricted to %
#
# Returns 0 for valid input; otherwise prints the usage and exits the script.
#
validate_option_set() {
    case "$1" in
    +|-|[0-9]%|[0-9][0-9]%|100%)
        return 0
        ;;
    *[!0-9]*|"")
        print_usage
        exit 1
        ;;
    esac

    # Plain numbers are rejected when the input is restricted to '+', '-', '%'.
    [ -z "$2" ] || { print_usage; exit 1; }
}

#
# Parse script arguments.
#
# At least one option is required.
[ $# -eq 0 ] && { print_usage; exit 1; }

while [ $# -gt 0 ]; do
    case "$1" in
    -g|--get)
        # The value is optional; consume it only if one has been recognized.
        parse_option_get "$2" && shift
        ;;

    -s|--set)
        shift
        case "$1" in
        min=*)
            SET_MIN_FREQ=${1#min=}
            validate_option_set "${SET_MIN_FREQ}"
            ;;
        max=*)
            SET_MAX_FREQ=${1#max=}
            validate_option_set "${SET_MAX_FREQ}"
            ;;
        *)
            # No min/max prefix: apply the same value to both.
            SET_MIN_FREQ=$1
            validate_option_set "${SET_MIN_FREQ}"
            SET_MAX_FREQ=${SET_MIN_FREQ}
            ;;
        esac
        ;;

    -r|--reset)
        # Equivalent to requesting hw min for min freq and hw max for max freq.
        SET_MIN_FREQ="-"
        SET_MAX_FREQ="+"
        ;;

    -m|--monitor)
        MONITOR_FREQ=act
        parse_option_get "$2" && MONITOR_FREQ=$2 && shift
        ;;

    -d|--detect-thrott)
        DETECT_THROTT=start
        case "$2" in
        start|stop|status)
            DETECT_THROTT=$2
            shift
            ;;
        esac
        ;;

    --cpu-set-max)
        shift
        CPU_SET_MAX_FREQ=$1
        validate_option_set "${CPU_SET_MAX_FREQ}" restricted
        ;;

    --dry-run)
        DRY_RUN=1
        ;;

    -h|--help)
        print_usage
        exit 0
        ;;

    *)
        print_usage
        exit 1
        ;;
    esac

    shift
done

#
# Main
#
RET=0

# Without an Intel GPU there is nothing to do; this is not treated as an error.
identify_intel_gpu || {
    log INFO "No Intel GPU detected"
    exit 0
}

[ -n "${SET_MIN_FREQ}${SET_MAX_FREQ}" ] && { set_freq || RET=$?; }
print_freq_info

[ -n "${DETECT_THROTT}" ] && detect_throttling "${DETECT_THROTT}"

[ -n "${CPU_SET_MAX_FREQ}" ] && { set_cpu_freq_max || RET=$?; }

[ -n "${MONITOR_FREQ}" ] && {
    log INFO "Entering frequency monitoring mode"
    sleep 2
    # Replace this process with 'watch' re-running the script in '--get' mode.
    exec watch -d -n 1 "$0" -g "${MONITOR_FREQ}"
}

exit ${RET}