• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/bin/sh
2#
# This is a utility script to manage Intel GPU frequencies.
4# It can be used for debugging performance problems or trying to obtain a stable
5# frequency while benchmarking.
6#
7# Note the Intel i915 GPU driver allows to change the minimum, maximum and boost
8# frequencies in steps of 50 MHz via:
9#
10# /sys/class/drm/card<n>/<freq_info>
11#
12# Where <n> is the DRM card index and <freq_info> one of the following:
13#
14# - gt_max_freq_mhz (enforced maximum freq)
15# - gt_min_freq_mhz (enforced minimum freq)
16# - gt_boost_freq_mhz (enforced boost freq)
17#
18# The hardware capabilities can be accessed via:
19#
20# - gt_RP0_freq_mhz (supported maximum freq)
21# - gt_RPn_freq_mhz (supported minimum freq)
22# - gt_RP1_freq_mhz (most efficient freq)
23#
24# The current frequency can be read from:
25# - gt_act_freq_mhz (the actual GPU freq)
26# - gt_cur_freq_mhz (the last requested freq)
27#
28# Also note that in addition to GPU management, the script offers the
29# possibility to adjust CPU operating frequencies. However, this is currently
30# limited to just setting the maximum scaling frequency as percentage of the
31# maximum frequency allowed by the hardware.
32#
33# Copyright (C) 2022 Collabora Ltd.
34# Author: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
35#
36# SPDX-License-Identifier: MIT
37#
38
39#
40# Constants
41#
42
# GPU
# printf pattern of a frequency attribute: card index, then attribute name
DRM_FREQ_SYSFS_PATTERN='/sys/class/drm/card%d/gt_%s_freq_mhz'
# Attribute names grouped by kind: enforced, hardware capability, current
ENF_FREQ_INFO='max min boost'
CAP_FREQ_INFO='RP0 RPn RP1'
ACT_FREQ_INFO='act cur'
# Throttling detector: seconds between two samples and PID file location
THROTT_DETECT_SLEEP_SEC=2
THROTT_DETECT_PID_FILE_PATH='/tmp/thrott-detect.pid'

# CPU
CPU_SYSFS_PREFIX='/sys/devices/system/cpu'
# printf pattern of an intel_pstate attribute: attribute name
CPU_PSTATE_SYSFS_PATTERN="${CPU_SYSFS_PREFIX}"'/intel_pstate/%s'
# printf pattern of a cpufreq attribute: CPU index, then attribute name
CPU_FREQ_SYSFS_PATTERN="${CPU_SYSFS_PREFIX}"'/cpu%s/cpufreq/%s_freq'
# Attribute names grouped by kind: hardware capability, enforced, current
CAP_CPU_FREQ_INFO='cpuinfo_max cpuinfo_min'
ENF_CPU_FREQ_INFO='scaling_max scaling_min'
ACT_CPU_FREQ_INFO='scaling_cur'
58
#
# Global variables, cleared up front so that values inherited from the
# environment cannot leak into this run.
#
unset INTEL_DRM_CARD_INDEX  # index of the detected Intel DRM card
unset GET_ACT_FREQ          # show current frequencies
unset GET_ENF_FREQ          # show enforced frequencies
unset GET_CAP_FREQ          # show hardware capabilities
unset SET_MIN_FREQ          # requested GPU min frequency
unset SET_MAX_FREQ          # requested GPU max frequency
unset MONITOR_FREQ          # frequency group to monitor
unset CPU_SET_MAX_FREQ      # requested CPU max scaling frequency
unset DETECT_THROTT         # throttling detector action
unset DRY_RUN               # when set, no frequency is written
69
#
# Minimal logger writing "<level>: <script name>: <message>" to stderr.
#
# arg1: Message level, e.g. INFO, WARN or ERROR
# arg2...: printf format string followed by its arguments
#
log() {
    local level=$1
    shift

    # A single redirection sends the prefix, the message and the line end
    # to stderr.
    {
        printf "%s: %s: " "${level}" "${0##*/}"
        printf "$@"
        printf "\n"
    } >&2
}
81
#
# Helper to print sysfs path for the given card index and freq info.
#
# arg1: Frequency info sysfs name, one of *_FREQ_INFO constants above
# arg2: Video card index, defaults to INTEL_DRM_CARD_INDEX
#
print_freq_sysfs_path() {
    # The pattern is used as the printf format on purpose; it is quoted so
    # that it is never word-split or glob-expanded before reaching printf.
    printf "${DRM_FREQ_SYSFS_PATTERN}" "${2:-${INTEL_DRM_CARD_INDEX}}" "$1"
}
91
#
# Look for the first DRM card whose PCI vendor ID is Intel (0x8086) and store
# its index in INTEL_DRM_CARD_INDEX.
#
# Card indexes 0..15 are probed; a card is considered only when its
# /dev/dri/card<n> character device exists.
#
# return: 0 when an Intel card has been found, otherwise 1
#
identify_intel_gpu() {
    local idx=0 vendor_id vendor_file

    while [ "${idx}" -lt 16 ]; do
        if [ -c "/dev/dri/card${idx}" ]; then
            # Derive <card dir>/device/vendor from the freq sysfs path
            vendor_file=$(print_freq_sysfs_path "" "${idx}")
            vendor_file=${vendor_file%/*}/device/vendor

            if [ -r "${vendor_file}" ] && read vendor_id < "${vendor_file}" &&
                [ "${vendor_id}" = "0x8086" ]; then
                INTEL_DRM_CARD_INDEX=${idx}
                return 0
            fi
        fi

        idx=$((idx + 1))
    done

    return 1
}
115
#
# Read the specified freq info from sysfs.
#
# arg1: Flag (y/n) to also enable printing the freq info.
# arg2...: Frequency info sysfs name(s), see *_FREQ_INFO constants above
# return: Global variable(s) FREQ_${arg} containing the requested information;
#         exit status is 0 when every item was read, otherwise 1
#
read_freq_info() {
    local var val info path print=0 ret=0

    [ "$1" = "y" ] && print=1
    shift

    while [ $# -gt 0 ]; do
        info=$1
        shift
        var=FREQ_${info}
        path=$(print_freq_sysfs_path "${info}")

        if [ ! -r "${path}" ] || ! read -r "${var}" < "${path}"; then
            log ERROR "Failed to read freq info from: %s" "${path}"
            ret=1
            continue
        fi

        # Check the value that was read, not the name of the variable
        # holding it (the name is never empty).
        eval "val=\${${var}}"
        if [ -z "${val}" ]; then
            log ERROR "Got empty freq info from: %s" "${path}"
            ret=1
            continue
        fi

        if [ ${print} -eq 1 ]; then
            printf "%6s: %4s MHz\n" "${info}" "${val}"
        fi
    done

    return ${ret}
}
155
#
# Display the frequency groups selected via GET_CAP_FREQ, GET_ENF_FREQ and
# GET_ACT_FREQ, each one under its own heading.
#
# return: always 0; read failures are reported by read_freq_info()
#
print_freq_info() {
    # The *_FREQ_INFO lists are expanded unquoted on purpose: each word is
    # a separate frequency name.
    if [ -n "${GET_CAP_FREQ}" ]; then
        printf "* Hardware capabilities\n"
        read_freq_info y ${CAP_FREQ_INFO}
        printf "\n"
    fi

    if [ -n "${GET_ENF_FREQ}" ]; then
        printf "* Enforcements\n"
        read_freq_info y ${ENF_FREQ_INFO}
        printf "\n"
    fi

    if [ -n "${GET_ACT_FREQ}" ]; then
        printf "* Actual\n"
        read_freq_info y ${ACT_FREQ_INFO}
        printf "\n"
    fi

    # Not having been asked to print a group is not an error.
    return 0
}
180
#
# Helper to print frequency value as requested by user via '-s, --set' option.
#
# Accepted forms: '+' (FREQ_RP0), '-' (FREQ_RPn), 'N%' (percentage of
# FREQ_RP0) or a plain number of MHz. Numeric results are rounded down to a
# multiple of 50 MHz.
#
# arg1: user requested freq value
# return: 0 and the computed value on stdout, or 1 if the value is invalid
#
compute_freq_set() {
    local val num

    case "$1" in
    +)
        val=${FREQ_RP0}
        ;;
    -)
        val=${FREQ_RPn}
        ;;
    *%)
        num=${1%?}
        # Only digits may precede '%'; anything else would be evaluated as
        # an arithmetic expression below.
        case "${num}" in
        ""|*[!0-9]*)
            log ERROR "Cannot set freq to invalid value: %s" "$1"
            return 1
            ;;
        esac
        # Drop leading zeros, otherwise the value is parsed as octal
        num=${num#"${num%%[!0]*}"}
        val=$((${num:-0} * ${FREQ_RP0} / 100))
        # Adjust freq to comply with 50 MHz increments
        val=$((val / 50 * 50))
        ;;
    *[!0-9]*)
        log ERROR "Cannot set freq to invalid value: %s" "$1"
        return 1
        ;;
    "")
        log ERROR "Cannot set freq to unspecified value"
        return 1
        ;;
    *)
        # Drop leading zeros, otherwise the value is parsed as octal
        num=${1#"${1%%[!0]*}"}
        # Adjust freq to comply with 50 MHz increments
        val=$((${num:-0} / 50 * 50))
        ;;
    esac

    printf "%s" "${val}"
}
216
#
# Helper for set_freq().
#
# Validates SET_MAX_FREQ against the hardware limits (FREQ_RP0, FREQ_RPn) and
# the currently enforced min frequency, then writes it to both the 'max' and
# 'boost' sysfs attributes. Nothing is written when DRY_RUN is set.
#
# return: 0 on success (or dry run), non-zero on validation or write failure
#
set_freq_max() {
    log INFO "Setting GPU max freq to %s MHz" "${SET_MAX_FREQ}"

    read_freq_info n min || return $?

    if [ "${SET_MAX_FREQ}" -gt "${FREQ_RP0}" ]; then
        log ERROR "Cannot set GPU max freq (%s) to be greater than hw max freq (%s)" \
            "${SET_MAX_FREQ}" "${FREQ_RP0}"
        return 1
    fi

    if [ "${SET_MAX_FREQ}" -lt "${FREQ_RPn}" ]; then
        # Report the rejected max value, not the min one
        log ERROR "Cannot set GPU max freq (%s) to be less than hw min freq (%s)" \
            "${SET_MAX_FREQ}" "${FREQ_RPn}"
        return 1
    fi

    if [ "${SET_MAX_FREQ}" -lt "${FREQ_min}" ]; then
        log ERROR "Cannot set GPU max freq (%s) to be less than min freq (%s)" \
            "${SET_MAX_FREQ}" "${FREQ_min}"
        return 1
    fi

    [ -z "${DRY_RUN}" ] || return 0

    # tee writes the same value to both attributes in one go
    printf "%s" "${SET_MAX_FREQ}" | tee "$(print_freq_sysfs_path max)" \
        "$(print_freq_sysfs_path boost)" > /dev/null || {
        log ERROR "Failed to set GPU max frequency"
        return 1
    }
}
252
#
# Helper for set_freq().
#
# Checks SET_MIN_FREQ against the currently enforced max frequency and the
# hardware min frequency (FREQ_RPn), then writes it to the 'min' sysfs
# attribute. Nothing is written when DRY_RUN is set.
#
# return: 0 on success (or dry run), non-zero on validation or write failure
#
set_freq_min() {
    log INFO "Setting GPU min freq to %s MHz" "${SET_MIN_FREQ}"

    read_freq_info n max || return $?

    if [ "${SET_MIN_FREQ}" -gt "${FREQ_max}" ]; then
        log ERROR "Cannot set GPU min freq (%s) to be greater than max freq (%s)" \
            "${SET_MIN_FREQ}" "${FREQ_max}"
        return 1
    fi

    if [ "${SET_MIN_FREQ}" -lt "${FREQ_RPn}" ]; then
        log ERROR "Cannot set GPU min freq (%s) to be less than hw min freq (%s)" \
            "${SET_MIN_FREQ}" "${FREQ_RPn}"
        return 1
    fi

    if [ -n "${DRY_RUN}" ]; then
        return 0
    fi

    if ! printf "%s" "${SET_MIN_FREQ}" > "$(print_freq_sysfs_path min)"; then
        log ERROR "Failed to set GPU min frequency"
        return 1
    fi
}
281
#
# Set min or max or both GPU frequencies to the user indicated values.
#
# SET_MAX_FREQ / SET_MIN_FREQ hold the raw user request on entry and are
# replaced by the computed values in MHz.
#
# return: 0 on success, non-zero when a value is invalid or cannot be applied
#
set_freq() {
    # Get hw max & min frequencies
    read_freq_info n RP0 RPn || return $?

    if [ -n "${SET_MAX_FREQ}" ]; then
        SET_MAX_FREQ=$(compute_freq_set "${SET_MAX_FREQ}") || return 1
        [ -n "${SET_MAX_FREQ}" ] || return 1
    fi

    if [ -n "${SET_MIN_FREQ}" ]; then
        SET_MIN_FREQ=$(compute_freq_set "${SET_MIN_FREQ}") || return 1
        [ -n "${SET_MIN_FREQ}" ] || return 1
    fi

    #
    # Ensure correct operation order, to avoid setting min freq
    # to a value which is larger than max freq.
    #
    # E.g.:
    #   crt_min=crt_max=600; new_min=new_max=700
    #   > operation order: max=700; min=700
    #
    #   crt_min=crt_max=600; new_min=new_max=500
    #   > operation order: min=500; max=500
    #
    if [ -n "${SET_MAX_FREQ}" ] && [ -n "${SET_MIN_FREQ}" ]; then
        if [ "${SET_MAX_FREQ}" -lt "${SET_MIN_FREQ}" ]; then
            log ERROR "Cannot set GPU max freq to be less than min freq"
            return 1
        fi

        read_freq_info n min || return $?

        if [ "${SET_MAX_FREQ}" -lt "${FREQ_min}" ]; then
            set_freq_min || return $?
            set_freq_max
        else
            set_freq_max || return $?
            set_freq_min
        fi
    elif [ -n "${SET_MAX_FREQ}" ]; then
        set_freq_max
    elif [ -n "${SET_MIN_FREQ}" ]; then
        set_freq_min
    else
        # log() expects the message level as its first argument
        log ERROR "Unexpected call to set_freq()"
        return 1
    fi
}
334
#
# Helper for detect_throttling().
#
# Prints the PID stored in THROTT_DETECT_PID_FILE_PATH when it refers to a
# live process whose command line mentions this script.
#
# return: 0 when there is no PID file or the recorded process is alive;
#         1 when the PID file cannot be read or is stale (a stale file is
#         removed)
#
get_thrott_detect_pid() {
    local pid proc_path

    [ -e "${THROTT_DETECT_PID_FILE_PATH}" ] || return 0

    read -r pid < "${THROTT_DETECT_PID_FILE_PATH}" || {
        log ERROR "Failed to read pid from: %s" "${THROTT_DETECT_PID_FILE_PATH}"
        return 1
    }

    case "${pid}" in
    ""|*[!0-9]*)
        # Not a PID. Entries such as 'self' must not be looked up under
        # /proc, since they would resolve to the process doing the lookup.
        ;;
    *)
        proc_path=/proc/${pid}/cmdline
        # -F: the script name is a literal string, not a regular expression
        if [ -r "${proc_path}" ] && grep -qsF -- "${0##*/}" "${proc_path}"; then
            printf "%s" "${pid}"
            return 0
        fi
        ;;
    esac

    # Remove orphaned PID file
    rm -f -- "${THROTT_DETECT_PID_FILE_PATH}"
    return 1
}
357
#
# Manage the background process which detects and reports GPU throttling.
#
# arg1: start - launch the detector in background, unless already running
#       stop - terminate the detector process, if any
#       status - report whether the detector is running
#
detect_throttling() {
    local detector_pid
    detector_pid=$(get_thrott_detect_pid)

    case "$1" in
    status)
        printf "Throttling detector is "
        if [ -z "${detector_pid}" ] && printf "not running\n"; then
            return 0
        fi
        printf "running (pid=%s)\n" "${detector_pid}"
        ;;

    stop)
        [ -n "${detector_pid}" ] || return 0

        log INFO "Stopping throttling detector (pid=%s)" "${detector_pid}"
        # Ask politely first; force-kill if it is still alive a second later
        kill "${detector_pid}"
        sleep 1
        if kill -0 "${detector_pid}" 2>/dev/null; then
            kill -9 "${detector_pid}"
        fi
        rm -rf "${THROTT_DETECT_PID_FILE_PATH}"
        ;;

    start)
        if [ -n "${detector_pid}" ]; then
            log WARN "Throttling detector is already running (pid=%s)" "${detector_pid}"
            return 0
        fi

        (
            read_freq_info n RPn || exit $?

            while true; do
                sleep ${THROTT_DETECT_SLEEP_SEC}
                read_freq_info n act min cur || exit $?

                #
                # The throttling seems to occur when act freq goes below min.
                # However, it's necessary to exclude the idle states, where
                # act freq normally reaches RPn and cur goes below min.
                #
                if [ "${FREQ_act}" -lt "${FREQ_min}" ] &&
                    [ "${FREQ_act}" -gt "${FREQ_RPn}" ] &&
                    [ "${FREQ_cur}" -ge "${FREQ_min}" ]; then
                    printf "GPU throttling detected: act=%s min=%s cur=%s RPn=%s\n" \
                        "${FREQ_act}" "${FREQ_min}" "${FREQ_cur}" "${FREQ_RPn}"
                fi
            done
        ) &

        detector_pid=$!
        log INFO "Started GPU throttling detector (pid=%s)" "${detector_pid}"

        # The PID file is how later 'stop' and 'status' calls find the process
        printf "%s\n" "${detector_pid}" > "${THROTT_DETECT_PID_FILE_PATH}" || \
            log WARN "Failed to write throttle detector PID file"
        ;;
    esac
}
417
#
# Retrieve the list of online CPUs.
#
# Prints the CPU indexes separated by spaces. CPU 0 is always listed first
# (presumably because it may not expose an 'online' attribute - TODO confirm);
# every other CPU is listed when its 'online' attribute reads 1.
#
get_online_cpus() {
    local online_file state cpu_index

    printf "0"
    for online_file in "${CPU_SYSFS_PREFIX}"/cpu[0-9]*/online; do
        # Also skips the literal pattern left behind when nothing matches
        [ -r "${online_file}" ] || continue
        read -r state < "${online_file}" || continue
        [ "${state}" = "1" ] || continue

        cpu_index=${online_file%/online}
        cpu_index=${cpu_index##*/cpu}

        # CPU 0 has already been printed above
        [ "${cpu_index}" = "0" ] && continue

        printf " %s" "${cpu_index}"
    done

    return 0
}
430
#
# Helper to print sysfs path for the given CPU index and freq info.
#
# arg1: Frequency info sysfs name, one of *_CPU_FREQ_INFO constants above
# arg2: CPU index
#
print_cpu_freq_sysfs_path() {
    # The pattern is used as the printf format on purpose; it is quoted so
    # that it is never word-split or glob-expanded before reaching printf.
    printf "${CPU_FREQ_SYSFS_PATTERN}" "$2" "$1"
}
440
#
# Read the specified CPU freq info from sysfs.
#
# arg1: CPU index
# arg2: Flag (y/n) to also enable printing the freq info.
# arg3...: Frequency info sysfs name(s), see *_CPU_FREQ_INFO constants above
# return: Global variable(s) CPU_FREQ_${arg} containing the requested
#         information; exit status is 0 when every item was read, otherwise 1
#
read_cpu_freq_info() {
    local var val info path cpu_index print=0 ret=0

    cpu_index=$1
    [ "$2" = "y" ] && print=1
    shift 2

    while [ $# -gt 0 ]; do
        info=$1
        shift
        var=CPU_FREQ_${info}
        path=$(print_cpu_freq_sysfs_path "${info}" "${cpu_index}")

        if [ ! -r "${path}" ] || ! read -r "${var}" < "${path}"; then
            log ERROR "Failed to read CPU freq info from: %s" "${path}"
            ret=1
            continue
        fi

        # Check the value that was read, not the name of the variable
        # holding it (the name is never empty).
        eval "val=\${${var}}"
        if [ -z "${val}" ]; then
            log ERROR "Got empty CPU freq info from: %s" "${path}"
            ret=1
            continue
        fi

        if [ ${print} -eq 1 ]; then
            printf "%6s: %4s Hz\n" "${info}" "${val}"
        fi
    done

    return ${ret}
}
482
#
# Helper to print freq. value as requested by user via '--cpu-set-max' option.
#
# Accepted forms: '+' (CPU_FREQ_cpuinfo_max), '-' (CPU_FREQ_cpuinfo_min) or
# 'N%' (percentage of CPU_FREQ_cpuinfo_max). Plain numbers are rejected.
#
# arg1: user requested freq value
# return: 0 and the computed value on stdout, or 1 if the value is invalid
#
compute_cpu_freq_set() {
    local val num

    case "$1" in
    +)
        val=${CPU_FREQ_cpuinfo_max}
        ;;
    -)
        val=${CPU_FREQ_cpuinfo_min}
        ;;
    *%)
        num=${1%?}
        # Only digits may precede '%'; anything else would be evaluated as
        # an arithmetic expression below.
        case "${num}" in
        ""|*[!0-9]*)
            log ERROR "Cannot set CPU freq to invalid value: %s" "$1"
            return 1
            ;;
        esac
        # Drop leading zeros, otherwise the value is parsed as octal
        num=${num#"${num%%[!0]*}"}
        val=$((${num:-0} * ${CPU_FREQ_cpuinfo_max} / 100))
        ;;
    *[!0-9]*)
        log ERROR "Cannot set CPU freq to invalid value: %s" "$1"
        return 1
        ;;
    "")
        log ERROR "Cannot set CPU freq to unspecified value"
        return 1
        ;;
    *)
        log ERROR "Cannot set CPU freq to custom value; use +, -, or % instead"
        return 1
        ;;
    esac

    printf "%s" "${val}"
}
516
#
# Adjust CPU max scaling frequency.
#
# CPU_SET_MAX_FREQ selects the target: '+' (100%), '-' (1% for intel_pstate,
# hardware min for cpufreq) or 'N%'. The intel_pstate max_perf_pct attribute
# is updated when it exists, then scaling_max is updated for every CPU
# reported by get_online_cpus(). Nothing is written when DRY_RUN is set.
#
# return: 0 on success, non-zero if the request is invalid or any step failed
#
set_cpu_freq_max() {
    local target_freq res=0
    case "${CPU_SET_MAX_FREQ}" in
    +)
        target_freq=100
        ;;
    -)
        target_freq=1
        ;;
    *%)
        target_freq=${CPU_SET_MAX_FREQ%?}
        ;;
    *)
        log ERROR "Invalid CPU freq"
        return 1
        ;;
    esac

    local pstate_info
    pstate_info=$(printf "${CPU_PSTATE_SYSFS_PATTERN}" max_perf_pct)
    if [ -e "${pstate_info}" ]; then
        log INFO "Setting intel_pstate max perf to %s" "${target_freq}%"
        # A dry run must only report the change, never apply it
        if [ -z "${DRY_RUN}" ]; then
            printf "%s" "${target_freq}" > "${pstate_info}" || {
                log ERROR "Failed to set intel_pstate max perf"
                res=1
            }
        fi
    fi

    local cpu_index
    for cpu_index in $(get_online_cpus); do
        # CAP_CPU_FREQ_INFO is expanded unquoted on purpose: one word per name
        read_cpu_freq_info "${cpu_index}" n ${CAP_CPU_FREQ_INFO} || { res=$?; continue; }

        # Computed per CPU, since the hardware limits are read per CPU
        target_freq=$(compute_cpu_freq_set "${CPU_SET_MAX_FREQ}")
        [ -n "${target_freq}" ] || { res=1; continue; }

        log INFO "Setting CPU%s max scaling freq to %s Hz" "${cpu_index}" "${target_freq}"
        [ -n "${DRY_RUN}" ] && continue

        printf "%s" "${target_freq}" > "$(print_cpu_freq_sysfs_path scaling_max "${cpu_index}")" || {
            res=1
            log ERROR "Failed to set CPU%s max scaling frequency" "${cpu_index}"
        }
    done

    return ${res}
}
567
#
# Show help message.
#
# The text is written to stdout; the script name is taken from $0.
#
print_usage() {
    cat <<EOF
Usage: ${0##*/} [OPTION]...

A script to manage Intel GPU frequencies. Can be used for debugging performance
problems or trying to obtain a stable frequency while benchmarking.

Note Intel GPUs only accept specific frequencies, usually multiples of 50 MHz.

Options:
  -g, --get [act|enf|cap|all]
                        Get frequency information: active (default), enforced,
                        hardware capabilities or all of them.

  -s, --set [{min|max}=]{FREQUENCY[%]|+|-}
                        Set min or max frequency to the given value (MHz).
                        Append '%' to interpret FREQUENCY as % of hw max.
                        Use '+' or '-' to set frequency to hardware max or min.
                        Omit min/max prefix to set both frequencies.

  -r, --reset           Reset frequencies to hardware defaults.

  -m, --monitor [act|enf|cap|all]
                        Monitor the indicated frequencies via 'watch' utility.
                        See '-g, --get' option for more details.

  -d, --detect-thrott [start|stop|status]
                        Start (default operation) the throttling detector
                        as a background process. Use 'stop' or 'status' to
                        terminate the detector process or verify its status.

  --cpu-set-max {FREQUENCY%|+|-}
                        Set CPU max scaling frequency as % of hw max.
                        Use '+' or '-' to set frequency to hardware max or min.

  --dry-run             See what the script will do without applying any
                        frequency changes.

  -h, --help            Display this help text and exit.
EOF
}
614
#
# Interpret the value given to the '-g, --get' option and raise the matching
# GET_*_FREQ flags.
#
# arg1: act, enf, cap or all; when empty or looking like another option, the
#       default (act) is applied
# return: 0 if arg1 was a value for this option, 1 if the default was used.
#         Any other value prints the usage and terminates the script.
#
parse_option_get() {
    # Nothing usable was supplied: fall back to the default selection
    case "$1" in
    ""|-*)
        GET_ACT_FREQ=1
        return 1
        ;;
    esac

    case "$1" in
    act)
        GET_ACT_FREQ=1
        ;;
    enf)
        GET_ENF_FREQ=1
        ;;
    cap)
        GET_CAP_FREQ=1
        ;;
    all)
        GET_ACT_FREQ=1
        GET_ENF_FREQ=1
        GET_CAP_FREQ=1
        ;;
    *)
        print_usage
        exit 1
        ;;
    esac

    return 0
}
640
#
# Check the value supplied to a frequency setting option; on invalid input
# the usage is printed and the script terminates.
#
# Always accepted: '+', '-' and a one or two digit percentage (e.g. 5%, 50%).
# A plain number is accepted only when arg2 is empty.
#
# arg1: input value to be validated
# arg2: optional flag indicating input is restricted to %
#
validate_option_set() {
    local value=$1 pct_only=$2

    case "${value}" in
    +|-|[0-9]%|[0-9][0-9]%)
        return 0
        ;;
    ""|*[!0-9]*)
        # Empty or malformed: rejected below
        ;;
    *)
        # Plain number: fine unless only percentages are allowed
        [ -n "${pct_only}" ] || return 0
        ;;
    esac

    print_usage
    exit 1
}
659
#
# Parse script arguments.
#
# Each option records the request in a global variable; the work itself is
# done afterwards, in the main section.
#
if [ $# -eq 0 ]; then
    print_usage
    exit 1
fi

while [ $# -gt 0 ]; do
    case "$1" in
    -g|--get)
        # Consume the next argument only if it was a value for this option
        if parse_option_get "$2"; then
            shift
        fi
        ;;

    -s|--set)
        shift
        case "$1" in
        min=*)
            validate_option_set "${1#min=}"
            SET_MIN_FREQ=${1#min=}
            ;;
        max=*)
            validate_option_set "${1#max=}"
            SET_MAX_FREQ=${1#max=}
            ;;
        *)
            # No prefix: the same value applies to both min and max
            validate_option_set "$1"
            SET_MIN_FREQ=$1
            SET_MAX_FREQ=$1
            ;;
        esac
        ;;

    -r|--reset)
        RESET_FREQ=1
        SET_MIN_FREQ="-"
        SET_MAX_FREQ="+"
        ;;

    -m|--monitor)
        if parse_option_get "$2"; then
            MONITOR_FREQ=$2
            shift
        else
            MONITOR_FREQ=act
        fi
        ;;

    -d|--detect-thrott)
        case "$2" in
        start|stop|status)
            DETECT_THROTT=$2
            shift
            ;;
        *)
            DETECT_THROTT=start
            ;;
        esac
        ;;

    --cpu-set-max)
        shift
        CPU_SET_MAX_FREQ=$1
        validate_option_set "${CPU_SET_MAX_FREQ}" restricted
        ;;

    --dry-run)
        DRY_RUN=1
        ;;

    -h|--help)
        print_usage
        exit 0
        ;;

    *)
        print_usage
        exit 1
        ;;
    esac

    shift
done
734
#
# Main
#
# Runs the requested operations in a fixed order and exits with the status of
# the last failed frequency change, if any.
#
RET=0

if ! identify_intel_gpu; then
    log INFO "No Intel GPU detected"
    exit 0
fi

if [ -n "${SET_MIN_FREQ}${SET_MAX_FREQ}" ]; then
    set_freq || RET=$?
fi
print_freq_info

if [ -n "${DETECT_THROTT}" ]; then
    detect_throttling "${DETECT_THROTT}"
fi

if [ -n "${CPU_SET_MAX_FREQ}" ]; then
    set_cpu_freq_max || RET=$?
fi

if [ -n "${MONITOR_FREQ}" ]; then
    log INFO "Entering frequency monitoring mode"
    sleep 2
    # Hand over to 'watch', which re-runs this script with '-g'
    exec watch -d -n 1 "$0" -g "${MONITOR_FREQ}"
fi

exit ${RET}
759