#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# A Python library to read and store procfs (/proc) information on Linux.
#
# Each information storage class in this file stores original data as original
# as reasonably possible. Translation is done when requested. It is to make it
# always possible to probe the original data.


import collections
import logging
import os
import re
import struct
import sys


class _NullHandler(logging.Handler):
    """Logging handler that silently discards every record."""

    def emit(self, record):
        pass


# Module logger.  A do-nothing handler is installed so that the logger always
# has at least one handler, even when the embedding program configures none.
_LOGGER = logging.getLogger('procfs')
_LOGGER.addHandler(_NullHandler())


class ProcStat(object):
    """Reads and stores information in /proc/pid/stat.

    Only the PID, VSIZE and RSS fields are kept (as the strings captured from
    the file); the accessors convert them to int on demand.
    """

    # One named group per space-separated field of the stat line.  Fields that
    # proc(5) documents as signed (%d / %ld) accept a leading '-', and the
    # state accepts any single letter so that newer states (e.g. 'I') parse.
    # The pattern is not anchored at the end, so extra trailing fields are
    # tolerated.
    _PATTERN = re.compile(
        r'^'
        r'(?P<PID>-?[0-9]+) '
        r'\((?P<COMM>.+)\) '
        r'(?P<STATE>[A-Za-z]) '
        r'(?P<PPID>-?[0-9]+) '
        r'(?P<PGRP>-?[0-9]+) '
        r'(?P<SESSION>-?[0-9]+) '
        r'(?P<TTY_NR>-?[0-9]+) '
        r'(?P<TPGID>-?[0-9]+) '
        r'(?P<FLAGS>[0-9]+) '
        r'(?P<MINFIT>[0-9]+) '
        r'(?P<CMINFIT>[0-9]+) '
        r'(?P<MAJFIT>[0-9]+) '
        r'(?P<CMAJFIT>[0-9]+) '
        r'(?P<UTIME>[0-9]+) '
        r'(?P<STIME>[0-9]+) '
        r'(?P<CUTIME>-?[0-9]+) '
        r'(?P<CSTIME>-?[0-9]+) '
        r'(?P<PRIORITY>-?[0-9]+) '
        r'(?P<NICE>-?[0-9]+) '
        r'(?P<NUM_THREADS>-?[0-9]+) '
        r'(?P<ITREALVALUE>-?[0-9]+) '
        r'(?P<STARTTIME>[0-9]+) '
        r'(?P<VSIZE>[0-9]+) '
        r'(?P<RSS>-?[0-9]+) '
        r'(?P<RSSLIM>[0-9]+) '
        r'(?P<STARTCODE>[0-9]+) '
        r'(?P<ENDCODE>[0-9]+) '
        r'(?P<STARTSTACK>[0-9]+) '
        r'(?P<KSTKESP>[0-9]+) '
        r'(?P<KSTKEIP>[0-9]+) '
        r'(?P<SIGNAL>[0-9]+) '
        r'(?P<BLOCKED>[0-9]+) '
        r'(?P<SIGIGNORE>[0-9]+) '
        r'(?P<SIGCATCH>[0-9]+) '
        r'(?P<WCHAN>[0-9]+) '
        r'(?P<NSWAP>[0-9]+) '
        r'(?P<CNSWAP>[0-9]+) '
        r'(?P<EXIT_SIGNAL>-?[0-9]+) '
        r'(?P<PROCESSOR>-?[0-9]+) '
        r'(?P<RT_PRIORITY>[0-9]+) '
        r'(?P<POLICY>[0-9]+) '
        r'(?P<DELAYACCT_BLKIO_TICKS>[0-9]+) '
        r'(?P<GUEST_TIME>[0-9]+) '
        r'(?P<CGUEST_TIME>-?[0-9]+)',
        re.IGNORECASE)

    def __init__(self, raw, pid, vsize, rss):
        self._raw = raw
        self._pid = pid
        self._vsize = vsize
        self._rss = rss

    @staticmethod
    def load_file(stat_f):
        """Parses an open stat file and returns a ProcStat.

        Args:
            stat_f: A file-like object providing readlines().

        Raises:
            SyntaxError: The first line is missing or does not look like a
                /proc/pid/stat line.
        """
        raw = stat_f.readlines()
        matched = ProcStat._PATTERN.match(raw[0]) if raw else None
        if not matched:
            raise SyntaxError('Unknown /proc/pid/stat format.')
        fields = matched.groupdict()
        return ProcStat(raw, fields['PID'], fields['VSIZE'], fields['RSS'])

    @staticmethod
    def load(pid):
        """Reads /proc/<pid>/stat and returns a ProcStat."""
        with open(os.path.join('/proc', str(pid), 'stat'), 'r') as stat_f:
            return ProcStat.load_file(stat_f)

    @property
    def raw(self):
        """The list of lines exactly as read from the file."""
        return self._raw

    @property
    def pid(self):
        return int(self._pid)

    @property
    def vsize(self):
        return int(self._vsize)

    @property
    def rss(self):
        return int(self._rss)


class ProcStatm(object):
    """Reads and stores information in /proc/pid/statm.

    Field values are kept as the captured strings and converted to int by the
    accessors.
    """

    _PATTERN = re.compile(
        r'^'
        r'(?P<SIZE>[0-9]+) '
        r'(?P<RESIDENT>[0-9]+) '
        r'(?P<SHARE>[0-9]+) '
        r'(?P<TEXT>[0-9]+) '
        r'(?P<LIB>[0-9]+) '
        r'(?P<DATA>[0-9]+) '
        r'(?P<DT>[0-9]+)',
        re.IGNORECASE)

    def __init__(self, raw, size, resident, share, text, lib, data, dt):
        self._raw = raw
        self._size = size
        self._resident = resident
        self._share = share
        self._text = text
        self._lib = lib
        self._data = data
        self._dt = dt

    @staticmethod
    def load_file(statm_f):
        """Parses an open statm file and returns a ProcStatm.

        Args:
            statm_f: A file-like object providing readlines().

        Raises:
            SyntaxError: The first line is missing or does not look like a
                /proc/pid/statm line.
        """
        raw = statm_f.readlines()
        matched = ProcStatm._PATTERN.match(raw[0]) if raw else None
        if not matched:
            raise SyntaxError('Unknown /proc/pid/statm format.')
        fields = matched.groupdict()
        return ProcStatm(raw,
                         fields['SIZE'],
                         fields['RESIDENT'],
                         fields['SHARE'],
                         fields['TEXT'],
                         fields['LIB'],
                         fields['DATA'],
                         fields['DT'])

    @staticmethod
    def load(pid):
        """Reads /proc/<pid>/statm and returns a ProcStatm."""
        with open(os.path.join('/proc', str(pid), 'statm'), 'r') as statm_f:
            return ProcStatm.load_file(statm_f)

    @property
    def raw(self):
        """The list of lines exactly as read from the file."""
        return self._raw

    @property
    def size(self):
        return int(self._size)

    @property
    def resident(self):
        return int(self._resident)

    @property
    def share(self):
        return int(self._share)

    @property
    def text(self):
        return int(self._text)

    @property
    def lib(self):
        return int(self._lib)

    @property
    def data(self):
        return int(self._data)

    @property
    def dt(self):
        return int(self._dt)


class ProcStatus(object):
    """Reads and stores information in /proc/pid/status.

    Values are stored as the text following "Name:" on each line; fields that
    are absent from the file are stored as None.
    """

    _PATTERN = re.compile(r'^(?P<NAME>[A-Za-z0-9_]+):\s+(?P<VALUE>.*)')

    def __init__(self, raw, dct):
        self._raw = raw
        self._pid = dct.get('Pid')
        self._name = dct.get('Name')
        self._vm_peak = dct.get('VmPeak')
        self._vm_size = dct.get('VmSize')
        self._vm_lck = dct.get('VmLck')
        self._vm_pin = dct.get('VmPin')
        self._vm_hwm = dct.get('VmHWM')
        self._vm_rss = dct.get('VmRSS')
        self._vm_data = dct.get('VmData')
        self._vm_stack = dct.get('VmStk')
        self._vm_exe = dct.get('VmExe')
        self._vm_lib = dct.get('VmLib')
        self._vm_pte = dct.get('VmPTE')
        self._vm_swap = dct.get('VmSwap')

    @staticmethod
    def load_file(status_f):
        """Parses an open status file and returns a ProcStatus.

        Args:
            status_f: A file-like object providing readlines().

        Raises:
            SyntaxError: A line does not have the "Name: value" shape.
        """
        raw = status_f.readlines()
        dct = {}
        for line in raw:
            status_match = ProcStatus._PATTERN.match(line)
            if not status_match:
                raise SyntaxError('Unknown /proc/pid/status format.')
            dct[status_match.group('NAME')] = status_match.group('VALUE')
        return ProcStatus(raw, dct)

    @staticmethod
    def load(pid):
        """Reads /proc/<pid>/status and returns a ProcStatus."""
        with open(os.path.join('/proc', str(pid), 'status'), 'r') as status_f:
            return ProcStatus.load_file(status_f)

    @staticmethod
    def _kilobytes(value, name):
        """Converts a "<number> kB" status value to an int.

        Raises:
            ValueError: The field was absent from the file, or its value does
                not end with "kB".
        """
        if value is None:
            raise ValueError('%s is not available.' % name)
        if value.endswith('kB'):
            return int(value.split()[0])
        raise ValueError('%s is not in kB.' % name)

    @property
    def raw(self):
        """The list of lines exactly as read from the file."""
        return self._raw

    @property
    def pid(self):
        return int(self._pid)

    @property
    def vm_peak(self):
        """Returns a high-water (peak) virtual memory size in kilo-bytes."""
        return ProcStatus._kilobytes(self._vm_peak, 'VmPeak')

    @property
    def vm_size(self):
        """Returns a virtual memory size in kilo-bytes."""
        return ProcStatus._kilobytes(self._vm_size, 'VmSize')

    @property
    def vm_hwm(self):
        """Returns a high-water (peak) resident set size (RSS) in kilo-bytes."""
        return ProcStatus._kilobytes(self._vm_hwm, 'VmHWM')

    @property
    def vm_rss(self):
        """Returns a resident set size (RSS) in kilo-bytes."""
        return ProcStatus._kilobytes(self._vm_rss, 'VmRSS')


class ProcMapsEntry(object):
    """A class representing one line in /proc/pid/maps."""

    def __init__(
            self, begin, end, readable, writable, executable, private, offset,
            major, minor, inode, name):
        self.begin = begin
        self.end = end
        self.readable = readable
        self.writable = writable
        self.executable = executable
        self.private = private
        self.offset = offset
        self.major = major
        self.minor = minor
        self.inode = inode
        self.name = name

    def as_dict(self):
        """Returns the entry's fields as a plain dictionary."""
        return {
            'begin': self.begin,
            'end': self.end,
            'readable': self.readable,
            'writable': self.writable,
            'executable': self.executable,
            'private': self.private,
            'offset': self.offset,
            'major': self.major,
            'minor': self.minor,
            'inode': self.inode,
            'name': self.name,
        }


class ProcMaps(object):
    """Reads and stores information in /proc/pid/maps.

    Entries are indexed by their begin address and iterated in ascending
    address order.
    """

    MAPS_PATTERN = re.compile(
        r'^([a-f0-9]+)-([a-f0-9]+)\s+(.)(.)(.)(.)\s+'
        r'([a-f0-9]+)\s+(\S+):(\S+)\s+(\d+)\s*(.*)$',
        re.IGNORECASE)

    # Matches mapping names that contain a shared-library style suffix
    # (.so/.dll/.dylib/.bundle, optionally versioned) or the word "chrome".
    _MODULE_NAME_PATTERN = re.compile(
        r'\S+(\.(so|dll|dylib|bundle)|chrome)((\.\d+)+\w*(\.\d+){0,3})?')

    def __init__(self):
        self._sorted_indexes = []
        self._dictionary = {}
        self._sorted = True

    def _ensure_sorted(self):
        """Sorts the begin-address index if entries arrived out of order."""
        if not self._sorted:
            self._sorted_indexes.sort()
            self._sorted = True

    def iter(self, condition):
        """Yields entries in address order, filtered by |condition|.

        Args:
            condition: A callable taking a ProcMapsEntry, or a false value to
                yield every entry.
        """
        self._ensure_sorted()
        for index in self._sorted_indexes:
            entry = self._dictionary[index]
            if not condition or condition(entry):
                yield entry

    def __iter__(self):
        return self.iter(None)

    @staticmethod
    def load_file(maps_f):
        """Builds a ProcMaps from an iterable of maps lines."""
        table = ProcMaps()
        for line in maps_f:
            table.append_line(line)
        return table

    @staticmethod
    def load(pid):
        """Reads /proc/<pid>/maps and returns a ProcMaps."""
        with open(os.path.join('/proc', str(pid), 'maps'), 'r') as maps_f:
            return ProcMaps.load_file(maps_f)

    def append_line(self, line):
        """Parses |line| and stores it; returns the entry, or None."""
        entry = self.parse_line(line)
        if entry:
            self._append_entry(entry)
        return entry

    @staticmethod
    def parse_line(line):
        """Returns a ProcMapsEntry for |line|, or None if it does not match."""
        matched = ProcMaps.MAPS_PATTERN.match(line)
        if not matched:
            return None
        return ProcMapsEntry(  # pylint: disable=W0212
            int(matched.group(1), 16),   # begin
            int(matched.group(2), 16),   # end
            matched.group(3),            # readable
            matched.group(4),            # writable
            matched.group(5),            # executable
            matched.group(6),            # private
            int(matched.group(7), 16),   # offset
            matched.group(8),            # major
            matched.group(9),            # minor
            int(matched.group(10), 10),  # inode
            matched.group(11)            # name
        )

    @staticmethod
    def constants(entry):
        """Truthy for non-writable, non-executable module mappings."""
        return (entry.writable == '-' and entry.executable == '-' and
                ProcMaps._MODULE_NAME_PATTERN.match(entry.name))

    @staticmethod
    def executable(entry):
        """Truthy for executable module mappings."""
        return (entry.executable == 'x' and
                ProcMaps._MODULE_NAME_PATTERN.match(entry.name))

    @staticmethod
    def executable_and_constants(entry):
        """Truthy when either constants() or executable() would match."""
        return (((entry.writable == '-' and entry.executable == '-') or
                 entry.executable == 'x') and
                ProcMaps._MODULE_NAME_PATTERN.match(entry.name))

    def _append_entry(self, entry):
        # Sorting is deferred until iteration; just note when it is needed.
        if self._sorted_indexes and self._sorted_indexes[-1] > entry.begin:
            self._sorted = False
        self._sorted_indexes.append(entry.begin)
        self._dictionary[entry.begin] = entry


class ProcSmaps(object):
    """Reads and stores information in /proc/pid/smaps."""

    _SMAPS_PATTERN = re.compile(r'^(?P<NAME>[A-Za-z0-9_]+):\s+(?P<VALUE>.*)')

    class VMA(object):
        """Per-mapping attribute values, stored as read from the file."""

        # smaps attribute name -> instance attribute that stores its value.
        _ATTRIBUTES = {
            'Size': '_size',
            'Rss': '_rss',
            'Pss': '_pss',
            'Referenced': '_referenced',
            'Private_Clean': '_private_clean',
            'Shared_Clean': '_shared_clean',
            'KernelPageSize': '_kernel_page_size',
            'MMUPageSize': '_mmu_page_size',
        }

        def __init__(self):
            self._size = 0
            self._rss = 0
            self._pss = 0

        def append(self, name, value):
            """Stores |value| if |name| is a known attribute; else ignores."""
            attribute = self._ATTRIBUTES.get(name)
            if attribute is not None:
                setattr(self, attribute, value)

        @staticmethod
        def _to_int(value):
            # |value| is the raw text ("12 kB" or "12"), or the integer
            # default from __init__ when the attribute never appeared.
            text = str(value)
            if text.endswith('kB'):
                return int(text.split()[0])
            return int(text)

        @property
        def size(self):
            return self._to_int(self._size)

        @property
        def rss(self):
            return self._to_int(self._rss)

        @property
        def pss(self):
            return self._to_int(self._pss)

    def __init__(self, raw, total_dct, maps, vma_internals):
        self._raw = raw
        self._size = total_dct.get('Size', 0)
        self._rss = total_dct.get('Rss', 0)
        self._pss = total_dct.get('Pss', 0)
        self._referenced = total_dct.get('Referenced', 0)
        self._shared_clean = total_dct.get('Shared_Clean', 0)
        self._private_clean = total_dct.get('Private_Clean', 0)
        self._kernel_page_size = total_dct.get('KernelPageSize', 0)
        self._mmu_page_size = total_dct.get('MMUPageSize', 0)
        self._maps = maps
        self._vma_internals = vma_internals

    @staticmethod
    def load_file(smaps_f):
        """Parses an open smaps file and returns a ProcSmaps.

        Mapping header lines start a new VMA; the "Name: value" lines that
        follow are recorded on that VMA and their leading integer is added to
        the per-name totals.  Attribute lines without a leading integer (for
        example "VmFlags: rd ex") are left out of the totals.

        Args:
            smaps_f: A file-like object providing readlines().
        """
        raw = smaps_f.readlines()

        current = None
        vma_internals = collections.OrderedDict()
        total_dct = collections.defaultdict(int)
        maps = ProcMaps()
        for line in raw:
            if ProcMaps.MAPS_PATTERN.match(line):
                current = maps.append_line(line.strip())
                vma_internals[current] = ProcSmaps.VMA()
                continue
            smaps_match = ProcSmaps._SMAPS_PATTERN.match(line)
            if not smaps_match or current is None:
                # Not an attribute line, or no mapping header seen yet.
                continue
            name = smaps_match.group('NAME')
            value = smaps_match.group('VALUE')
            vma_internals[current].append(name, value)
            try:
                amount = int(value.split()[0])
            except (IndexError, ValueError):
                continue
            total_dct[name] += amount

        return ProcSmaps(raw, total_dct, maps, vma_internals)

    @staticmethod
    def load(pid):
        """Reads /proc/<pid>/smaps and returns a ProcSmaps."""
        with open(os.path.join('/proc', str(pid), 'smaps'), 'r') as smaps_f:
            return ProcSmaps.load_file(smaps_f)

    @property
    def size(self):
        return self._size

    @property
    def rss(self):
        return self._rss

    @property
    def referenced(self):
        return self._referenced

    @property
    def pss(self):
        return self._pss

    @property
    def private_clean(self):
        return self._private_clean

    @property
    def shared_clean(self):
        return self._shared_clean

    @property
    def kernel_page_size(self):
        return self._kernel_page_size

    @property
    def mmu_page_size(self):
        return self._mmu_page_size

    @property
    def vma_internals(self):
        """An OrderedDict mapping each ProcMapsEntry to its ProcSmaps.VMA."""
        return self._vma_internals


class ProcPagemap(object):
    """Reads and stores partial information in /proc/pid/pagemap.

    It picks up virtual addresses to read based on ProcMaps (/proc/pid/maps).
    See https://www.kernel.org/doc/Documentation/vm/pagemap.txt for details.
    """
    _BYTES_PER_PAGEMAP_VALUE = 8
    _BYTES_PER_OS_PAGE = 4096
    # Floor division keeps file offsets integral regardless of the
    # interpreter's division semantics.
    _VIRTUAL_TO_PAGEMAP_OFFSET = (
        _BYTES_PER_OS_PAGE // _BYTES_PER_PAGEMAP_VALUE)

    _MASK_PRESENT = 1 << 63
    _MASK_SWAPPED = 1 << 62
    _MASK_FILEPAGE_OR_SHAREDANON = 1 << 61
    _MASK_SOFTDIRTY = 1 << 55
    _MASK_PFN = (1 << 55) - 1

    class VMA(object):
        """Per-mapping byte counts and page-frame reference counts."""

        def __init__(self, vsize, present, swapped, pageframes):
            self._vsize = vsize
            self._present = present
            self._swapped = swapped
            self._pageframes = pageframes

        @property
        def vsize(self):
            return int(self._vsize)

        @property
        def present(self):
            return int(self._present)

        @property
        def swapped(self):
            return int(self._swapped)

        @property
        def pageframes(self):
            return self._pageframes

    def __init__(self, vsize, present, swapped, vma_internals, in_process_dup):
        self._vsize = vsize
        self._present = present
        self._swapped = swapped
        self._vma_internals = vma_internals
        self._in_process_dup = in_process_dup

    @staticmethod
    def load(pid, maps):
        """Reads /proc/<pid>/pagemap for every mapping in |maps|.

        Args:
            pid: The process id whose pagemap is read.
            maps: An iterable of ProcMapsEntry (e.g. a ProcMaps).

        Returns:
            A ProcPagemap holding the totals and one ProcPagemap.VMA per
            mapping.
        """
        total_present = 0
        total_swapped = 0
        total_vsize = 0
        in_process_dup = 0
        vma_internals = collections.OrderedDict()
        process_pageframe_set = set()

        pagemap_fd = os.open(
            os.path.join('/proc', str(pid), 'pagemap'), os.O_RDONLY)
        try:
            for vma in maps:
                pagemap_values = ProcPagemap._read_values(pagemap_fd, vma, pid)
                vsize, present, swapped, pageframes, dup = (
                    ProcPagemap._count_pages(pagemap_values,
                                             process_pageframe_set))
                vma_internals[vma] = ProcPagemap.VMA(
                    vsize, present, swapped, pageframes)
                total_present += present
                total_swapped += swapped
                total_vsize += vsize
                in_process_dup += dup
        finally:
            # Release the descriptor even if a read or unpack fails.
            os.close(pagemap_fd)

        return ProcPagemap(total_vsize, total_present, total_swapped,
                           vma_internals, in_process_dup)

    @staticmethod
    def _read_values(pagemap_fd, vma, pid):
        """Returns the tuple of 64-bit pagemap values covering |vma|."""
        value_size = ProcPagemap._BYTES_PER_PAGEMAP_VALUE
        begin_offset = ProcPagemap._offset(vma.begin)
        chunk_size = ProcPagemap._offset(vma.end) - begin_offset
        os.lseek(pagemap_fd, begin_offset, os.SEEK_SET)
        buf = os.read(pagemap_fd, chunk_size)
        if len(buf) < chunk_size:
            _LOGGER.warning(
                'Failed to read pagemap at 0x%x in %d.', vma.begin, pid)
        # Drop a trailing partial value so that a short read still unpacks.
        count = len(buf) // value_size
        return struct.unpack('=%dQ' % count, buf[:count * value_size])

    @staticmethod
    def _count_pages(pagemap_values, process_pageframe_set):
        """Accumulates the byte counts for one mapping.

        Args:
            pagemap_values: The pagemap values of the mapping's pages.
            process_pageframe_set: Page frame numbers already seen in this
                process; updated in place.

        Returns:
            A tuple (vsize, present, swapped, pageframes, in_process_dup).
            |pageframes| maps each present page frame number to the number of
            times this mapping references it.
        """
        page_size = ProcPagemap._BYTES_PER_OS_PAGE
        vsize = 0
        present = 0
        swapped = 0
        in_process_dup = 0
        pageframes = collections.defaultdict(int)
        for pagemap_value in pagemap_values:
            vsize += page_size
            if pagemap_value & ProcPagemap._MASK_PRESENT:
                pfn = pagemap_value & ProcPagemap._MASK_PFN
                if pfn in process_pageframe_set:
                    in_process_dup += page_size
                else:
                    process_pageframe_set.add(pfn)
                # A frame referenced twice by one mapping counts once.
                if pfn not in pageframes:
                    present += page_size
                pageframes[pfn] += 1
            if pagemap_value & ProcPagemap._MASK_SWAPPED:
                swapped += page_size
        return vsize, present, swapped, pageframes, in_process_dup

    @staticmethod
    def _offset(virtual_address):
        """Returns the pagemap file offset for |virtual_address|."""
        return virtual_address // ProcPagemap._VIRTUAL_TO_PAGEMAP_OFFSET

    @property
    def vsize(self):
        return int(self._vsize)

    @property
    def present(self):
        return int(self._present)

    @property
    def swapped(self):
        return int(self._swapped)

    @property
    def vma_internals(self):
        """An OrderedDict mapping each ProcMapsEntry to its ProcPagemap.VMA."""
        return self._vma_internals


class _ProcessMemory(object):
    """Aggregates process memory information from /proc for manual testing."""

    def __init__(self, pid):
        self._pid = pid
        self._maps = None
        self._pagemap = None
        self._stat = None
        self._status = None
        self._statm = None
        self._smaps = []

    def _read(self, proc_file):
        """Returns the lines of /proc/<pid>/<proc_file>."""
        path = os.path.join('/proc', str(self._pid), proc_file)
        with open(path, 'r') as proc_f:
            return proc_f.readlines()

    def read_all(self):
        """Loads every supported /proc file for this process."""
        self.read_stat()
        self.read_statm()
        self.read_status()
        self.read_smaps()
        self.read_maps()
        self.read_pagemap(self._maps)

    def read_maps(self):
        self._maps = ProcMaps.load(self._pid)

    def read_pagemap(self, maps):
        self._pagemap = ProcPagemap.load(self._pid, maps)

    def read_smaps(self):
        self._smaps = ProcSmaps.load(self._pid)

    def read_stat(self):
        self._stat = ProcStat.load(self._pid)

    def read_statm(self):
        self._statm = ProcStatm.load(self._pid)

    def read_status(self):
        self._status = ProcStatus.load(self._pid)

    @property
    def pid(self):
        return self._pid

    @property
    def maps(self):
        return self._maps

    @property
    def pagemap(self):
        return self._pagemap

    @property
    def smaps(self):
        return self._smaps

    @property
    def stat(self):
        return self._stat

    @property
    def statm(self):
        return self._statm

    @property
    def status(self):
        return self._status


def main(argv):
    """The main function for manual testing.

    Args:
        argv: The command line; every element after the first must be a PID.

    Returns:
        0 on completion.

    Raises:
        SyntaxError: An argument is not an integer.
    """
    _LOGGER.setLevel(logging.WARNING)
    handler = logging.StreamHandler()
    handler.setLevel(logging.WARNING)
    handler.setFormatter(logging.Formatter(
        '%(asctime)s:%(name)s:%(levelname)s:%(message)s'))
    _LOGGER.addHandler(handler)

    pids = []
    for arg in argv[1:]:
        try:
            pid = int(arg)
        except ValueError:
            raise SyntaxError("%s is not an integer." % arg)
        else:
            pids.append(pid)

    procs = {}
    for pid in pids:
        proc = _ProcessMemory(pid)
        procs[pid] = proc
        proc.read_all()

        # Each print takes one pre-formatted string, so the call form below
        # behaves the same whether print is a statement or a function.
        print('=== PID: %d ===' % pid)

        print(' stat: %d' % proc.stat.vsize)
        print(' statm: %d' % (proc.statm.size * 4096))
        print(' status: %d (Peak:%d)' % (proc.status.vm_size * 1024,
                                         proc.status.vm_peak * 1024))
        print(' smaps: %d' % (proc.smaps.size * 1024))
        print('pagemap: %d' % proc.pagemap.vsize)
        print(' stat: %d' % (proc.stat.rss * 4096))
        print(' statm: %d' % (proc.statm.resident * 4096))
        print(' status: %d (Peak:%d)' % (proc.status.vm_rss * 1024,
                                         proc.status.vm_hwm * 1024))
        print(' smaps: %d' % (proc.smaps.rss * 1024))
        print('pagemap: %d' % proc.pagemap.present)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))