#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# A Python library to read and store procfs (/proc) information on Linux.
#
# Each information storage class in this file stores the original data as
# faithfully as reasonably possible. Translation is done only when requested,
# so that it is always possible to probe the original data.


import collections
import logging
import os
import re
import struct
import sys


class _NullHandler(logging.Handler):
  """A logging handler that discards every record."""

  def emit(self, record):
    pass


_LOGGER = logging.getLogger('procfs')
_LOGGER.addHandler(_NullHandler())


class ProcStat(object):
  """Reads and stores information in /proc/pid/stat.

  Only the PID, VSIZE and RSS fields are kept (as the matched strings); they
  are converted to int on access.  The raw lines are available through `raw`.
  """
  # Every fragment is a raw string so that '\(' is a regex escape rather than
  # an (invalid) Python string escape.  Fields that the kernel prints as
  # signed values accept a leading '-'; e.g. NICE is negative for processes
  # with a raised priority.  STATE accepts any letter because the set of
  # state characters has grown over kernel versions.
  _PATTERN = re.compile(
      r'^'
      r'(?P<PID>-?[0-9]+) '
      r'\((?P<COMM>.+)\) '
      r'(?P<STATE>[A-Z]) '
      r'(?P<PPID>-?[0-9]+) '
      r'(?P<PGRP>-?[0-9]+) '
      r'(?P<SESSION>-?[0-9]+) '
      r'(?P<TTY_NR>-?[0-9]+) '
      r'(?P<TPGID>-?[0-9]+) '
      r'(?P<FLAGS>[0-9]+) '
      r'(?P<MINFIT>[0-9]+) '
      r'(?P<CMINFIT>[0-9]+) '
      r'(?P<MAJFIT>[0-9]+) '
      r'(?P<CMAJFIT>[0-9]+) '
      r'(?P<UTIME>[0-9]+) '
      r'(?P<STIME>[0-9]+) '
      r'(?P<CUTIME>-?[0-9]+) '
      r'(?P<CSTIME>-?[0-9]+) '
      r'(?P<PRIORITY>-?[0-9]+) '
      r'(?P<NICE>-?[0-9]+) '
      r'(?P<NUM_THREADS>-?[0-9]+) '
      r'(?P<ITREALVALUE>-?[0-9]+) '
      r'(?P<STARTTIME>[0-9]+) '
      r'(?P<VSIZE>[0-9]+) '
      r'(?P<RSS>-?[0-9]+) '
      r'(?P<RSSLIM>[0-9]+) '
      r'(?P<STARTCODE>[0-9]+) '
      r'(?P<ENDCODE>[0-9]+) '
      r'(?P<STARTSTACK>[0-9]+) '
      r'(?P<KSTKESP>[0-9]+) '
      r'(?P<KSTKEIP>[0-9]+) '
      r'(?P<SIGNAL>[0-9]+) '
      r'(?P<BLOCKED>[0-9]+) '
      r'(?P<SIGIGNORE>[0-9]+) '
      r'(?P<SIGCATCH>[0-9]+) '
      r'(?P<WCHAN>[0-9]+) '
      r'(?P<NSWAP>[0-9]+) '
      r'(?P<CNSWAP>[0-9]+) '
      r'(?P<EXIT_SIGNAL>-?[0-9]+) '
      r'(?P<PROCESSOR>-?[0-9]+) '
      r'(?P<RT_PRIORITY>[0-9]+) '
      r'(?P<POLICY>[0-9]+) '
      r'(?P<DELAYACCT_BLKIO_TICKS>[0-9]+) '
      r'(?P<GUEST_TIME>[0-9]+) '
      r'(?P<CGUEST_TIME>-?[0-9]+)',
      re.IGNORECASE)

  def __init__(self, raw, pid, vsize, rss):
    self._raw = raw
    self._pid = pid
    self._vsize = vsize
    self._rss = rss

  @staticmethod
  def load_file(stat_f):
    """Parses an opened stat file.

    Args:
      stat_f: A file-like object providing readlines().

    Returns:
      A ProcStat, or None if the file is empty or its first line does not
      match the expected format.
    """
    raw = stat_f.readlines()
    stat = ProcStat._PATTERN.match(raw[0]) if raw else None
    if not stat:
      _LOGGER.warning('Unknown /proc/pid/stat format.')
      return None
    fields = stat.groupdict()
    return ProcStat(raw,
                    fields.get('PID'),
                    fields.get('VSIZE'),
                    fields.get('RSS'))

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/stat; returns None if it cannot be read or parsed."""
    try:
      with open(os.path.join('/proc', str(pid), 'stat'), 'r') as stat_f:
        return ProcStat.load_file(stat_f)
    except (IOError, OSError):
      return None

  @property
  def raw(self):
    return self._raw

  @property
  def pid(self):
    return int(self._pid)

  @property
  def vsize(self):
    return int(self._vsize)

  @property
  def rss(self):
    return int(self._rss)


class ProcStatm(object):
  """Reads and stores information in /proc/pid/statm.

  The seven fields are stored as matched strings and converted to int on
  access.
  """
  _PATTERN = re.compile(
      r'^'
      r'(?P<SIZE>[0-9]+) '
      r'(?P<RESIDENT>[0-9]+) '
      r'(?P<SHARE>[0-9]+) '
      r'(?P<TEXT>[0-9]+) '
      r'(?P<LIB>[0-9]+) '
      r'(?P<DATA>[0-9]+) '
      r'(?P<DT>[0-9]+)',
      re.IGNORECASE)

  def __init__(self, raw, size, resident, share, text, lib, data, dt):
    self._raw = raw
    self._size = size
    self._resident = resident
    self._share = share
    self._text = text
    self._lib = lib
    self._data = data
    self._dt = dt

  @staticmethod
  def load_file(statm_f):
    """Parses an opened statm file.

    Args:
      statm_f: A file-like object providing readlines().

    Returns:
      A ProcStatm, or None if reading fails, the file is empty, or its first
      line does not match the expected format.
    """
    try:
      raw = statm_f.readlines()
    except (IOError, OSError):
      return None
    statm = ProcStatm._PATTERN.match(raw[0]) if raw else None
    if not statm:
      _LOGGER.warning('Unknown /proc/pid/statm format.')
      return None
    fields = statm.groupdict()
    return ProcStatm(raw,
                     fields.get('SIZE'),
                     fields.get('RESIDENT'),
                     fields.get('SHARE'),
                     fields.get('TEXT'),
                     fields.get('LIB'),
                     fields.get('DATA'),
                     fields.get('DT'))

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/statm; returns None if it cannot be read or parsed."""
    try:
      with open(os.path.join('/proc', str(pid), 'statm'), 'r') as statm_f:
        return ProcStatm.load_file(statm_f)
    except (IOError, OSError):
      return None

  @property
  def raw(self):
    return self._raw

  @property
  def size(self):
    return int(self._size)

  @property
  def resident(self):
    return int(self._resident)

  @property
  def share(self):
    return int(self._share)

  @property
  def text(self):
    return int(self._text)

  @property
  def lib(self):
    return int(self._lib)

  @property
  def data(self):
    return int(self._data)

  @property
  def dt(self):
    return int(self._dt)


class ProcStatus(object):
  """Reads and stores information in /proc/pid/status.

  Values are stored as the strings found after 'Name:' in each line; a field
  that does not appear in the file is stored as None.
  """
  _PATTERN = re.compile(r'^(?P<NAME>[A-Za-z0-9_]+):\s+(?P<VALUE>.*)')

  def __init__(self, raw, dct):
    self._raw = raw
    self._pid = dct.get('Pid')
    self._name = dct.get('Name')
    self._vm_peak = dct.get('VmPeak')
    self._vm_size = dct.get('VmSize')
    self._vm_lck = dct.get('VmLck')
    self._vm_pin = dct.get('VmPin')
    self._vm_hwm = dct.get('VmHWM')
    self._vm_rss = dct.get('VmRSS')
    self._vm_data = dct.get('VmData')
    self._vm_stack = dct.get('VmStk')
    self._vm_exe = dct.get('VmExe')
    self._vm_lib = dct.get('VmLib')
    self._vm_pte = dct.get('VmPTE')
    self._vm_swap = dct.get('VmSwap')

  @staticmethod
  def load_file(status_f):
    """Parses an opened status file.

    Args:
      status_f: A file-like object providing readlines().

    Returns:
      A ProcStatus.

    Raises:
      SyntaxError: A line does not look like 'Name: value'.
    """
    raw = status_f.readlines()
    dct = {}
    for line in raw:
      status_match = ProcStatus._PATTERN.match(line)
      if not status_match:
        raise SyntaxError('Unknown /proc/pid/status format.')
      match_dict = status_match.groupdict()
      dct[match_dict['NAME']] = match_dict['VALUE']
    return ProcStatus(raw, dct)

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/status.

    Unlike the other loaders in this file, errors from open() are propagated
    to the caller rather than turned into None.
    """
    with open(os.path.join('/proc', str(pid), 'status'), 'r') as status_f:
      return ProcStatus.load_file(status_f)

  @staticmethod
  def _kilobytes(value, name):
    """Returns the leading integer of a '<number> kB' string.

    Raises:
      ValueError: |value| is missing (None) or does not end with 'kB'.
    """
    if value is not None and value.endswith('kB'):
      return int(value.split()[0])
    raise ValueError('%s is not in kB.' % name)

  @property
  def raw(self):
    return self._raw

  @property
  def pid(self):
    return int(self._pid)

  @property
  def vm_peak(self):
    """Returns a high-water (peak) virtual memory size in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_peak, 'VmPeak')

  @property
  def vm_size(self):
    """Returns a virtual memory size in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_size, 'VmSize')

  @property
  def vm_hwm(self):
    """Returns a high-water (peak) resident set size (RSS) in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_hwm, 'VmHWM')

  @property
  def vm_rss(self):
    """Returns a resident set size (RSS) in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_rss, 'VmRSS')


class ProcMapsEntry(object):
  """A class representing one line in /proc/pid/maps."""

  def __init__(
      self, begin, end, readable, writable, executable, private, offset,
      major, minor, inode, name):
    self.begin = begin
    self.end = end
    self.readable = readable
    self.writable = writable
    self.executable = executable
    self.private = private
    self.offset = offset
    self.major = major
    self.minor = minor
    self.inode = inode
    self.name = name

  def as_dict(self):
    """Returns all the fields of this entry as a plain dict."""
    return {
        'begin': self.begin,
        'end': self.end,
        'readable': self.readable,
        'writable': self.writable,
        'executable': self.executable,
        'private': self.private,
        'offset': self.offset,
        'major': self.major,
        'minor': self.minor,
        'inode': self.inode,
        'name': self.name,
    }


class ProcMaps(object):
  """Reads and stores information in /proc/pid/maps.

  Entries are keyed by their begin address and iterated in ascending order
  of that address.
  """

  MAPS_PATTERN = re.compile(
      r'^([a-f0-9]+)-([a-f0-9]+)\s+(.)(.)(.)(.)\s+([a-f0-9]+)\s+(\S+):(\S+)\s+'
      r'(\d+)\s*(.*)$', re.IGNORECASE)

  EXECUTABLE_PATTERN = re.compile(
      r'\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?')

  def __init__(self):
    self._sorted_indexes = []
    self._dictionary = {}
    self._sorted = True

  def iter(self, condition):
    """Yields entries in address order, filtered by |condition|.

    Args:
      condition: A callable taking a ProcMapsEntry and returning a truth
          value, or a false value (e.g. None) to yield every entry.
    """
    if not self._sorted:
      self._sorted_indexes.sort()
      self._sorted = True
    for index in self._sorted_indexes:
      if not condition or condition(self._dictionary[index]):
        yield self._dictionary[index]

  def __iter__(self):
    return self.iter(None)

  @staticmethod
  def load_file(maps_f):
    """Builds a ProcMaps from an iterable of maps lines."""
    table = ProcMaps()
    for line in maps_f:
      table.append_line(line)
    return table

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/maps; returns None if it cannot be read."""
    try:
      with open(os.path.join('/proc', str(pid), 'maps'), 'r') as maps_f:
        return ProcMaps.load_file(maps_f)
    except (IOError, OSError):
      return None

  def append_line(self, line):
    """Parses |line| and stores the entry; returns it, or None if unparsed."""
    entry = self.parse_line(line)
    if entry:
      self._append_entry(entry)
    return entry

  @staticmethod
  def parse_line(line):
    """Returns a ProcMapsEntry for a maps line, or None if it doesn't match."""
    matched = ProcMaps.MAPS_PATTERN.match(line)
    if not matched:
      return None
    return ProcMapsEntry(  # pylint: disable=W0212
        int(matched.group(1), 16),  # begin
        int(matched.group(2), 16),  # end
        matched.group(3),           # readable
        matched.group(4),           # writable
        matched.group(5),           # executable
        matched.group(6),           # private
        int(matched.group(7), 16),  # offset
        matched.group(8),           # major
        matched.group(9),           # minor
        int(matched.group(10), 10), # inode
        matched.group(11)           # name
        )

  @staticmethod
  def constants(entry):
    return entry.writable == '-' and entry.executable == '-'

  @staticmethod
  def executable(entry):
    return entry.executable == 'x'

  @staticmethod
  def executable_and_constants(entry):
    return ((entry.writable == '-' and entry.executable == '-') or
            entry.executable == 'x')

  def _append_entry(self, entry):
    # An entry with an already-known begin address replaces the stored one;
    # its index must not be recorded twice or iteration would yield the
    # entry twice.
    if entry.begin not in self._dictionary:
      if self._sorted_indexes and self._sorted_indexes[-1] > entry.begin:
        self._sorted = False
      self._sorted_indexes.append(entry.begin)
    self._dictionary[entry.begin] = entry


class ProcSmaps(object):
  """Reads and stores information in /proc/pid/smaps.

  Totals over all mappings are kept as ints (in the unit printed by the
  kernel); per-mapping values are kept in VMA objects keyed by the
  corresponding ProcMapsEntry.
  """
  _SMAPS_PATTERN = re.compile(r'^(?P<NAME>[A-Za-z0-9_]+):\s+(?P<VALUE>.*)')

  class VMA(object):
    """Per-mapping smaps values, stored as the raw strings from the file."""

    # smaps field name -> attribute that stores its raw value.
    _FIELD_ATTRIBUTES = {
        'Size': '_size',
        'Rss': '_rss',
        'Pss': '_pss',
        'Referenced': '_referenced',
        'Private_Clean': '_private_clean',
        'Shared_Clean': '_shared_clean',
        'KernelPageSize': '_kernel_page_size',
        'MMUPageSize': '_mmu_page_size',
    }

    def __init__(self):
      self._size = 0
      self._rss = 0
      self._pss = 0

    def append(self, name, value):
      """Stores |value| if |name| is a known smaps field; else ignores it."""
      attribute = self._FIELD_ATTRIBUTES.get(name)
      if attribute is not None:
        setattr(self, attribute, value)

    @staticmethod
    def _to_int(value):
      # |value| is either the int default set in __init__ or a string such as
      # '44 kB' taken from the file.
      if hasattr(value, 'endswith') and value.endswith('kB'):
        return int(value.split()[0])
      return int(value)

    @property
    def size(self):
      return self._to_int(self._size)

    @property
    def rss(self):
      return self._to_int(self._rss)

    @property
    def pss(self):
      return self._to_int(self._pss)

  def __init__(self, raw, total_dct, maps, vma_internals):
    self._raw = raw
    # .get() so that a plain dict lacking some fields is accepted too.
    self._size = total_dct.get('Size', 0)
    self._rss = total_dct.get('Rss', 0)
    self._pss = total_dct.get('Pss', 0)
    self._referenced = total_dct.get('Referenced', 0)
    self._shared_clean = total_dct.get('Shared_Clean', 0)
    self._private_clean = total_dct.get('Private_Clean', 0)
    self._kernel_page_size = total_dct.get('KernelPageSize', 0)
    self._mmu_page_size = total_dct.get('MMUPageSize', 0)
    self._maps = maps
    self._vma_internals = vma_internals

  @staticmethod
  def load_file(smaps_f):
    """Parses an opened smaps file.

    Args:
      smaps_f: A file-like object providing readlines().

    Returns:
      A ProcSmaps.
    """
    raw = smaps_f.readlines()

    vma = None
    vma_internals = collections.OrderedDict()
    total_dct = collections.defaultdict(int)
    maps = ProcMaps()
    for line in raw:
      if ProcMaps.MAPS_PATTERN.match(line):
        # A mapping header line: subsequent field lines belong to it.
        vma = maps.append_line(line.strip())
        vma_internals[vma] = ProcSmaps.VMA()
        continue
      smaps_match = ProcSmaps._SMAPS_PATTERN.match(line)
      if not smaps_match:
        continue
      name = smaps_match.group('NAME')
      value = smaps_match.group('VALUE')
      if vma is not None:
        vma_internals[vma].append(name, value)
      try:
        total_dct[name] += int(value.split()[0])
      except (IndexError, ValueError):
        # Not a numeric field (e.g. 'VmFlags: rd ex mr'); nothing to total.
        continue

    return ProcSmaps(raw, total_dct, maps, vma_internals)

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/smaps.  Errors from open() are propagated."""
    with open(os.path.join('/proc', str(pid), 'smaps'), 'r') as smaps_f:
      return ProcSmaps.load_file(smaps_f)

  @property
  def size(self):
    return self._size

  @property
  def rss(self):
    return self._rss

  @property
  def referenced(self):
    return self._referenced

  @property
  def pss(self):
    return self._pss

  @property
  def private_clean(self):
    return self._private_clean

  @property
  def shared_clean(self):
    return self._shared_clean

  @property
  def kernel_page_size(self):
    return self._kernel_page_size

  @property
  def mmu_page_size(self):
    return self._mmu_page_size

  @property
  def vma_internals(self):
    return self._vma_internals


class ProcPagemap(object):
  """Reads and stores partial information in /proc/pid/pagemap.

  It picks up virtual addresses to read based on ProcMaps (/proc/pid/maps).
  See https://www.kernel.org/doc/Documentation/vm/pagemap.txt for details.
  """
  _BYTES_PER_PAGEMAP_VALUE = 8
  _BYTES_PER_OS_PAGE = 4096
  # Floor division keeps this (and every offset derived from it) an int on
  # both Python 2 and 3; os.lseek() and os.read() reject floats.
  _VIRTUAL_TO_PAGEMAP_OFFSET = _BYTES_PER_OS_PAGE // _BYTES_PER_PAGEMAP_VALUE

  _MASK_PRESENT = 1 << 63
  _MASK_SWAPPED = 1 << 62
  _MASK_FILEPAGE_OR_SHAREDANON = 1 << 61
  _MASK_SOFTDIRTY = 1 << 55
  _MASK_PFN = (1 << 55) - 1

  class VMA(object):
    """Pagemap-derived byte counts and page frame counts for one mapping."""

    def __init__(self, vsize, present, swapped, pageframes):
      self._vsize = vsize
      self._present = present
      self._swapped = swapped
      self._pageframes = pageframes

    @property
    def vsize(self):
      return int(self._vsize)

    @property
    def present(self):
      return int(self._present)

    @property
    def swapped(self):
      return int(self._swapped)

    @property
    def pageframes(self):
      return self._pageframes

  def __init__(self, vsize, present, swapped, vma_internals, in_process_dup):
    self._vsize = vsize
    self._present = present
    self._swapped = swapped
    self._vma_internals = vma_internals
    self._in_process_dup = in_process_dup

  @staticmethod
  def load(pid, maps):
    """Loads the parts of /proc/<pid>/pagemap covered by |maps|.

    Args:
      pid: The process ID.
      maps: An iterable of ProcMapsEntry (typically a ProcMaps), or None.

    Returns:
      A ProcPagemap, or None if |maps| is None or the pagemap file cannot be
      opened, read or closed.
    """
    if maps is None:
      return None
    try:
      pagemap_fd = os.open(
          os.path.join('/proc', str(pid), 'pagemap'), os.O_RDONLY)
    except (IOError, OSError):
      return None

    result = None
    try:
      result = ProcPagemap._load_from_fd(pagemap_fd, pid, maps)
    finally:
      # Always release the descriptor, including on read failures and on
      # unexpected exceptions.
      try:
        os.close(pagemap_fd)
      except OSError:
        result = None
    return result

  @staticmethod
  def _load_from_fd(pagemap_fd, pid, maps):
    """Reads pagemap values for every entry of |maps| from an open fd.

    Returns a ProcPagemap, or None if seeking or reading fails.  The caller
    owns |pagemap_fd| and is responsible for closing it.
    """
    page_size = ProcPagemap._BYTES_PER_OS_PAGE
    value_size = ProcPagemap._BYTES_PER_PAGEMAP_VALUE
    total_present = 0
    total_swapped = 0
    total_vsize = 0
    in_process_dup = 0
    vma_internals = collections.OrderedDict()
    process_pageframe_set = set()

    for vma in maps:
      present = 0
      swapped = 0
      vsize = 0
      pageframes = collections.defaultdict(int)
      begin_offset = ProcPagemap._offset(vma.begin)
      chunk_size = ProcPagemap._offset(vma.end) - begin_offset
      try:
        os.lseek(pagemap_fd, begin_offset, os.SEEK_SET)
        buf = os.read(pagemap_fd, chunk_size)
      except (IOError, OSError):
        return None
      if len(buf) < chunk_size:
        _LOGGER.warning(
            'Failed to read pagemap at 0x%x in %d.', vma.begin, pid)
      # Only whole 8-byte values can be unpacked; drop a trailing partial
      # value from a short read.
      value_count = len(buf) // value_size
      pagemap_values = struct.unpack(
          '=%dQ' % value_count, buf[:value_count * value_size])
      for pagemap_value in pagemap_values:
        vsize += page_size
        if pagemap_value & ProcPagemap._MASK_PRESENT:
          pageframe = pagemap_value & ProcPagemap._MASK_PFN
          if pageframe in process_pageframe_set:
            in_process_dup += page_size
          else:
            process_pageframe_set.add(pageframe)
          # A page frame mapped several times in this entry counts once.
          if pageframe not in pageframes:
            present += page_size
          pageframes[pageframe] += 1
        if pagemap_value & ProcPagemap._MASK_SWAPPED:
          swapped += page_size
      vma_internals[vma] = ProcPagemap.VMA(vsize, present, swapped, pageframes)
      total_present += present
      total_swapped += swapped
      total_vsize += vsize

    return ProcPagemap(total_vsize, total_present, total_swapped,
                       vma_internals, in_process_dup)

  @staticmethod
  def _offset(virtual_address):
    """Returns the byte offset in pagemap of the value for an address."""
    return virtual_address // ProcPagemap._VIRTUAL_TO_PAGEMAP_OFFSET

  @property
  def vsize(self):
    return int(self._vsize)

  @property
  def present(self):
    return int(self._present)

  @property
  def swapped(self):
    return int(self._swapped)

  @property
  def vma_internals(self):
    return self._vma_internals


class _ProcessMemory(object):
  """Aggregates process memory information from /proc for manual testing."""
  def __init__(self, pid):
    self._pid = pid
    self._maps = None
    self._pagemap = None
    self._stat = None
    self._status = None
    self._statm = None
    self._smaps = []

  def _read(self, proc_file):
    """Returns the lines of /proc/<pid>/<proc_file>."""
    lines = []
    with open(os.path.join('/proc', str(self._pid), proc_file), 'r') as proc_f:
      lines = proc_f.readlines()
    return lines

  def read_all(self):
    self.read_stat()
    self.read_statm()
    self.read_status()
    self.read_smaps()
    self.read_maps()
    # Must come after read_maps(): pagemap is read for the mapped ranges.
    self.read_pagemap(self._maps)

  def read_maps(self):
    self._maps = ProcMaps.load(self._pid)

  def read_pagemap(self, maps):
    self._pagemap = ProcPagemap.load(self._pid, maps)

  def read_smaps(self):
    self._smaps = ProcSmaps.load(self._pid)

  def read_stat(self):
    self._stat = ProcStat.load(self._pid)

  def read_statm(self):
    self._statm = ProcStatm.load(self._pid)

  def read_status(self):
    self._status = ProcStatus.load(self._pid)

  @property
  def pid(self):
    return self._pid

  @property
  def maps(self):
    return self._maps

  @property
  def pagemap(self):
    return self._pagemap

  @property
  def smaps(self):
    return self._smaps

  @property
  def stat(self):
    return self._stat

  @property
  def statm(self):
    return self._statm

  @property
  def status(self):
    return self._status


def main(argv):
  """The main function for manual testing.

  Prints, for each PID given in argv[1:], the virtual and resident sizes as
  reported by the different /proc files.

  Raises:
    SyntaxError: An argument is not an integer.
  """
  _LOGGER.setLevel(logging.WARNING)
  handler = logging.StreamHandler()
  handler.setLevel(logging.WARNING)
  handler.setFormatter(logging.Formatter(
      '%(asctime)s:%(name)s:%(levelname)s:%(message)s'))
  _LOGGER.addHandler(handler)

  pids = []
  for arg in argv[1:]:
    try:
      pid = int(arg)
    except ValueError:
      raise SyntaxError("%s is not an integer." % arg)
    else:
      pids.append(pid)

  procs = {}
  for pid in pids:
    procs[pid] = _ProcessMemory(pid)
    procs[pid].read_all()

    # print(<single expression>) behaves identically as a Python 2 print
    # statement and as a Python 3 function call.
    print('=== PID: %d ===' % pid)

    print(' stat: %d' % procs[pid].stat.vsize)
    print(' statm: %d' % (procs[pid].statm.size * 4096))
    print(' status: %d (Peak:%d)' % (procs[pid].status.vm_size * 1024,
                                     procs[pid].status.vm_peak * 1024))
    print(' smaps: %d' % (procs[pid].smaps.size * 1024))
    print('pagemap: %d' % procs[pid].pagemap.vsize)
    print(' stat: %d' % (procs[pid].stat.rss * 4096))
    print(' statm: %d' % (procs[pid].statm.resident * 4096))
    print(' status: %d (Peak:%d)' % (procs[pid].status.vm_rss * 1024,
                                     procs[pid].status.vm_hwm * 1024))
    print(' smaps: %d' % (procs[pid].smaps.rss * 1024))
    print('pagemap: %d' % procs[pid].pagemap.present)

  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv))