whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 01897f3e05ede4d66c0f9df465fde1d67a1d733f
parent e9ebc2151f88600e726e51e5f7ca9c33ad53b35f
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Sat,  3 Nov 2018 18:13:43 -0700

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates and fixes from Ingo Molnar:
 "These are almost all tooling updates: 'perf top', 'perf trace' and
  'perf script' fixes and updates, an UAPI header sync with the merge
  window versions, license marker updates, much improved Sparc support
  from David Miller, and a number of fixes"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (66 commits)
  perf intel-pt/bts: Calculate cpumode for synthesized samples
  perf intel-pt: Insert callchain context into synthesized callchains
  perf tools: Don't clone maps from parent when synthesizing forks
  perf top: Start display thread earlier
  tools headers uapi: Update linux/if_link.h header copy
  tools headers uapi: Update linux/netlink.h header copy
  tools headers: Sync the various kvm.h header copies
  tools include uapi: Update linux/mmap.h copy
  perf trace beauty: Use the mmap flags table generated from headers
  perf beauty: Wire up the mmap flags table generator to the Makefile
  perf beauty: Add a generator for MAP_ mmap's flag constants
  tools include uapi: Update asound.h copy
  tools arch uapi: Update asm-generic/unistd.h and arm64 unistd.h copies
  tools include uapi: Update linux/fs.h copy
  perf callchain: Honour the ordering of PERF_CONTEXT_{USER,KERNEL,etc}
  perf cs-etm: Correct CPU mode for samples
  perf unwind: Take pgoff into account when reporting elf to libdwfl
  perf top: Do not use overwrite mode by default
  perf top: Allow disabling the overwrite mode
  perf trace: Beautify mount's first pathname arg
  ...

Diffstat:
Minclude/uapi/linux/perf_event.h | 2++
Mkernel/events/core.c | 2+-
Mtools/arch/arm64/include/uapi/asm/unistd.h | 1+
Mtools/arch/powerpc/include/uapi/asm/kvm.h | 1+
Mtools/arch/s390/include/uapi/asm/kvm.h | 2++
Mtools/arch/x86/include/uapi/asm/kvm.h | 6++----
Mtools/include/uapi/asm-generic/unistd.h | 2++
Atools/include/uapi/linux/fs.h | 393+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtools/include/uapi/linux/if_link.h | 1+
Mtools/include/uapi/linux/kvm.h | 21+++++++++++++++++++--
Mtools/include/uapi/linux/mman.h | 2++
Mtools/include/uapi/linux/netlink.h | 1+
Mtools/include/uapi/linux/perf_event.h | 2++
Mtools/include/uapi/sound/asound.h | 2+-
Mtools/lib/subcmd/parse-options.c | 19+++++++++++++++++++
Mtools/lib/subcmd/parse-options.h | 2++
Atools/perf/Documentation/build-xed.txt | 19+++++++++++++++++++
Mtools/perf/Documentation/intel-pt.txt | 2+-
Mtools/perf/Documentation/itrace.txt | 7++++---
Mtools/perf/Documentation/perf-script.txt | 18++++++++++++++++++
Mtools/perf/Documentation/perf-top.txt | 10++++++++++
Mtools/perf/Documentation/perf-trace.txt | 67+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtools/perf/Makefile.perf | 19+++++++++++++++++++
Mtools/perf/arch/arm64/entry/syscalls/mksyscalltbl | 2+-
Mtools/perf/arch/sparc/Makefile | 2++
Atools/perf/arch/sparc/annotate/instructions.c | 169+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtools/perf/builtin-record.c | 24++++++++++++++++++++++--
Mtools/perf/builtin-script.c | 166+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
Mtools/perf/builtin-stat.c | 24++++++++++++++++++++++++
Mtools/perf/builtin-top.c | 21+++++++++++++++------
Mtools/perf/builtin-trace.c | 98++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Mtools/perf/check-headers.sh | 1+
Mtools/perf/perf.h | 1+
Dtools/perf/scripts/python/call-graph-from-sql.py | 339-------------------------------------------------------------------------------
Mtools/perf/scripts/python/export-to-postgresql.py | 2+-
Mtools/perf/scripts/python/export-to-sqlite.py | 2+-
Atools/perf/scripts/python/exported-sql-viewer.py | 2128+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtools/perf/trace/beauty/Build | 1+
Mtools/perf/trace/beauty/beauty.h | 7+++++++
Mtools/perf/trace/beauty/clone.c | 3+--
Mtools/perf/trace/beauty/drm_ioctl.sh | 1+
Mtools/perf/trace/beauty/eventfd.c | 2+-
Mtools/perf/trace/beauty/fcntl.c | 3+--
Mtools/perf/trace/beauty/flock.c | 2+-
Mtools/perf/trace/beauty/futex_op.c | 2+-
Mtools/perf/trace/beauty/futex_val3.c | 2+-
Mtools/perf/trace/beauty/ioctl.c | 3+--
Mtools/perf/trace/beauty/kcmp.c | 3+--
Mtools/perf/trace/beauty/kcmp_type.sh | 1+
Mtools/perf/trace/beauty/kvm_ioctl.sh | 1+
Mtools/perf/trace/beauty/madvise_behavior.sh | 1+
Mtools/perf/trace/beauty/mmap.c | 50++++++++++++--------------------------------------
Atools/perf/trace/beauty/mmap_flags.sh | 32++++++++++++++++++++++++++++++++
Mtools/perf/trace/beauty/mode_t.c | 2+-
Atools/perf/trace/beauty/mount_flags.c | 43+++++++++++++++++++++++++++++++++++++++++++
Atools/perf/trace/beauty/mount_flags.sh | 15+++++++++++++++
Mtools/perf/trace/beauty/msg_flags.c | 2+-
Mtools/perf/trace/beauty/open_flags.c | 2+-
Mtools/perf/trace/beauty/perf_event_open.c | 2+-
Mtools/perf/trace/beauty/perf_ioctl.sh | 1+
Mtools/perf/trace/beauty/pid.c | 3++-
Mtools/perf/trace/beauty/pkey_alloc.c | 30+++++++++++++++++-------------
Mtools/perf/trace/beauty/pkey_alloc_access_rights.sh | 1+
Mtools/perf/trace/beauty/prctl.c | 3+--
Mtools/perf/trace/beauty/prctl_option.sh | 1+
Mtools/perf/trace/beauty/sched_policy.c | 2+-
Mtools/perf/trace/beauty/seccomp.c | 2+-
Mtools/perf/trace/beauty/signum.c | 2+-
Mtools/perf/trace/beauty/sndrv_ctl_ioctl.sh | 1+
Mtools/perf/trace/beauty/sndrv_pcm_ioctl.sh | 1+
Mtools/perf/trace/beauty/sockaddr.c | 2+-
Mtools/perf/trace/beauty/socket.c | 2+-
Mtools/perf/trace/beauty/socket_ipproto.sh | 1+
Mtools/perf/trace/beauty/socket_type.c | 2+-
Mtools/perf/trace/beauty/statx.c | 3+--
Mtools/perf/trace/beauty/vhost_virtio_ioctl.sh | 1+
Mtools/perf/trace/beauty/waitid_options.c | 2+-
Mtools/perf/util/annotate.c | 8++++++++
Mtools/perf/util/auxtrace.c | 17++++++++++++-----
Mtools/perf/util/auxtrace.h | 5++++-
Mtools/perf/util/cs-etm.c | 42++++++++++++++++++++++++++++++++----------
Mtools/perf/util/env.h | 1+
Mtools/perf/util/event.c | 1+
Mtools/perf/util/evlist.c | 2+-
Mtools/perf/util/evsel.c | 27+++++++++++++++++++++------
Mtools/perf/util/evsel.h | 5+++++
Mtools/perf/util/genelf.h | 6++++++
Mtools/perf/util/header.c | 23+++++++++++++++++++++++
Mtools/perf/util/header.h | 1+
Mtools/perf/util/intel-bts.c | 20++++++++++++++------
Mtools/perf/util/intel-pt.c | 31+++++++++++++++++++------------
Mtools/perf/util/machine.c | 54++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mtools/perf/util/parse-events.c | 8++++++++
Mtools/perf/util/parse-events.h | 1+
Mtools/perf/util/parse-events.l | 1+
Mtools/perf/util/symbol-elf.c | 12+++++++++++-
Mtools/perf/util/symbol.h | 3++-
Mtools/perf/util/thread-stack.c | 44+++++++++++++++++++++++++++++++++++---------
Mtools/perf/util/thread-stack.h | 2+-
Mtools/perf/util/thread.c | 13+++++--------
Mtools/perf/util/thread.h | 4+++-
Mtools/perf/util/unwind-libdw.c | 4++--
102 files changed, 3616 insertions(+), 539 deletions(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h @@ -646,10 +646,12 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_FORK_EXEC - PERF_RECORD_FORK event (perf internal) * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) +#define PERF_RECORD_MISC_FORK_EXEC (1 << 13) #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* * These PERF_RECORD_MISC_* flags below are safely reused diff --git a/kernel/events/core.c b/kernel/events/core.c @@ -750,7 +750,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) /* * Do not update time when cgroup is not active */ - if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) + if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) __update_cgrp_time(event->cgrp); } diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -16,5 +16,6 @@ */ #define __ARCH_WANT_RENAMEAT +#define __ARCH_WANT_NEW_STAT #include <asm-generic/unistd.h> diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) +#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h @@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc { #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2 #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3 +#define KVM_S390_VM_CRYPTO_ENABLE_APIE 4 +#define KVM_S390_VM_CRYPTO_DISABLE_APIE 5 /* kvm attributes for migration mode */ #define KVM_S390_VM_MIGRATION_STOP 0 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h @@ -300,10 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - union { - __u8 pad; - __u8 pending; - }; + __u8 pending; __u32 error_code; } exception; struct { @@ -387,6 +384,7 @@ struct kvm_sync_regs { #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 +#define KVM_STATE_NESTED_EVMCS 0x00000004 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h @@ -242,10 +242,12 @@ __SYSCALL(__NR_tee, sys_tee) /* fs/stat.c */ #define __NR_readlinkat 78 __SYSCALL(__NR_readlinkat, sys_readlinkat) +#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64) #define __NR3264_fstatat 79 __SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat) #define __NR3264_fstat 80 __SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat) +#endif /* fs/sync.c */ #define __NR_sync 81 diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h @@ -0,0 +1,393 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_FS_H +#define _UAPI_LINUX_FS_H + +/* + * This file has definitions for some important file table structures + * and constants and structures used by various generic file system + * ioctl's. Please do not make any changes in this file before + * sending patches for review to linux-fsdevel@vger.kernel.org and + * linux-api@vger.kernel.org. + */ + +#include <linux/limits.h> +#include <linux/ioctl.h> +#include <linux/types.h> + +/* + * It's silly to have NR_OPEN bigger than NR_FILE, but you can change + * the file limit at runtime and only root can increase the per-process + * nr_file rlimit, so it's safe to set up a ridiculously high absolute + * upper limit on files-per-process. + * + * Some programs (notably those using select()) may have to be + * recompiled to take full advantage of the new limits.. + */ + +/* Fixed constants first: */ +#undef NR_OPEN +#define INR_OPEN_CUR 1024 /* Initial setting for nfile rlimits */ +#define INR_OPEN_MAX 4096 /* Hard limit for nfile rlimits */ + +#define BLOCK_SIZE_BITS 10 +#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) + +#define SEEK_SET 0 /* seek relative to beginning of file */ +#define SEEK_CUR 1 /* seek relative to current file position */ +#define SEEK_END 2 /* seek relative to end of file */ +#define SEEK_DATA 3 /* seek to the next data */ +#define SEEK_HOLE 4 /* seek to the next hole */ +#define SEEK_MAX SEEK_HOLE + +#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ +#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ +#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ + +struct file_clone_range { + __s64 src_fd; + __u64 src_offset; + __u64 src_length; + __u64 dest_offset; +}; + +struct fstrim_range { + __u64 start; + __u64 len; + __u64 minlen; +}; + +/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ +#define FILE_DEDUPE_RANGE_SAME 0 +#define FILE_DEDUPE_RANGE_DIFFERS 1 + +/* from struct btrfs_ioctl_file_extent_same_info */ +struct file_dedupe_range_info { + __s64 dest_fd; /* in - destination file */ + __u64 dest_offset; /* in - start of extent in destination */ + __u64 bytes_deduped; /* out - total # of bytes we were able + * to dedupe from this file. */ + /* status of this dedupe operation: + * < 0 for error + * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds + * == FILE_DEDUPE_RANGE_DIFFERS if data differs + */ + __s32 status; /* out - see above description */ + __u32 reserved; /* must be zero */ +}; + +/* from struct btrfs_ioctl_file_extent_same_args */ +struct file_dedupe_range { + __u64 src_offset; /* in - start of extent in source */ + __u64 src_length; /* in - length of extent */ + __u16 dest_count; /* in - total elements in info array */ + __u16 reserved1; /* must be zero */ + __u32 reserved2; /* must be zero */ + struct file_dedupe_range_info info[0]; +}; + +/* And dynamically-tunable limits and defaults: */ +struct files_stat_struct { + unsigned long nr_files; /* read only */ + unsigned long nr_free_files; /* read only */ + unsigned long max_files; /* tunable */ +}; + +struct inodes_stat_t { + long nr_inodes; + long nr_unused; + long dummy[5]; /* padding for sysctl ABI compatibility */ +}; + + +#define NR_FILE 8192 /* this can well be larger on a larger system */ + + +/* + * These are the fs-independent mount-flags: up to 32 flags are supported + */ +#define MS_RDONLY 1 /* Mount read-only */ +#define MS_NOSUID 2 /* Ignore suid and sgid bits */ +#define MS_NODEV 4 /* Disallow access to device special files */ +#define MS_NOEXEC 8 /* Disallow program execution */ +#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ +#define MS_REMOUNT 32 /* Alter flags of a mounted FS */ +#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOATIME 1024 /* Do not update access times. */ +#define MS_NODIRATIME 2048 /* Do not update directory access times */ +#define MS_BIND 4096 +#define MS_MOVE 8192 +#define MS_REC 16384 +#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. + MS_VERBOSE is deprecated. */ +#define MS_SILENT 32768 +#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define MS_UNBINDABLE (1<<17) /* change to unbindable */ +#define MS_PRIVATE (1<<18) /* change to private */ +#define MS_SLAVE (1<<19) /* change to slave */ +#define MS_SHARED (1<<20) /* change to shared */ +#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ +#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION (1<<23) /* Update inode I_version field */ +#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ + +/* These sb flags are internal to the kernel */ +#define MS_SUBMOUNT (1<<26) +#define MS_NOREMOTELOCK (1<<27) +#define MS_NOSEC (1<<28) +#define MS_BORN (1<<29) +#define MS_ACTIVE (1<<30) +#define MS_NOUSER (1<<31) + +/* + * Superblock flags that can be altered by MS_REMOUNT + */ +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ + MS_LAZYTIME) + +/* + * Old magic mount flag and mask + */ +#define MS_MGC_VAL 0xC0ED0000 +#define MS_MGC_MSK 0xffff0000 + +/* + * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR. + */ +struct fsxattr { + __u32 fsx_xflags; /* xflags field value (get/set) */ + __u32 fsx_extsize; /* extsize field value (get/set)*/ + __u32 fsx_nextents; /* nextents field value (get) */ + __u32 fsx_projid; /* project identifier (get/set) */ + __u32 fsx_cowextsize; /* CoW extsize field value (get/set)*/ + unsigned char fsx_pad[8]; +}; + +/* + * Flags for the fsx_xflags field + */ +#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */ +#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */ +#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */ +#define FS_XFLAG_APPEND 0x00000010 /* all writes append */ +#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ +#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */ +#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */ +#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ +#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ +#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ +#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ +#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ +#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ +#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ +#define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ +#define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */ +#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ + +/* the read-only stuff doesn't really belong here, but any other place is + probably as bad and I don't want to create yet another include file. */ + +#define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ +#define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */ +#define BLKRRPART _IO(0x12,95) /* re-read partition table */ +#define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */ +#define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */ +#define BLKRASET _IO(0x12,98) /* set read ahead for block device */ +#define BLKRAGET _IO(0x12,99) /* get current read ahead setting */ +#define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */ +#define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */ +#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ +#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ +#define BLKSSZGET _IO(0x12,104)/* get block device sector size */ +#if 0 +#define BLKPG _IO(0x12,105)/* See blkpg.h */ + +/* Some people are morons. Do not use sizeof! */ + +#define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */ +#define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */ +/* This was here just to show that the number is taken - + probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */ +#endif +/* A jump here: 108-111 have been used for various private purposes. */ +#define BLKBSZGET _IOR(0x12,112,size_t) +#define BLKBSZSET _IOW(0x12,113,size_t) +#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ +#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) +#define BLKTRACESTART _IO(0x12,116) +#define BLKTRACESTOP _IO(0x12,117) +#define BLKTRACETEARDOWN _IO(0x12,118) +#define BLKDISCARD _IO(0x12,119) +#define BLKIOMIN _IO(0x12,120) +#define BLKIOOPT _IO(0x12,121) +#define BLKALIGNOFF _IO(0x12,122) +#define BLKPBSZGET _IO(0x12,123) +#define BLKDISCARDZEROES _IO(0x12,124) +#define BLKSECDISCARD _IO(0x12,125) +#define BLKROTATIONAL _IO(0x12,126) +#define BLKZEROOUT _IO(0x12,127) +/* + * A jump here: 130-131 are reserved for zoned block devices + * (see uapi/linux/blkzoned.h) + */ + +#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ +#define FIBMAP _IO(0x00,1) /* bmap access */ +#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ +#define FIFREEZE _IOWR('X', 119, int) /* Freeze */ +#define FITHAW _IOWR('X', 120, int) /* Thaw */ +#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ +#define FICLONE _IOW(0x94, 9, int) +#define FICLONERANGE _IOW(0x94, 13, struct file_clone_range) +#define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range) + +#define FSLABEL_MAX 256 /* Max chars for the interface; each fs may differ */ + +#define FS_IOC_GETFLAGS _IOR('f', 1, long) +#define FS_IOC_SETFLAGS _IOW('f', 2, long) +#define FS_IOC_GETVERSION _IOR('v', 1, long) +#define FS_IOC_SETVERSION _IOW('v', 2, long) +#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) +#define FS_IOC32_GETFLAGS _IOR('f', 1, int) +#define FS_IOC32_SETFLAGS _IOW('f', 2, int) +#define FS_IOC32_GETVERSION _IOR('v', 1, int) +#define FS_IOC32_SETVERSION _IOW('v', 2, int) +#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr) +#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr) +#define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX]) +#define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX]) + +/* + * File system encryption support + */ +/* Policy provided via an ioctl on the topmost directory */ +#define FS_KEY_DESCRIPTOR_SIZE 8 + +#define FS_POLICY_FLAGS_PAD_4 0x00 +#define FS_POLICY_FLAGS_PAD_8 0x01 +#define FS_POLICY_FLAGS_PAD_16 0x02 +#define FS_POLICY_FLAGS_PAD_32 0x03 +#define FS_POLICY_FLAGS_PAD_MASK 0x03 +#define FS_POLICY_FLAGS_VALID 0x03 + +/* Encryption algorithms */ +#define FS_ENCRYPTION_MODE_INVALID 0 +#define FS_ENCRYPTION_MODE_AES_256_XTS 1 +#define FS_ENCRYPTION_MODE_AES_256_GCM 2 +#define FS_ENCRYPTION_MODE_AES_256_CBC 3 +#define FS_ENCRYPTION_MODE_AES_256_CTS 4 +#define FS_ENCRYPTION_MODE_AES_128_CBC 5 +#define FS_ENCRYPTION_MODE_AES_128_CTS 6 +#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* Removed, do not use. */ +#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* Removed, do not use. */ + +struct fscrypt_policy { + __u8 version; + __u8 contents_encryption_mode; + __u8 filenames_encryption_mode; + __u8 flags; + __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; +}; + +#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) +#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) +#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) + +/* Parameters for passing an encryption key into the kernel keyring */ +#define FS_KEY_DESC_PREFIX "fscrypt:" +#define FS_KEY_DESC_PREFIX_SIZE 8 + +/* Structure that userspace passes to the kernel keyring */ +#define FS_MAX_KEY_SIZE 64 + +struct fscrypt_key { + __u32 mode; + __u8 raw[FS_MAX_KEY_SIZE]; + __u32 size; +}; + +/* + * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) + * + * Note: for historical reasons, these flags were originally used and + * defined for use by ext2/ext3, and then other file systems started + * using these flags so they wouldn't need to write their own version + * of chattr/lsattr (which was shipped as part of e2fsprogs). You + * should think twice before trying to use these flags in new + * contexts, or trying to assign these flags, since they are used both + * as the UAPI and the on-disk encoding for ext2/3/4. Also, we are + * almost out of 32-bit flags. :-) + * + * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from + * XFS to the generic FS level interface. This uses a structure that + * has padding and hence has more room to grow, so it may be more + * appropriate for many new use cases. + * + * Please do not change these flags or interfaces before checking with + * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org. + */ +#define FS_SECRM_FL 0x00000001 /* Secure deletion */ +#define FS_UNRM_FL 0x00000002 /* Undelete */ +#define FS_COMPR_FL 0x00000004 /* Compress file */ +#define FS_SYNC_FL 0x00000008 /* Synchronous updates */ +#define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define FS_APPEND_FL 0x00000020 /* writes to file may only append */ +#define FS_NODUMP_FL 0x00000040 /* do not dump file */ +#define FS_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... */ +#define FS_DIRTY_FL 0x00000100 +#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define FS_NOCOMP_FL 0x00000400 /* Don't compress */ +/* End compression flags --- maybe not all used */ +#define FS_ENCRYPT_FL 0x00000800 /* Encrypted file */ +#define FS_BTREE_FL 0x00001000 /* btree format dir */ +#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define FS_IMAGIC_FL 0x00002000 /* AFS directory */ +#define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ +#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */ +#define FS_EXTENT_FL 0x00080000 /* Extents */ +#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ +#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */ +#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ +#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ + +#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ +#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ + + +#define SYNC_FILE_RANGE_WAIT_BEFORE 1 +#define SYNC_FILE_RANGE_WRITE 2 +#define SYNC_FILE_RANGE_WAIT_AFTER 4 + +/* + * Flags for preadv2/pwritev2: + */ + +typedef int __bitwise __kernel_rwf_t; + +/* high priority request, poll if possible */ +#define RWF_HIPRI ((__force __kernel_rwf_t)0x00000001) + +/* per-IO O_DSYNC */ +#define RWF_DSYNC ((__force __kernel_rwf_t)0x00000002) + +/* per-IO O_SYNC */ +#define RWF_SYNC ((__force __kernel_rwf_t)0x00000004) + +/* per-IO, return -EAGAIN if operation would block */ +#define RWF_NOWAIT ((__force __kernel_rwf_t)0x00000008) + +/* per-IO O_APPEND */ +#define RWF_APPEND ((__force __kernel_rwf_t)0x00000010) + +/* mask of flags supported by the kernel */ +#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ + RWF_APPEND) + +#endif /* _UAPI_LINUX_FS_H */ diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h @@ -287,6 +287,7 @@ enum { IFLA_BR_MCAST_STATS_ENABLED, IFLA_BR_MCAST_IGMP_VERSION, IFLA_BR_MCAST_MLD_VERSION, + IFLA_BR_VLAN_STATS_PER_PORT, __IFLA_BR_MAX, }; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h @@ -420,13 +420,19 @@ struct kvm_run { struct kvm_coalesced_mmio_zone { __u64 addr; __u32 size; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; }; struct kvm_coalesced_mmio { __u64 phys_addr; __u32 len; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; __u8 data[8]; }; @@ -752,6 +758,15 @@ struct kvm_ppc_resize_hpt { #define KVM_S390_SIE_PAGE_OFFSET 1 /* + * On arm64, machine type can be used to request the physical + * address size for the VM. Bits[7-0] are reserved for the guest + * PA size shift (i.e, log2(PA_Size)). For backward compatibility, + * value 0 implies the default IPA size, 40bits. + */ +#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL +#define KVM_VM_TYPE_ARM_IPA_SIZE(x) \ + ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) +/* * ioctls for /dev/kvm fds: */ #define KVM_GET_API_VERSION _IO(KVMIO, 0x00) @@ -958,6 +973,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_SEND_IPI 161 #define KVM_CAP_COALESCED_PIO 162 #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 +#define KVM_CAP_EXCEPTION_PAYLOAD 164 +#define KVM_CAP_ARM_VM_IPA_SIZE 165 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h @@ -28,7 +28,9 @@ #define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB #define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB #define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB #define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB #define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h @@ -155,6 +155,7 @@ enum nlmsgerr_attrs { #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 #define NETLINK_EXT_ACK 11 +#define NETLINK_DUMP_STRICT_CHK 12 struct nl_pktinfo { __u32 group; diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h @@ -646,10 +646,12 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_FORK_EXEC - PERF_RECORD_FORK event (perf internal) * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) +#define PERF_RECORD_MISC_FORK_EXEC (1 << 13) #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* * These PERF_RECORD_MISC_* flags below are safely reused diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h @@ -752,7 +752,7 @@ struct snd_timer_info { #define SNDRV_TIMER_PSFLG_EARLY_EVENT (1<<2) /* write early event to the poll queue */ struct snd_timer_params { - unsigned int flags; /* flags - SNDRV_MIXER_PSFLG_* */ + unsigned int flags; /* flags - SNDRV_TIMER_PSFLG_* */ unsigned int ticks; /* requested resolution in ticks */ unsigned int queue_size; /* total size of queue (32-1024) */ unsigned int reserved0; /* reserved, was: failure locations */ diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c @@ -116,6 +116,7 @@ static int get_value(struct parse_opt_ctx_t *p, case OPTION_INTEGER: case OPTION_UINTEGER: case OPTION_LONG: + case OPTION_ULONG: case OPTION_U64: default: break; @@ -166,6 +167,7 @@ static int get_value(struct parse_opt_ctx_t *p, case OPTION_INTEGER: case OPTION_UINTEGER: case OPTION_LONG: + case OPTION_ULONG: case OPTION_U64: default: break; @@ -295,6 +297,22 @@ static int get_value(struct parse_opt_ctx_t *p, return opterror(opt, "expects a numerical value", flags); return 0; + case OPTION_ULONG: + if (unset) { + *(unsigned long *)opt->value = 0; + return 0; + } + if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { + *(unsigned long *)opt->value = opt->defval; + return 0; + } + if (get_arg(p, opt, flags, &arg)) + return -1; + *(unsigned long *)opt->value = strtoul(arg, (char **)&s, 10); + if (*s) + return opterror(opt, "expects a numerical value", flags); + return 0; + case OPTION_U64: if (unset) { *(u64 *)opt->value = 0; @@ -703,6 +721,7 @@ static void print_option_help(const struct option *opts, int full) case OPTION_ARGUMENT: break; case OPTION_LONG: + case OPTION_ULONG: case OPTION_U64: case OPTION_INTEGER: case OPTION_UINTEGER: diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h @@ -25,6 +25,7 @@ enum parse_opt_type { OPTION_STRING, OPTION_INTEGER, OPTION_LONG, + OPTION_ULONG, OPTION_CALLBACK, OPTION_U64, OPTION_UINTEGER, @@ -133,6 +134,7 @@ struct option { #define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) } #define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) } #define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) } +#define OPT_ULONG(s, l, v, h) { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) } #define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) } #define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) } #define OPT_STRING_OPTARG(s, l, v, a, h, d) \ diff --git a/tools/perf/Documentation/build-xed.txt b/tools/perf/Documentation/build-xed.txt @@ -0,0 +1,19 @@ + +For --xed the xed tool is needed. Here is how to install it: + + $ git clone https://github.com/intelxed/mbuild.git mbuild + $ git clone https://github.com/intelxed/xed + $ cd xed + $ ./mfile.py --share + $ ./mfile.py examples + $ sudo ./mfile.py --prefix=/usr/local install + $ sudo ldconfig + $ sudo cp obj/examples/xed /usr/local/bin + +Basic xed testing: + + $ xed | head -3 + ERROR: required argument(s) were missing + Copyright (C) 2017, Intel Corporation. All rights reserved. + XED version: [v10.0-328-g7d62c8c49b7b] + $ diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt @@ -106,7 +106,7 @@ in transaction, respectively. While it is possible to create scripts to analyze the data, an alternative approach is available to export the data to a sqlite or postgresql database. Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, -and to script call-graph-from-sql.py for an example of using the database. +and to script exported-sql-viewer.py for an example of using the database. There is also script intel-pt-events.py which provides an example of how to unpack the raw data for power events and PTWRITE. diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt @@ -11,10 +11,11 @@ l synthesize last branch entries (use with i or x) s skip initial number of events - The default is all events i.e. the same as --itrace=ibxwpe + The default is all events i.e. the same as --itrace=ibxwpe, + except for perf script where it is --itrace=ce - In addition, the period (default 100000) for instructions events - can be specified in units of: + In addition, the period (default 100000, except for perf script where it is 1) + for instructions events can be specified in units of: i instructions t ticks diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt @@ -383,6 +383,24 @@ include::itrace.txt[] will be printed. Each entry has function name and file/line. Enabled by default, disable with --no-inline. +--insn-trace:: + Show instruction stream for intel_pt traces. Combine with --xed to + show disassembly. + +--xed:: + Run xed disassembler on output. Requires installing the xed disassembler. + +--call-trace:: + Show call stream for intel_pt traces. The CPUs are interleaved, but + can be filtered with -C. + +--call-ret-trace:: + Show call and return stream for intel_pt traces. + +--graph-function:: + For itrace only show specified functions and their callees for + itrace. Multiple functions can be separated by comma. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt @@ -242,6 +242,16 @@ Default is to monitor all CPUS. --hierarchy:: Enable hierarchy output. +--overwrite:: + Enable this to use just the most recent records, which helps in high core count + machines such as Knights Landing/Mill, but right now is disabled by default as + the pausing used in this technique is leading to loss of metadata events such + as PERF_RECORD_MMAP which makes 'perf top' unable to resolve samples, leading + to lots of unknown samples appearing on the UI. Enable this if you are in such + machines and profiling a workload that doesn't creates short lived threads and/or + doesn't uses many executable mmap operations. Work is being planed to solve + this situation, till then, this will remain disabled by default. + --force:: Don't do ownership validation. diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt @@ -171,6 +171,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --kernel-syscall-graph:: Show the kernel callchains on the syscall exit path. +--max-events=N:: + Stop after processing N events. Note that strace-like events are considered + only at exit time or when a syscall is interrupted, i.e. in those cases this + option is equivalent to the number of lines printed. + --max-stack:: Set the stack depth limit when parsing the callchain, anything beyond the specified depth will be ignored. Note that at this point @@ -238,6 +243,68 @@ Trace syscalls, major and minor pagefaults: As you can see, there was major pagefault in python process, from CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so. +Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here): + + $ perf trace -e open* --max-events 4 + [root@jouet perf]# trace -e open* --max-events 4 + 2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31 + 2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65 + 3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65 + 4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3 + $ + +Trace the first minor page fault when running a workload: + + # perf trace -F min --max-stack=7 --max-events 1 sleep 1 + 0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k) + __clear_user ([kernel.kallsyms]) + load_elf_binary ([kernel.kallsyms]) + search_binary_handler ([kernel.kallsyms]) + __do_execve_file.isra.33 ([kernel.kallsyms]) + __x64_sys_execve ([kernel.kallsyms]) + do_syscall_64 ([kernel.kallsyms]) + entry_SYSCALL_64 ([kernel.kallsyms]) + # + +Trace the next min page page fault to take place on the first CPU: + + # perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0 + 0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.) + js::gc::FreeSpan::initAsEmpty (inlined) + js::gc::Arena::setAsNotAllocated (inlined) + js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so) + js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so) + js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so) + js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so) + js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined) + js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so) + js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined) + JSThinInlineString::new_<(js::AllowGC)1> (inlined) + AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined) + js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so) + [0x18b26e6bc2bd] (/tmp/perf-17136.map) + # + +Trace the next two sched:sched_switch events, four block:*_plug events, the +next block:*_unplug and the next three net:*dev_queue events, this last one +with a backtrace of at most 16 entries, system wide: + + # perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/ + 0.000 :0/0 sched:sched_switch:swapper/2:0 [120] S ==> rcu_sched:10 [120] + 0.015 rcu_sched/10 sched:sched_switch:rcu_sched:10 [120] R ==> swapper/2:0 [120] + 254.198 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=66 + __dev_queue_xmit ([kernel.kallsyms]) + 273.977 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=78 + __dev_queue_xmit ([kernel.kallsyms]) + 274.007 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ff00 len=78 + __dev_queue_xmit ([kernel.kallsyms]) + 2930.140 kworker/u16:58/2722 block:block_plug:[kworker/u16:58] + 2930.162 kworker/u16:58/2722 block:block_unplug:[kworker/u16:58] 1 + 4466.094 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8] + 8050.123 kworker/u16:30/2694 block:block_plug:[kworker/u16:30] + 8050.271 kworker/u16:30/2694 block:block_plug:[kworker/u16:30] + # + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf @@ -1,4 +1,5 @@ include ../scripts/Makefile.include +include ../scripts/Makefile.arch # The default target of this Makefile is... all: @@ -385,6 +386,8 @@ export INSTALL SHELL_PATH SHELL = $(SHELL_PATH) linux_uapi_dir := $(srctree)/tools/include/uapi/linux +asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic +arch_asm_uapi_dir := $(srctree)/tools/arch/$(ARCH)/include/uapi/asm/ beauty_outdir := $(OUTPUT)trace/beauty/generated beauty_ioctl_outdir := $(beauty_outdir)/ioctl @@ -460,6 +463,18 @@ madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl) $(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@ +mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c +mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh + +$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(arch_asm_uapi_dir)/mman.h $(mmap_flags_tbl) + $(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@ + +mount_flags_array := $(beauty_outdir)/mount_flags_array.c +mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh + +$(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl) + $(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@ + prctl_option_array := $(beauty_outdir)/prctl_option_array.c prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/ prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh @@ -577,6 +592,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(socket_ipproto_array) \ $(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ + $(mmap_flags_array) \ + $(mount_flags_array) \ $(perf_ioctl_array) \ $(prctl_option_array) \ $(arch_errno_name_array) @@ -863,6 +880,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ $(OUTPUT)pmu-events/pmu-events.c \ $(OUTPUT)$(madvise_behavior_array) \ + $(OUTPUT)$(mmap_flags_array) \ + $(OUTPUT)$(mount_flags_array) \ $(OUTPUT)$(drm_ioctl_array) \ $(OUTPUT)$(pkey_alloc_access_rights_array) \ $(OUTPUT)$(sndrv_ctl_ioctl_array) \ diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl @@ -23,7 +23,7 @@ create_table_from_c() { local sc nr last_sc - create_table_exe=`mktemp /tmp/create-table-XXXXXX` + create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX` { diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile @@ -1,3 +1,5 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif + +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/sparc/annotate/instructions.c b/tools/perf/arch/sparc/annotate/instructions.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 + +static int is_branch_cond(const char *cond) +{ + if (cond[0] == '\0') + return 1; + + if (cond[0] == 'a' && cond[1] == '\0') + return 1; + + if (cond[0] == 'c' && + (cond[1] == 'c' || cond[1] == 's') && + cond[2] == '\0') + return 1; + + if (cond[0] == 'e' && + (cond[1] == '\0' || + (cond[1] == 'q' && cond[2] == '\0'))) + return 1; + + if (cond[0] == 'g' && + (cond[1] == '\0' || + (cond[1] == 't' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0'))) + return 1; + + if (cond[0] == 'l' && + (cond[1] == '\0' || + (cond[1] == 't' && cond[2] == '\0') || + (cond[1] == 'u' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0'))) + return 1; + + if (cond[0] == 'n' && + (cond[1] == '\0' || + (cond[1] == 'e' && cond[2] == '\0') || + (cond[1] == 'z' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == 'g' && cond[3] == '\0'))) + return 1; + + if (cond[0] == 'b' && + cond[1] == 'p' && + cond[2] == 'o' && + cond[3] == 's' && + cond[4] == '\0') + return 1; + + if (cond[0] == 'v' && + (cond[1] == 'c' || cond[1] == 's') && + cond[2] == '\0') + return 1; + + if (cond[0] == 'b' && + cond[1] == 'z' && + cond[2] == '\0') + return 1; + + return 0; +} + +static int is_branch_reg_cond(const char *cond) +{ + if ((cond[0] == 'n' || cond[0] == 'l') && + cond[1] == 'z' && + cond[2] == '\0') + return 1; + + if (cond[0] == 'z' && + cond[1] == '\0') + return 1; + + if ((cond[0] == 'g' || cond[0] == 'l') && + cond[1] == 'e' && + cond[2] == 'z' && + cond[3] == '\0') + return 1; + + if (cond[0] == 'g' && + cond[1] == 'z' && + cond[2] == '\0') + return 1; + + return 0; +} + +static int is_branch_float_cond(const char *cond) +{ + if (cond[0] == '\0') + return 1; + + if ((cond[0] == 'a' || cond[0] == 'e' || + cond[0] == 'z' || cond[0] == 'g' || + cond[0] == 'l' || cond[0] == 'n' || + cond[0] == 'o' || cond[0] == 'u') && + cond[1] == '\0') + return 1; + + if (((cond[0] == 'g' && cond[1] == 'e') || + (cond[0] == 'l' && (cond[1] == 'e' || + cond[1] == 'g')) || + (cond[0] == 'n' && (cond[1] == 'e' || + cond[1] == 'z')) || + (cond[0] == 'u' && (cond[1] == 'e' || + cond[1] == 'g' || + cond[1] == 'l'))) && + cond[2] == '\0') + return 1; + + if (cond[0] == 'u' && + (cond[1] == 'g' || cond[1] == 'l') && + cond[2] == 'e' && + cond[3] == '\0') + return 1; + + return 0; +} + +static struct ins_ops *sparc__associate_instruction_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + if (!strcmp(name, "call") || + !strcmp(name, "jmp") || + !strcmp(name, "jmpl")) { + ops = &call_ops; + } else if (!strcmp(name, "ret") || + !strcmp(name, "retl") || + !strcmp(name, "return")) { + ops = &ret_ops; + } else if (!strcmp(name, "mov")) { + ops = &mov_ops; + } else { + if (name[0] == 'c' && + (name[1] == 'w' || name[1] == 'x')) + name += 2; + + if (name[0] == 'b') { + const char *cond = name + 1; + + if (cond[0] == 'r') { + if (is_branch_reg_cond(cond + 1)) + ops = &jump_ops; + } else if (is_branch_cond(cond)) { + ops = &jump_ops; + } + } else if (name[0] == 'f' && name[1] == 'b') { + if (is_branch_float_cond(name + 2)) + ops = &jump_ops; + } + } + + if (ops) + arch__associate_ins_ops(arch, name, ops); + + return ops; +} + +static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) +{ + if (!arch->initialized) { + arch->initialized = true; + arch->associate_instruction_ops = sparc__associate_instruction_ops; + arch->objdump.comment_char = '#'; + } + + return 0; +} diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c @@ -592,6 +592,9 @@ static void record__init_features(struct record *rec) if (!rec->opts.full_auxtrace) perf_header__clear_feat(&session->header, HEADER_AUXTRACE); + if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) + perf_header__clear_feat(&session->header, HEADER_CLOCKID); + perf_header__clear_feat(&session->header, HEADER_STAT); } @@ -897,6 +900,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) record__init_features(rec); + if (rec->opts.use_clockid && rec->opts.clockid_res_ns) + session->header.env.clockid_res_ns = rec->opts.clockid_res_ns; + if (forks) { err = perf_evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, @@ -1337,6 +1343,19 @@ static const struct clockid_map clockids[] = { CLOCKID_END, }; +static int get_clockid_res(clockid_t clk_id, u64 *res_ns) +{ + struct timespec res; + + *res_ns = 0; + if (!clock_getres(clk_id, &res)) + *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC; + else + pr_warning("WARNING: Failed to determine specified clock resolution.\n"); + + return 0; +} + static int parse_clockid(const struct option *opt, const char *str, int unset) { struct record_opts *opts = (struct record_opts *)opt->value; @@ -1360,7 +1379,7 @@ static int parse_clockid(const struct option *opt, const char *str, int unset) /* if its a number, we're done */ if (sscanf(str, "%d", &opts->clockid) == 1) - return 0; + return get_clockid_res(opts->clockid, &opts->clockid_res_ns); /* allow a "CLOCK_" prefix to the name */ if (!strncasecmp(str, "CLOCK_", 6)) @@ -1369,7 +1388,8 @@ static int parse_clockid(const struct option *opt, const char *str, int unset) for (cm = clockids; cm->name; cm++) { if (!strcasecmp(str, cm->name)) { opts->clockid = cm->clockid; - return 0; + return get_clockid_res(opts->clockid, + &opts->clockid_res_ns); } } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c @@ -44,6 +44,7 @@ #include <sys/stat.h> #include <fcntl.h> #include <unistd.h> +#include <subcmd/pager.h> #include "sane_ctype.h" @@ -912,7 +913,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end, static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, struct perf_insn *x, u8 *inbuf, int len, - int insn, FILE *fp) + int insn, FILE *fp, int *total_cycles) { int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip, dump_insn(x, ip, inbuf, len, NULL), @@ -921,7 +922,8 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, en->flags.in_tx ? " INTX" : "", en->flags.abort ? " ABORT" : ""); if (en->flags.cycles) { - printed += fprintf(fp, " %d cycles", en->flags.cycles); + *total_cycles += en->flags.cycles; + printed += fprintf(fp, " %d cycles [%d]", en->flags.cycles, *total_cycles); if (insn) printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles); } @@ -978,6 +980,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, u8 buffer[MAXBB]; unsigned off; struct symbol *lastsym = NULL; + int total_cycles = 0; if (!(br && br->nr)) return 0; @@ -998,7 +1001,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += ip__fprintf_sym(br->entries[nr - 1].from, thread, x.cpumode, x.cpu, &lastsym, attr, fp); printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1], - &x, buffer, len, 0, fp); + &x, buffer, len, 0, fp, &total_cycles); } /* Print all blocks */ @@ -1026,7 +1029,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); if (ip == end) { - printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp); + printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp, + &total_cycles); break; } else { printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip, @@ -1104,6 +1108,35 @@ out: return printed; } +static const char *resolve_branch_sym(struct perf_sample *sample, + struct perf_evsel *evsel, + struct thread *thread, + struct addr_location *al, + u64 *ip) +{ + struct addr_location addr_al; + struct perf_event_attr *attr = &evsel->attr; + const char *name = NULL; + + if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) { + if (sample_addr_correlates_sym(attr)) { + thread__resolve(thread, &addr_al, sample); + if (addr_al.sym) + name = addr_al.sym->name; + else + *ip = sample->addr; + } else { + *ip = sample->addr; + } + } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) { + if (al->sym) + name = al->sym->name; + else + *ip = sample->ip; + } + return name; +} + static int perf_sample__fprintf_callindent(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, @@ -1111,7 +1144,6 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample, { struct perf_event_attr *attr = &evsel->attr; size_t depth = thread_stack__depth(thread); - struct addr_location addr_al; const char *name = NULL; static int spacing; int len = 0; @@ -1125,22 +1157,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample, if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN) depth += 1; - if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) { - if (sample_addr_correlates_sym(attr)) { - thread__resolve(thread, &addr_al, sample); - if (addr_al.sym) - name = addr_al.sym->name; - else - ip = sample->addr; - } else { - ip = sample->addr; - } - } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) { - if (al->sym) - name = al->sym->name; - else - ip = sample->ip; - } + name = resolve_branch_sym(sample, evsel, thread, al, &ip); if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) { dlen += fprintf(fp, "("); @@ -1646,6 +1663,47 @@ static void perf_sample__fprint_metric(struct perf_script *script, } } +static bool show_event(struct perf_sample *sample, + struct perf_evsel *evsel, + struct thread *thread, + struct addr_location *al) +{ + int depth = thread_stack__depth(thread); + + if (!symbol_conf.graph_function) + return true; + + if (thread->filter) { + if (depth <= thread->filter_entry_depth) { + thread->filter = false; + return false; + } + return true; + } else { + const char *s = symbol_conf.graph_function; + u64 ip; + const char *name = resolve_branch_sym(sample, evsel, thread, al, + &ip); + unsigned nlen; + + if (!name) + return false; + nlen = strlen(name); + while (*s) { + unsigned len = strcspn(s, ","); + if (nlen == len && !strncmp(name, s, len)) { + thread->filter = true; + thread->filter_entry_depth = depth; + return true; + } + s += len; + if (*s == ',') + s++; + } + return false; + } +} + static void process_event(struct perf_script *script, struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al, @@ -1660,6 +1718,9 @@ static void process_event(struct perf_script *script, if (output[type].fields == 0) return; + if (!show_event(sample, evsel, thread, al)) + return; + ++es->samples; perf_sample__fprintf_start(sample, thread, evsel, @@ -1737,6 +1798,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(METRIC)) perf_sample__fprint_metric(script, thread, evsel, sample, fp); + + if (verbose) + fflush(fp); } static struct scripting_ops *scripting_ops; @@ -3100,6 +3164,44 @@ static int perf_script__process_auxtrace_info(struct perf_session *session, #define perf_script__process_auxtrace_info 0 #endif +static int parse_insn_trace(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + parse_output_fields(NULL, "+insn,-event,-period", 0); + itrace_parse_synth_opts(opt, "i0ns", 0); + nanosecs = true; + return 0; +} + +static int parse_xed(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + force_pager("xed -F insn: -A -64 | less"); + return 0; +} + +static int parse_call_trace(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0); + itrace_parse_synth_opts(opt, "cewp", 0); + nanosecs = true; + return 0; +} + +static int parse_callret_trace(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent,+flags", 0); + itrace_parse_synth_opts(opt, "crewp", 0); + nanosecs = true; + return 0; +} + int cmd_script(int argc, const char **argv) { bool show_full_info = false; @@ -3109,7 +3211,10 @@ int cmd_script(int argc, const char **argv) char *rec_script_path = NULL; char *rep_script_path = NULL; struct perf_session *session; - struct itrace_synth_opts itrace_synth_opts = { .set = false, }; + struct itrace_synth_opts itrace_synth_opts = { + .set = false, + .default_no_sample = true, + }; char *script_path = NULL; const char **__argv; int i, j, err = 0; @@ -3184,6 +3289,16 @@ int cmd_script(int argc, const char **argv) "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), + OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL, + "Decode instructions from itrace", parse_insn_trace), + OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL, + "Run xed disassembler on output", parse_xed), + OPT_CALLBACK_OPTARG(0, "call-trace", &itrace_synth_opts, NULL, NULL, + "Decode calls from from itrace", parse_call_trace), + OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL, + "Decode calls and returns from itrace", parse_callret_trace), + OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]", + "Only print symbols and callees with --call-trace/--call-ret-trace"), OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]", "Stop display of callgraph at these symbols"), OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), @@ -3417,8 +3532,10 @@ int cmd_script(int argc, const char **argv) exit(-1); } - if (!script_name) + if (!script_name) { setup_pager(); + use_browser = 0; + } session = perf_session__new(&data, false, &script.tool); if (session == NULL) @@ -3439,7 +3556,8 @@ int cmd_script(int argc, const char **argv) script.session = session; script__setup_sample_type(&script); - if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) + if ((output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) || + symbol_conf.graph_function) itrace_synth_opts.thread_stack = true; session->itrace_synth_opts = &itrace_synth_opts; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c @@ -409,6 +409,28 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) return leader; } +static bool is_target_alive(struct target *_target, + struct thread_map *threads) +{ + struct stat st; + int i; + + if (!target__has_task(_target)) + return true; + + for (i = 0; i < threads->nr; i++) { + char path[PATH_MAX]; + + scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(), + threads->map[i].pid); + + if (!stat(path, &st)) + return true; + } + + return false; +} + static int __run_perf_stat(int argc, const char **argv, int run_idx) { int interval = stat_config.interval; @@ -579,6 +601,8 @@ try_again: enable_counters(); while (!done) { nanosleep(&ts, NULL); + if (!is_target_alive(&target, evsel_list->threads)) + break; if (timeout) break; if (interval) { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c @@ -1134,11 +1134,6 @@ static int __cmd_top(struct perf_top *top) if (!target__none(&opts->target)) perf_evlist__enable(top->evlist); - /* Wait for a minimal set of events before starting the snapshot */ - perf_evlist__poll(top->evlist, 100); - - perf_top__mmap_read(top); - ret = -1; if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui : display_thread), top)) { @@ -1156,6 +1151,11 @@ static int __cmd_top(struct perf_top *top) } } + /* Wait for a minimal set of events before starting the snapshot */ + perf_evlist__poll(top->evlist, 100); + + perf_top__mmap_read(top); + while (!done) { u64 hits = top->samples; @@ -1257,7 +1257,14 @@ int cmd_top(int argc, const char **argv) .uses_mmap = true, }, .proc_map_timeout = 500, - .overwrite = 1, + /* + * FIXME: This will lose PERF_RECORD_MMAP and other metadata + * when we pause, fix that and reenable. Probably using a + * separate evlist with a dummy event, i.e. a non-overwrite + * ring buffer just for metadata events, while PERF_RECORD_SAMPLE + * stays in overwrite mode. -acme + * */ + .overwrite = 0, }, .max_stack = sysctl__max_stack(), .annotation_opts = annotation__default_options, @@ -1372,6 +1379,8 @@ int cmd_top(int argc, const char **argv) "Show raw trace event output (do not use print fmt or plugins)"), OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, "Show entries in a hierarchy"), + OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite, + "Use a backward ring buffer, default: no"), OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"), OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize, "number of thread to run event synthesize"), diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c @@ -89,6 +89,8 @@ struct trace { u64 base_time; FILE *output; unsigned long nr_events; + unsigned long nr_events_printed; + unsigned long max_events; struct strlist *ev_qualifier; struct { size_t nr; @@ -612,6 +614,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, struct syscall_arg_fmt { size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); + unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val); void *parm; const char *name; bool show_zero; @@ -723,6 +726,10 @@ static struct syscall_fmt { .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, }, + { .name = "mount", + .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ }, + [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */ + .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, }, { .name = "mprotect", .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, @@ -832,7 +839,8 @@ static struct syscall_fmt { .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, { .name = "tkill", .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, - { .name = "umount2", .alias = "umount", }, + { .name = "umount2", .alias = "umount", + .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, }, { .name = "uname", .alias = "newuname", }, { .name = "unlinkat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, @@ -856,6 +864,18 @@ static struct syscall_fmt *syscall_fmt__find(const char *name) return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); } +static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias) +{ + int i, nmemb = ARRAY_SIZE(syscall_fmts); + + for (i = 0; i < nmemb; ++i) { + if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0) + return &syscall_fmts[i]; + } + + return NULL; +} + /* * is_exit: is this "exit" or "exit_group"? * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter. @@ -1485,6 +1505,19 @@ static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size, return scnprintf(bf, size, "arg%d: ", arg->idx); } +/* + * Check if the value is in fact zero, i.e. mask whatever needs masking, such + * as mount 'flags' argument that needs ignoring some magic flag, see comment + * in tools/perf/trace/beauty/mount_flags.c + */ +static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val) +{ + if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val) + return sc->arg_fmt[arg->idx].mask_val(arg, val); + + return val; +} + static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, struct syscall_arg *arg, unsigned long val) { @@ -1533,6 +1566,11 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, continue; val = syscall_arg__val(&arg, arg.idx); + /* + * Some syscall args need some mask, most don't and + * return val untouched. + */ + val = syscall__mask_val(sc, &arg, val); /* * Suppress this argument if its value is zero and @@ -1664,6 +1702,8 @@ static int trace__printf_interrupted_entry(struct trace *trace) printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); ttrace->entry_pending = false; + ++trace->nr_events_printed; + return printed; } @@ -1810,12 +1850,14 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse int max_stack = evsel->attr.sample_max_stack ? evsel->attr.sample_max_stack : trace->max_stack; + int err; - if (machine__resolve(trace->host, &al, sample) < 0 || - thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack)) + if (machine__resolve(trace->host, &al, sample) < 0) return -1; - return 0; + err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack); + addr_location__put(&al); + return err; } static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample) @@ -1940,6 +1982,13 @@ errno_print: { fputc('\n', trace->output); + /* + * We only consider an 'event' for the sake of --max-events a non-filtered + * sys_enter + sys_exit and other tracepoint events. + */ + if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX) + interrupted = true; + if (callchain_ret > 0) trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) @@ -2072,14 +2121,25 @@ static void bpf_output__fprintf(struct trace *trace, { binary__fprintf(sample->raw_data, sample->raw_size, 8, bpf_output__printer, NULL, trace->output); + ++trace->nr_events_printed; } static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) { - struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); + struct thread *thread; int callchain_ret = 0; + /* + * Check if we called perf_evsel__disable(evsel) due to, for instance, + * this event's max_events having been hit and this is an entry coming + * from the ring buffer that we should discard, since the max events + * have already been considered/printed. + */ + if (evsel->disabled) + return 0; + + thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); if (sample->callchain) { callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); @@ -2127,6 +2187,12 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, event_format__fprintf(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size, trace->output); + ++trace->nr_events_printed; + + if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) { + perf_evsel__disable(evsel); + perf_evsel__close(evsel); + } } } @@ -2137,8 +2203,8 @@ newline: trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); - thread__put(thread); out: + thread__put(thread); return 0; } @@ -2225,6 +2291,8 @@ static int trace__pgfault(struct trace *trace, trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); + + ++trace->nr_events_printed; out: err = 0; out_put: @@ -2402,6 +2470,9 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st tracepoint_handler handler = evsel->handler; handler(trace, evsel, event, sample); } + + if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX) + interrupted = true; } static int trace__add_syscall_newtp(struct trace *trace) @@ -2706,7 +2777,7 @@ next_event: int timeout = done ? 100 : -1; if (!draining && perf_evlist__poll(evlist, timeout) > 0) { - if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0) + if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0) draining = true; goto again; @@ -3138,6 +3209,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str, int len = strlen(str) + 1, err = -1, list, idx; char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); char group_name[PATH_MAX]; + struct syscall_fmt *fmt; if (strace_groups_dir == NULL) return -1; @@ -3155,12 +3227,19 @@ static int trace__parse_events_option(const struct option *opt, const char *str, if (syscalltbl__id(trace->sctbl, s) >= 0 || syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) { list = 1; + goto do_concat; + } + + fmt = syscall_fmt__find_by_alias(s); + if (fmt != NULL) { + list = 1; + s = fmt->name; } else { path__join(group_name, sizeof(group_name), strace_groups_dir, s); if (access(group_name, R_OK) == 0) list = 1; } - +do_concat: if (lists[list]) { sprintf(lists[list] + strlen(lists[list]), ",%s", s); } else { @@ -3249,6 +3328,7 @@ int cmd_trace(int argc, const char **argv) .trace_syscalls = false, .kernel_syscallchains = false, .max_stack = UINT_MAX, + .max_events = ULONG_MAX, }; const char *output_name = NULL; const struct option trace_options[] = { @@ -3301,6 +3381,8 @@ int cmd_trace(int argc, const char **argv) &record_parse_callchain_opt), OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, "Show the kernel callchains on the syscall exit path"), + OPT_ULONG(0, "max-events", &trace.max_events, + "Set the maximum number of events to print, exit after that is reached. "), OPT_UINTEGER(0, "min-stack", &trace.min_stack, "Set the minimum stack depth when parsing the callchain, " "anything below the specified depth will be ignored."), diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh @@ -5,6 +5,7 @@ HEADERS=' include/uapi/drm/drm.h include/uapi/drm/i915_drm.h include/uapi/linux/fcntl.h +include/uapi/linux/fs.h include/uapi/linux/kcmp.h include/uapi/linux/kvm.h include/uapi/linux/in.h diff --git a/tools/perf/perf.h b/tools/perf/perf.h @@ -81,6 +81,7 @@ struct record_opts { unsigned initial_delay; bool use_clockid; clockid_t clockid; + u64 clockid_res_ns; unsigned int proc_map_timeout; }; diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py @@ -1,339 +0,0 @@ -#!/usr/bin/python2 -# call-graph-from-sql.py: create call-graph from sql database -# Copyright (c) 2014-2017, Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. - -# To use this script you will need to have exported data using either the -# export-to-sqlite.py or the export-to-postgresql.py script. Refer to those -# scripts for details. -# -# Following on from the example in the export scripts, a -# call-graph can be displayed for the pt_example database like this: -# -# python tools/perf/scripts/python/call-graph-from-sql.py pt_example -# -# Note that for PostgreSQL, this script supports connecting to remote databases -# by setting hostname, port, username, password, and dbname e.g. -# -# python tools/perf/scripts/python/call-graph-from-sql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example" -# -# The result is a GUI window with a tree representing a context-sensitive -# call-graph. Expanding a couple of levels of the tree and adjusting column -# widths to suit will display something like: -# -# Call Graph: pt_example -# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%) -# v- ls -# v- 2638:2638 -# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0 -# |- unknown unknown 1 13198 0.1 1 0.0 -# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3 -# >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3 -# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4 -# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1 -# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0 -# >- __libc_csu_init ls 1 10354 0.1 10 0.0 -# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0 -# v- main ls 1 8182043 99.6 180254 99.9 -# -# Points to note: -# The top level is a command name (comm) -# The next level is a thread (pid:tid) -# Subsequent levels are functions -# 'Count' is the number of calls -# 'Time' is the elapsed time until the function returns -# Percentages are relative to the level above -# 'Branch Count' is the total number of branches for that function and all -# functions that it calls - -import sys -from PySide.QtCore import * -from PySide.QtGui import * -from PySide.QtSql import * -from decimal import * - -class TreeItem(): - - def __init__(self, db, row, parent_item): - self.db = db - self.row = row - self.parent_item = parent_item - self.query_done = False; - self.child_count = 0 - self.child_items = [] - self.data = ["", "", "", "", "", "", ""] - self.comm_id = 0 - self.thread_id = 0 - self.call_path_id = 1 - self.branch_count = 0 - self.time = 0 - if not parent_item: - self.setUpRoot() - - def setUpRoot(self): - self.query_done = True - query = QSqlQuery(self.db) - ret = query.exec_('SELECT id, comm FROM comms') - if not ret: - raise Exception("Query failed: " + query.lastError().text()) - while query.next(): - if not query.value(0): - continue - child_item = TreeItem(self.db, self.child_count, self) - self.child_items.append(child_item) - self.child_count += 1 - child_item.setUpLevel1(query.value(0), query.value(1)) - - def setUpLevel1(self, comm_id, comm): - self.query_done = True; - self.comm_id = comm_id - self.data[0] = comm - self.child_items = [] - self.child_count = 0 - query = QSqlQuery(self.db) - ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id)) - if not ret: - raise Exception("Query failed: " + query.lastError().text()) - while query.next(): - child_item = TreeItem(self.db, self.child_count, self) - self.child_items.append(child_item) - self.child_count += 1 - child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2)) - - def setUpLevel2(self, comm_id, thread_id, pid, tid): - self.comm_id = comm_id - self.thread_id = thread_id - self.data[0] = str(pid) + ":" + str(tid) - - def getChildItem(self, row): - return self.child_items[row] - - def getParentItem(self): - return self.parent_item - - def getRow(self): - return self.row - - def timePercent(self, b): - if not self.time: - return "0.0" - x = (b * Decimal(100)) / self.time - return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP)) - - def branchPercent(self, b): - if not self.branch_count: - return "0.0" - x = (b * Decimal(100)) / self.branch_count - return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP)) - - def addChild(self, call_path_id, name, dso, count, time, branch_count): - child_item = TreeItem(self.db, self.child_count, self) - child_item.comm_id = self.comm_id - child_item.thread_id = self.thread_id - child_item.call_path_id = call_path_id - child_item.branch_count = branch_count - child_item.time = time - child_item.data[0] = name - if dso == "[kernel.kallsyms]": - dso = "[kernel]" - child_item.data[1] = dso - child_item.data[2] = str(count) - child_item.data[3] = str(time) - child_item.data[4] = self.timePercent(time) - child_item.data[5] = str(branch_count) - child_item.data[6] = self.branchPercent(branch_count) - self.child_items.append(child_item) - self.child_count += 1 - - def selectCalls(self): - self.query_done = True; - query = QSqlQuery(self.db) - ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, ' - '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), ' - '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), ' - '( SELECT ip FROM call_paths where id = call_path_id ) ' - 'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) + - ' ORDER BY call_path_id') - if not ret: - raise Exception("Query failed: " + query.lastError().text()) - last_call_path_id = 0 - name = "" - dso = "" - count = 0 - branch_count = 0 - total_branch_count = 0 - time = 0 - total_time = 0 - while query.next(): - if query.value(1) == last_call_path_id: - count += 1 - branch_count += query.value(2) - time += query.value(4) - query.value(3) - else: - if count: - self.addChild(last_call_path_id, name, dso, count, time, branch_count) - last_call_path_id = query.value(1) - name = query.value(5) - dso = query.value(6) - count = 1 - total_branch_count += branch_count - total_time += time - branch_count = query.value(2) - time = query.value(4) - query.value(3) - if count: - self.addChild(last_call_path_id, name, dso, count, time, branch_count) - total_branch_count += branch_count - total_time += time - # Top level does not have time or branch count, so fix that here - if total_branch_count > self.branch_count: - self.branch_count = total_branch_count - if self.branch_count: - for child_item in self.child_items: - child_item.data[6] = self.branchPercent(child_item.branch_count) - if total_time > self.time: - self.time = total_time - if self.time: - for child_item in self.child_items: - child_item.data[4] = self.timePercent(child_item.time) - - def childCount(self): - if not self.query_done: - self.selectCalls() - return self.child_count - - def columnCount(self): - return 7 - - def columnHeader(self, column): - headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] - return headers[column] - - def getData(self, column): - return self.data[column] - -class TreeModel(QAbstractItemModel): - - def __init__(self, db, parent=None): - super(TreeModel, self).__init__(parent) - self.db = db - self.root = TreeItem(db, 0, None) - - def columnCount(self, parent): - return self.root.columnCount() - - def rowCount(self, parent): - if parent.isValid(): - parent_item = parent.internalPointer() - else: - parent_item = self.root - return parent_item.childCount() - - def headerData(self, section, orientation, role): - if role == Qt.TextAlignmentRole: - if section > 1: - return Qt.AlignRight - if role != Qt.DisplayRole: - return None - if orientation != Qt.Horizontal: - return None - return self.root.columnHeader(section) - - def parent(self, child): - child_item = child.internalPointer() - if child_item is self.root: - return QModelIndex() - parent_item = child_item.getParentItem() - return self.createIndex(parent_item.getRow(), 0, parent_item) - - def index(self, row, column, parent): - if parent.isValid(): - parent_item = parent.internalPointer() - else: - parent_item = self.root - child_item = parent_item.getChildItem(row) - return self.createIndex(row, column, child_item) - - def data(self, index, role): - if role == Qt.TextAlignmentRole: - if index.column() > 1: - return Qt.AlignRight - if role != Qt.DisplayRole: - return None - index_item = index.internalPointer() - return index_item.getData(index.column()) - -class MainWindow(QMainWindow): - - def __init__(self, db, dbname, parent=None): - super(MainWindow, self).__init__(parent) - - self.setObjectName("MainWindow") - self.setWindowTitle("Call Graph: " + dbname) - self.move(100, 100) - self.resize(800, 600) - style = self.style() - icon = style.standardIcon(QStyle.SP_MessageBoxInformation) - self.setWindowIcon(icon); - - self.model = TreeModel(db) - - self.view = QTreeView() - self.view.setModel(self.model) - - self.setCentralWidget(self.view) - -if __name__ == '__main__': - if (len(sys.argv) < 2): - print >> sys.stderr, "Usage is: call-graph-from-sql.py <database name>" - raise Exception("Too few arguments") - - dbname = sys.argv[1] - - is_sqlite3 = False - try: - f = open(dbname) - if f.read(15) == "SQLite format 3": - is_sqlite3 = True - f.close() - except: - pass - - if is_sqlite3: - db = QSqlDatabase.addDatabase('QSQLITE') - else: - db = QSqlDatabase.addDatabase('QPSQL') - opts = dbname.split() - for opt in opts: - if '=' in opt: - opt = opt.split('=') - if opt[0] == 'hostname': - db.setHostName(opt[1]) - elif opt[0] == 'port': - db.setPort(int(opt[1])) - elif opt[0] == 'username': - db.setUserName(opt[1]) - elif opt[0] == 'password': - db.setPassword(opt[1]) - elif opt[0] == 'dbname': - dbname = opt[1] - else: - dbname = opt - - db.setDatabaseName(dbname) - if not db.open(): - raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) - - app = QApplication(sys.argv) - window = MainWindow(db, dbname) - window.show() - err = app.exec_() - db.close() - sys.exit(err) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py @@ -59,7 +59,7 @@ import datetime # pt_example=# \q # # An example of using the database is provided by the script -# call-graph-from-sql.py. Refer to that script for details. +# exported-sql-viewer.py. Refer to that script for details. # # Tables: # diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py @@ -40,7 +40,7 @@ import datetime # sqlite> .quit # # An example of using the database is provided by the script -# call-graph-from-sql.py. Refer to that script for details. +# exported-sql-viewer.py. Refer to that script for details. # # The database structure is practically the same as created by the script # export-to-postgresql.py. Refer to that script for details. A notable diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py @@ -0,0 +1,2128 @@ +#!/usr/bin/python2 +# SPDX-License-Identifier: GPL-2.0 +# exported-sql-viewer.py: view data from sql database +# Copyright (c) 2014-2018, Intel Corporation. + +# To use this script you will need to have exported data using either the +# export-to-sqlite.py or the export-to-postgresql.py script. Refer to those +# scripts for details. +# +# Following on from the example in the export scripts, a +# call-graph can be displayed for the pt_example database like this: +# +# python tools/perf/scripts/python/exported-sql-viewer.py pt_example +# +# Note that for PostgreSQL, this script supports connecting to remote databases +# by setting hostname, port, username, password, and dbname e.g. +# +# python tools/perf/scripts/python/exported-sql-viewer.py "hostname=myhost username=myuser password=mypassword dbname=pt_example" +# +# The result is a GUI window with a tree representing a context-sensitive +# call-graph. Expanding a couple of levels of the tree and adjusting column +# widths to suit will display something like: +# +# Call Graph: pt_example +# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%) +# v- ls +# v- 2638:2638 +# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0 +# |- unknown unknown 1 13198 0.1 1 0.0 +# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3 +# >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3 +# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4 +# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1 +# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0 +# >- __libc_csu_init ls 1 10354 0.1 10 0.0 +# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0 +# v- main ls 1 8182043 99.6 180254 99.9 +# +# Points to note: +# The top level is a command name (comm) +# The next level is a thread (pid:tid) +# Subsequent levels are functions +# 'Count' is the number of calls +# 'Time' is the elapsed time until the function returns +# Percentages are relative to the level above +# 'Branch Count' is the total number of branches for that function and all +# functions that it calls + +# There is also a "All branches" report, which displays branches and +# possibly disassembly. However, presently, the only supported disassembler is +# Intel XED, and additionally the object code must be present in perf build ID +# cache. To use Intel XED, libxed.so must be present. To build and install +# libxed.so: +# git clone https://github.com/intelxed/mbuild.git mbuild +# git clone https://github.com/intelxed/xed +# cd xed +# ./mfile.py --share +# sudo ./mfile.py --prefix=/usr/local install +# sudo ldconfig +# +# Example report: +# +# Time CPU Command PID TID Branch Type In Tx Branch +# 8107675239590 2 ls 22011 22011 return from interrupt No ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so) +# 7fab593ea260 48 89 e7 mov %rsp, %rdi +# 8107675239899 2 ls 22011 22011 hardware interrupt No 7fab593ea260 _start (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel]) +# 8107675241900 2 ls 22011 22011 return from interrupt No ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so) +# 7fab593ea260 48 89 e7 mov %rsp, %rdi +# 7fab593ea263 e8 c8 06 00 00 callq 0x7fab593ea930 +# 8107675241900 2 ls 22011 22011 call No 7fab593ea263 _start+0x3 (ld-2.19.so) -> 7fab593ea930 _dl_start (ld-2.19.so) +# 7fab593ea930 55 pushq %rbp +# 7fab593ea931 48 89 e5 mov %rsp, %rbp +# 7fab593ea934 41 57 pushq %r15 +# 7fab593ea936 41 56 pushq %r14 +# 7fab593ea938 41 55 pushq %r13 +# 7fab593ea93a 41 54 pushq %r12 +# 7fab593ea93c 53 pushq %rbx +# 7fab593ea93d 48 89 fb mov %rdi, %rbx +# 7fab593ea940 48 83 ec 68 sub $0x68, %rsp +# 7fab593ea944 0f 31 rdtsc +# 7fab593ea946 48 c1 e2 20 shl $0x20, %rdx +# 7fab593ea94a 89 c0 mov %eax, %eax +# 7fab593ea94c 48 09 c2 or %rax, %rdx +# 7fab593ea94f 48 8b 05 1a 15 22 00 movq 0x22151a(%rip), %rax +# 8107675242232 2 ls 22011 22011 hardware interrupt No 7fab593ea94f _dl_start+0x1f (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel]) +# 8107675242900 2 ls 22011 22011 return from interrupt No ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea94f _dl_start+0x1f (ld-2.19.so) +# 7fab593ea94f 48 8b 05 1a 15 22 00 movq 0x22151a(%rip), %rax +# 7fab593ea956 48 89 15 3b 13 22 00 movq %rdx, 0x22133b(%rip) +# 8107675243232 2 ls 22011 22011 hardware interrupt No 7fab593ea956 _dl_start+0x26 (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel]) + +import sys +import weakref +import threading +import string +import cPickle +import re +import os +from PySide.QtCore import * +from PySide.QtGui import * +from PySide.QtSql import * +from decimal import * +from ctypes import * +from multiprocessing import Process, Array, Value, Event + +# Data formatting helpers + +def tohex(ip): + if ip < 0: + ip += 1 << 64 + return "%x" % ip + +def offstr(offset): + if offset: + return "+0x%x" % offset + return "" + +def dsoname(name): + if name == "[kernel.kallsyms]": + return "[kernel]" + return name + +# Percent to one decimal place + +def PercentToOneDP(n, d): + if not d: + return "0.0" + x = (n * Decimal(100)) / d + return str(x.quantize(Decimal(".1"), rounding=ROUND_HALF_UP)) + +# Helper for queries that must not fail + +def QueryExec(query, stmt): + ret = query.exec_(stmt) + if not ret: + raise Exception("Query failed: " + query.lastError().text()) + +# Background thread + +class Thread(QThread): + + done = Signal(object) + + def __init__(self, task, param=None, parent=None): + super(Thread, self).__init__(parent) + self.task = task + self.param = param + + def run(self): + while True: + if self.param is None: + done, result = self.task() + else: + done, result = self.task(self.param) + self.done.emit(result) + if done: + break + +# Tree data model + +class TreeModel(QAbstractItemModel): + + def __init__(self, root, parent=None): + super(TreeModel, self).__init__(parent) + self.root = root + self.last_row_read = 0 + + def Item(self, parent): + if parent.isValid(): + return parent.internalPointer() + else: + return self.root + + def rowCount(self, parent): + result = self.Item(parent).childCount() + if result < 0: + result = 0 + self.dataChanged.emit(parent, parent) + return result + + def hasChildren(self, parent): + return self.Item(parent).hasChildren() + + def headerData(self, section, orientation, role): + if role == Qt.TextAlignmentRole: + return self.columnAlignment(section) + if role != Qt.DisplayRole: + return None + if orientation != Qt.Horizontal: + return None + return self.columnHeader(section) + + def parent(self, child): + child_item = child.internalPointer() + if child_item is self.root: + return QModelIndex() + parent_item = child_item.getParentItem() + return self.createIndex(parent_item.getRow(), 0, parent_item) + + def index(self, row, column, parent): + child_item = self.Item(parent).getChildItem(row) + return self.createIndex(row, column, child_item) + + def DisplayData(self, item, index): + return item.getData(index.column()) + + def FetchIfNeeded(self, row): + if row > self.last_row_read: + self.last_row_read = row + if row + 10 >= self.root.child_count: + self.fetcher.Fetch(glb_chunk_sz) + + def columnAlignment(self, column): + return Qt.AlignLeft + + def columnFont(self, column): + return None + + def data(self, index, role): + if role == Qt.TextAlignmentRole: + return self.columnAlignment(index.column()) + if role == Qt.FontRole: + return self.columnFont(index.column()) + if role != Qt.DisplayRole: + return None + item = index.internalPointer() + return self.DisplayData(item, index) + +# Table data model + +class TableModel(QAbstractTableModel): + + def __init__(self, parent=None): + super(TableModel, self).__init__(parent) + self.child_count = 0 + self.child_items = [] + self.last_row_read = 0 + + def Item(self, parent): + if parent.isValid(): + return parent.internalPointer() + else: + return self + + def rowCount(self, parent): + return self.child_count + + def headerData(self, section, orientation, role): + if role == Qt.TextAlignmentRole: + return self.columnAlignment(section) + if role != Qt.DisplayRole: + return None + if orientation != Qt.Horizontal: + return None + return self.columnHeader(section) + + def index(self, row, column, parent): + return self.createIndex(row, column, self.child_items[row]) + + def DisplayData(self, item, index): + return item.getData(index.column()) + + def FetchIfNeeded(self, row): + if row > self.last_row_read: + self.last_row_read = row + if row + 10 >= self.child_count: + self.fetcher.Fetch(glb_chunk_sz) + + def columnAlignment(self, column): + return Qt.AlignLeft + + def columnFont(self, column): + return None + + def data(self, index, role): + if role == Qt.TextAlignmentRole: + return self.columnAlignment(index.column()) + if role == Qt.FontRole: + return self.columnFont(index.column()) + if role != Qt.DisplayRole: + return None + item = index.internalPointer() + return self.DisplayData(item, index) + +# Model cache + +model_cache = weakref.WeakValueDictionary() +model_cache_lock = threading.Lock() + +def LookupCreateModel(model_name, create_fn): + model_cache_lock.acquire() + try: + model = model_cache[model_name] + except: + model = None + if model is None: + model = create_fn() + model_cache[model_name] = model + model_cache_lock.release() + return model + +# Find bar + +class FindBar(): + + def __init__(self, parent, finder, is_reg_expr=False): + self.finder = finder + self.context = [] + self.last_value = None + self.last_pattern = None + + label = QLabel("Find:") + label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.textbox = QComboBox() + self.textbox.setEditable(True) + self.textbox.currentIndexChanged.connect(self.ValueChanged) + + self.progress = QProgressBar() + self.progress.setRange(0, 0) + self.progress.hide() + + if is_reg_expr: + self.pattern = QCheckBox("Regular Expression") + else: + self.pattern = QCheckBox("Pattern") + self.pattern.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.next_button = QToolButton() + self.next_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowDown)) + self.next_button.released.connect(lambda: self.NextPrev(1)) + + self.prev_button = QToolButton() + self.prev_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowUp)) + self.prev_button.released.connect(lambda: self.NextPrev(-1)) + + self.close_button = QToolButton() + self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton)) + self.close_button.released.connect(self.Deactivate) + + self.hbox = QHBoxLayout() + self.hbox.setContentsMargins(0, 0, 0, 0) + + self.hbox.addWidget(label) + self.hbox.addWidget(self.textbox) + self.hbox.addWidget(self.progress) + self.hbox.addWidget(self.pattern) + self.hbox.addWidget(self.next_button) + self.hbox.addWidget(self.prev_button) + self.hbox.addWidget(self.close_button) + + self.bar = QWidget() + self.bar.setLayout(self.hbox); + self.bar.hide() + + def Widget(self): + return self.bar + + def Activate(self): + self.bar.show() + self.textbox.setFocus() + + def Deactivate(self): + self.bar.hide() + + def Busy(self): + self.textbox.setEnabled(False) + self.pattern.hide() + self.next_button.hide() + self.prev_button.hide() + self.progress.show() + + def Idle(self): + self.textbox.setEnabled(True) + self.progress.hide() + self.pattern.show() + self.next_button.show() + self.prev_button.show() + + def Find(self, direction): + value = self.textbox.currentText() + pattern = self.pattern.isChecked() + self.last_value = value + self.last_pattern = pattern + self.finder.Find(value, direction, pattern, self.context) + + def ValueChanged(self): + value = self.textbox.currentText() + pattern = self.pattern.isChecked() + index = self.textbox.currentIndex() + data = self.textbox.itemData(index) + # Store the pattern in the combo box to keep it with the text value + if data == None: + self.textbox.setItemData(index, pattern) + else: + self.pattern.setChecked(data) + self.Find(0) + + def NextPrev(self, direction): + value = self.textbox.currentText() + pattern = self.pattern.isChecked() + if value != self.last_value: + index = self.textbox.findText(value) + # Allow for a button press before the value has been added to the combo box + if index < 0: + index = self.textbox.count() + self.textbox.addItem(value, pattern) + self.textbox.setCurrentIndex(index) + return + else: + self.textbox.setItemData(index, pattern) + elif pattern != self.last_pattern: + # Keep the pattern recorded in the combo box up to date + index = self.textbox.currentIndex() + self.textbox.setItemData(index, pattern) + self.Find(direction) + + def NotFound(self): + QMessageBox.information(self.bar, "Find", "'" + self.textbox.currentText() + "' not found") + +# Context-sensitive call graph data model item base + +class CallGraphLevelItemBase(object): + + def __init__(self, glb, row, parent_item): + self.glb = glb + self.row = row + self.parent_item = parent_item + self.query_done = False; + self.child_count = 0 + self.child_items = [] + + def getChildItem(self, row): + return self.child_items[row] + + def getParentItem(self): + return self.parent_item + + def getRow(self): + return self.row + + def childCount(self): + if not self.query_done: + self.Select() + if not self.child_count: + return -1 + return self.child_count + + def hasChildren(self): + if not self.query_done: + return True + return self.child_count > 0 + + def getData(self, column): + return self.data[column] + +# Context-sensitive call graph data model level 2+ item base + +class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): + + def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): + super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) + self.comm_id = comm_id + self.thread_id = thread_id + self.call_path_id = call_path_id + self.branch_count = branch_count + self.time = time + + def Select(self): + self.query_done = True; + query = QSqlQuery(self.glb.db) + QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)" + " FROM calls" + " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" + " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" + " INNER JOIN dsos ON symbols.dso_id = dsos.id" + " WHERE parent_call_path_id = " + str(self.call_path_id) + + " AND comm_id = " + str(self.comm_id) + + " AND thread_id = " + str(self.thread_id) + + " GROUP BY call_path_id, name, short_name" + " ORDER BY call_path_id") + while query.next(): + child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) + self.child_items.append(child_item) + self.child_count += 1 + +# Context-sensitive call graph data model level three item + +class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): + + def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item): + super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item) + dso = dsoname(dso) + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] + self.dbid = call_path_id + +# Context-sensitive call graph data model level two item + +class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase): + + def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): + super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item) + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] + self.dbid = thread_id + + def Select(self): + super(CallGraphLevelTwoItem, self).Select() + for child_item in self.child_items: + self.time += child_item.time + self.branch_count += child_item.branch_count + for child_item in self.child_items: + child_item.data[4] = PercentToOneDP(child_item.time, self.time) + child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) + +# Context-sensitive call graph data model level one item + +class CallGraphLevelOneItem(CallGraphLevelItemBase): + + def __init__(self, glb, row, comm_id, comm, parent_item): + super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item) + self.data = [comm, "", "", "", "", "", ""] + self.dbid = comm_id + + def Select(self): + self.query_done = True; + query = QSqlQuery(self.glb.db) + QueryExec(query, "SELECT thread_id, pid, tid" + " FROM comm_threads" + " INNER JOIN threads ON thread_id = threads.id" + " WHERE comm_id = " + str(self.dbid)) + while query.next(): + child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) + self.child_items.append(child_item) + self.child_count += 1 + +# Context-sensitive call graph data model root item + +class CallGraphRootItem(CallGraphLevelItemBase): + + def __init__(self, glb): + super(CallGraphRootItem, self).__init__(glb, 0, None) + self.dbid = 0 + self.query_done = True; + query = QSqlQuery(glb.db) + QueryExec(query, "SELECT id, comm FROM comms") + while query.next(): + if not query.value(0): + continue + child_item = CallGraphLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self) + self.child_items.append(child_item) + self.child_count += 1 + +# Context-sensitive call graph data model + +class CallGraphModel(TreeModel): + + def __init__(self, glb, parent=None): + super(CallGraphModel, self).__init__(CallGraphRootItem(glb), parent) + self.glb = glb + + def columnCount(self, parent=None): + return 7 + + def columnHeader(self, column): + headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] + return headers[column] + + def columnAlignment(self, column): + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] + return alignment[column] + + def FindSelect(self, value, pattern, query): + if pattern: + # postgresql and sqlite pattern patching differences: + # postgresql LIKE is case sensitive but sqlite LIKE is not + # postgresql LIKE allows % and _ to be escaped with \ but sqlite LIKE does not + # postgresql supports ILIKE which is case insensitive + # sqlite supports GLOB (text only) which uses * and ? and is case sensitive + if not self.glb.dbref.is_sqlite3: + # Escape % and _ + s = value.replace("%", "\%") + s = s.replace("_", "\_") + # Translate * and ? into SQL LIKE pattern characters % and _ + trans = string.maketrans("*?", "%_") + match = " LIKE '" + str(s).translate(trans) + "'" + else: + match = " GLOB '" + str(value) + "'" + else: + match = " = '" + str(value) + "'" + QueryExec(query, "SELECT call_path_id, comm_id, thread_id" + " FROM calls" + " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" + " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" + " WHERE symbols.name" + match + + " GROUP BY comm_id, thread_id, call_path_id" + " ORDER BY comm_id, thread_id, call_path_id") + + def FindPath(self, query): + # Turn the query result into a list of ids that the tree view can walk + # to open the tree at the right place. + ids = [] + parent_id = query.value(0) + while parent_id: + ids.insert(0, parent_id) + q2 = QSqlQuery(self.glb.db) + QueryExec(q2, "SELECT parent_id" + " FROM call_paths" + " WHERE id = " + str(parent_id)) + if not q2.next(): + break + parent_id = q2.value(0) + # The call path root is not used + if ids[0] == 1: + del ids[0] + ids.insert(0, query.value(2)) + ids.insert(0, query.value(1)) + return ids + + def Found(self, query, found): + if found: + return self.FindPath(query) + return [] + + def FindValue(self, value, pattern, query, last_value, last_pattern): + if last_value == value and pattern == last_pattern: + found = query.first() + else: + self.FindSelect(value, pattern, query) + found = query.next() + return self.Found(query, found) + + def FindNext(self, query): + found = query.next() + if not found: + found = query.first() + return self.Found(query, found) + + def FindPrev(self, query): + found = query.previous() + if not found: + found = query.last() + return self.Found(query, found) + + def FindThread(self, c): + if c.direction == 0 or c.value != c.last_value or c.pattern != c.last_pattern: + ids = self.FindValue(c.value, c.pattern, c.query, c.last_value, c.last_pattern) + elif c.direction > 0: + ids = self.FindNext(c.query) + else: + ids = self.FindPrev(c.query) + return (True, ids) + + def Find(self, value, direction, pattern, context, callback): + class Context(): + def __init__(self, *x): + self.value, self.direction, self.pattern, self.query, self.last_value, self.last_pattern = x + def Update(self, *x): + self.value, self.direction, self.pattern, self.last_value, self.last_pattern = x + (self.value, self.pattern) + if len(context): + context[0].Update(value, direction, pattern) + else: + context.append(Context(value, direction, pattern, QSqlQuery(self.glb.db), None, None)) + # Use a thread so the UI is not blocked during the SELECT + thread = Thread(self.FindThread, context[0]) + thread.done.connect(lambda ids, t=thread, c=callback: self.FindDone(t, c, ids), Qt.QueuedConnection) + thread.start() + + def FindDone(self, thread, callback, ids): + callback(ids) + +# Vertical widget layout + +class VBox(): + + def __init__(self, w1, w2, w3=None): + self.vbox = QWidget() + self.vbox.setLayout(QVBoxLayout()); + + self.vbox.layout().setContentsMargins(0, 0, 0, 0) + + self.vbox.layout().addWidget(w1) + self.vbox.layout().addWidget(w2) + if w3: + self.vbox.layout().addWidget(w3) + + def Widget(self): + return self.vbox + +# Context-sensitive call graph window + +class CallGraphWindow(QMdiSubWindow): + + def __init__(self, glb, parent=None): + super(CallGraphWindow, self).__init__(parent) + + self.model = LookupCreateModel("Context-Sensitive Call Graph", lambda x=glb: CallGraphModel(x)) + + self.view = QTreeView() + self.view.setModel(self.model) + + for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)): + self.view.setColumnWidth(c, w) + + self.find_bar = FindBar(self, self) + + self.vbox = VBox(self.view, self.find_bar.Widget()) + + self.setWidget(self.vbox.Widget()) + + AddSubWindow(glb.mainwindow.mdi_area, self, "Context-Sensitive Call Graph") + + def DisplayFound(self, ids): + if not len(ids): + return False + parent = QModelIndex() + for dbid in ids: + found = False + n = self.model.rowCount(parent) + for row in xrange(n): + child = self.model.index(row, 0, parent) + if child.internalPointer().dbid == dbid: + found = True + self.view.setCurrentIndex(child) + parent = child + break + if not found: + break + return found + + def Find(self, value, direction, pattern, context): + self.view.setFocus() + self.find_bar.Busy() + self.model.Find(value, direction, pattern, context, self.FindDone) + + def FindDone(self, ids): + found = True + if not self.DisplayFound(ids): + found = False + self.find_bar.Idle() + if not found: + self.find_bar.NotFound() + +# Child data item finder + +class ChildDataItemFinder(): + + def __init__(self, root): + self.root = root + self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (None,) * 5 + self.rows = [] + self.pos = 0 + + def FindSelect(self): + self.rows = [] + if self.pattern: + pattern = re.compile(self.value) + for child in self.root.child_items: + for column_data in child.data: + if re.search(pattern, str(column_data)) is not None: + self.rows.append(child.row) + break + else: + for child in self.root.child_items: + for column_data in child.data: + if self.value in str(column_data): + self.rows.append(child.row) + break + + def FindValue(self): + self.pos = 0 + if self.last_value != self.value or self.pattern != self.last_pattern: + self.FindSelect() + if not len(self.rows): + return -1 + return self.rows[self.pos] + + def FindThread(self): + if self.direction == 0 or self.value != self.last_value or self.pattern != self.last_pattern: + row = self.FindValue() + elif len(self.rows): + if self.direction > 0: + self.pos += 1 + if self.pos >= len(self.rows): + self.pos = 0 + else: + self.pos -= 1 + if self.pos < 0: + self.pos = len(self.rows) - 1 + row = self.rows[self.pos] + else: + row = -1 + return (True, row) + + def Find(self, value, direction, pattern, context, callback): + self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (value, direction,pattern, self.value, self.pattern) + # Use a thread so the UI is not blocked + thread = Thread(self.FindThread) + thread.done.connect(lambda row, t=thread, c=callback: self.FindDone(t, c, row), Qt.QueuedConnection) + thread.start() + + def FindDone(self, thread, callback, row): + callback(row) + +# Number of database records to fetch in one go + +glb_chunk_sz = 10000 + +# size of pickled integer big enough for record size + +glb_nsz = 8 + +# Background process for SQL data fetcher + +class SQLFetcherProcess(): + + def __init__(self, dbref, sql, buffer, head, tail, fetch_count, fetching_done, process_target, wait_event, fetched_event, prep): + # Need a unique connection name + conn_name = "SQLFetcher" + str(os.getpid()) + self.db, dbname = dbref.Open(conn_name) + self.sql = sql + self.buffer = buffer + self.head = head + self.tail = tail + self.fetch_count = fetch_count + self.fetching_done = fetching_done + self.process_target = process_target + self.wait_event = wait_event + self.fetched_event = fetched_event + self.prep = prep + self.query = QSqlQuery(self.db) + self.query_limit = 0 if "$$last_id$$" in sql else 2 + self.last_id = -1 + self.fetched = 0 + self.more = True + self.local_head = self.head.value + self.local_tail = self.tail.value + + def Select(self): + if self.query_limit: + if self.query_limit == 1: + return + self.query_limit -= 1 + stmt = self.sql.replace("$$last_id$$", str(self.last_id)) + QueryExec(self.query, stmt) + + def Next(self): + if not self.query.next(): + self.Select() + if not self.query.next(): + return None + self.last_id = self.query.value(0) + return self.prep(self.query) + + def WaitForTarget(self): + while True: + self.wait_event.clear() + target = self.process_target.value + if target > self.fetched or target < 0: + break + self.wait_event.wait() + return target + + def HasSpace(self, sz): + if self.local_tail <= self.local_head: + space = len(self.buffer) - self.local_head + if space > sz: + return True + if space >= glb_nsz: + # Use 0 (or space < glb_nsz) to mean there is no more at the top of the buffer + nd = cPickle.dumps(0, cPickle.HIGHEST_PROTOCOL) + self.buffer[self.local_head : self.local_head + len(nd)] = nd + self.local_head = 0 + if self.local_tail - self.local_head > sz: + return True + return False + + def WaitForSpace(self, sz): + if self.HasSpace(sz): + return + while True: + self.wait_event.clear() + self.local_tail = self.tail.value + if self.HasSpace(sz): + return + self.wait_event.wait() + + def AddToBuffer(self, obj): + d = cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL) + n = len(d) + nd = cPickle.dumps(n, cPickle.HIGHEST_PROTOCOL) + sz = n + glb_nsz + self.WaitForSpace(sz) + pos = self.local_head + self.buffer[pos : pos + len(nd)] = nd + self.buffer[pos + glb_nsz : pos + sz] = d + self.local_head += sz + + def FetchBatch(self, batch_size): + fetched = 0 + while batch_size > fetched: + obj = self.Next() + if obj is None: + self.more = False + break + self.AddToBuffer(obj) + fetched += 1 + if fetched: + self.fetched += fetched + with self.fetch_count.get_lock(): + self.fetch_count.value += fetched + self.head.value = self.local_head + self.fetched_event.set() + + def Run(self): + while self.more: + target = self.WaitForTarget() + if target < 0: + break + batch_size = min(glb_chunk_sz, target - self.fetched) + self.FetchBatch(batch_size) + self.fetching_done.value = True + self.fetched_event.set() + +def SQLFetcherFn(*x): + process = SQLFetcherProcess(*x) + process.Run() + +# SQL data fetcher + +class SQLFetcher(QObject): + + done = Signal(object) + + def __init__(self, glb, sql, prep, process_data, parent=None): + super(SQLFetcher, self).__init__(parent) + self.process_data = process_data + self.more = True + self.target = 0 + self.last_target = 0 + self.fetched = 0 + self.buffer_size = 16 * 1024 * 1024 + self.buffer = Array(c_char, self.buffer_size, lock=False) + self.head = Value(c_longlong) + self.tail = Value(c_longlong) + self.local_tail = 0 + self.fetch_count = Value(c_longlong) + self.fetching_done = Value(c_bool) + self.last_count = 0 + self.process_target = Value(c_longlong) + self.wait_event = Event() + self.fetched_event = Event() + glb.AddInstanceToShutdownOnExit(self) + self.process = Process(target=SQLFetcherFn, args=(glb.dbref, sql, self.buffer, self.head, self.tail, self.fetch_count, self.fetching_done, self.process_target, self.wait_event, self.fetched_event, prep)) + self.process.start() + self.thread = Thread(self.Thread) + self.thread.done.connect(self.ProcessData, Qt.QueuedConnection) + self.thread.start() + + def Shutdown(self): + # Tell the thread and process to exit + self.process_target.value = -1 + self.wait_event.set() + self.more = False + self.fetching_done.value = True + self.fetched_event.set() + + def Thread(self): + if not self.more: + return True, 0 + while True: + self.fetched_event.clear() + fetch_count = self.fetch_count.value + if fetch_count != self.last_count: + break + if self.fetching_done.value: + self.more = False + return True, 0 + self.fetched_event.wait() + count = fetch_count - self.last_count + self.last_count = fetch_count + self.fetched += count + return False, count + + def Fetch(self, nr): + if not self.more: + # -1 inidcates there are no more + return -1 + result = self.fetched + extra = result + nr - self.target + if extra > 0: + self.target += extra + # process_target < 0 indicates shutting down + if self.process_target.value >= 0: + self.process_target.value = self.target + self.wait_event.set() + return result + + def RemoveFromBuffer(self): + pos = self.local_tail + if len(self.buffer) - pos < glb_nsz: + pos = 0 + n = cPickle.loads(self.buffer[pos : pos + glb_nsz]) + if n == 0: + pos = 0 + n = cPickle.loads(self.buffer[0 : glb_nsz]) + pos += glb_nsz + obj = cPickle.loads(self.buffer[pos : pos + n]) + self.local_tail = pos + n + return obj + + def ProcessData(self, count): + for i in xrange(count): + obj = self.RemoveFromBuffer() + self.process_data(obj) + self.tail.value = self.local_tail + self.wait_event.set() + self.done.emit(count) + +# Fetch more records bar + +class FetchMoreRecordsBar(): + + def __init__(self, model, parent): + self.model = model + + self.label = QLabel("Number of records (x " + "{:,}".format(glb_chunk_sz) + ") to fetch:") + self.label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.fetch_count = QSpinBox() + self.fetch_count.setRange(1, 1000000) + self.fetch_count.setValue(10) + self.fetch_count.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.fetch = QPushButton("Go!") + self.fetch.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + self.fetch.released.connect(self.FetchMoreRecords) + + self.progress = QProgressBar() + self.progress.setRange(0, 100) + self.progress.hide() + + self.done_label = QLabel("All records fetched") + self.done_label.hide() + + self.spacer = QLabel("") + + self.close_button = QToolButton() + self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton)) + self.close_button.released.connect(self.Deactivate) + + self.hbox = QHBoxLayout() + self.hbox.setContentsMargins(0, 0, 0, 0) + + self.hbox.addWidget(self.label) + self.hbox.addWidget(self.fetch_count) + self.hbox.addWidget(self.fetch) + self.hbox.addWidget(self.spacer) + self.hbox.addWidget(self.progress) + self.hbox.addWidget(self.done_label) + self.hbox.addWidget(self.close_button) + + self.bar = QWidget() + self.bar.setLayout(self.hbox); + self.bar.show() + + self.in_progress = False + self.model.progress.connect(self.Progress) + + self.done = False + + if not model.HasMoreRecords(): + self.Done() + + def Widget(self): + return self.bar + + def Activate(self): + self.bar.show() + self.fetch.setFocus() + + def Deactivate(self): + self.bar.hide() + + def Enable(self, enable): + self.fetch.setEnabled(enable) + self.fetch_count.setEnabled(enable) + + def Busy(self): + self.Enable(False) + self.fetch.hide() + self.spacer.hide() + self.progress.show() + + def Idle(self): + self.in_progress = False + self.Enable(True) + self.progress.hide() + self.fetch.show() + self.spacer.show() + + def Target(self): + return self.fetch_count.value() * glb_chunk_sz + + def Done(self): + self.done = True + self.Idle() + self.label.hide() + self.fetch_count.hide() + self.fetch.hide() + self.spacer.hide() + self.done_label.show() + + def Progress(self, count): + if self.in_progress: + if count: + percent = ((count - self.start) * 100) / self.Target() + if percent >= 100: + self.Idle() + else: + self.progress.setValue(percent) + if not count: + # Count value of zero means no more records + self.Done() + + def FetchMoreRecords(self): + if self.done: + return + self.progress.setValue(0) + self.Busy() + self.in_progress = True + self.start = self.model.FetchMoreRecords(self.Target()) + +# Brance data model level two item + +class BranchLevelTwoItem(): + + def __init__(self, row, text, parent_item): + self.row = row + self.parent_item = parent_item + self.data = [""] * 8 + self.data[7] = text + self.level = 2 + + def getParentItem(self): + return self.parent_item + + def getRow(self): + return self.row + + def childCount(self): + return 0 + + def hasChildren(self): + return False + + def getData(self, column): + return self.data[column] + +# Brance data model level one item + +class BranchLevelOneItem(): + + def __init__(self, glb, row, data, parent_item): + self.glb = glb + self.row = row + self.parent_item = parent_item + self.child_count = 0 + self.child_items = [] + self.data = data[1:] + self.dbid = data[0] + self.level = 1 + self.query_done = False + + def getChildItem(self, row): + return self.child_items[row] + + def getParentItem(self): + return self.parent_item + + def getRow(self): + return self.row + + def Select(self): + self.query_done = True + + if not self.glb.have_disassembler: + return + + query = QSqlQuery(self.glb.db) + + QueryExec(query, "SELECT cpu, to_dso_id, to_symbol_id, to_sym_offset, short_name, long_name, build_id, sym_start, to_ip" + " FROM samples" + " INNER JOIN dsos ON samples.to_dso_id = dsos.id" + " INNER JOIN symbols ON samples.to_symbol_id = symbols.id" + " WHERE samples.id = " + str(self.dbid)) + if not query.next(): + return + cpu = query.value(0) + dso = query.value(1) + sym = query.value(2) + if dso == 0 or sym == 0: + return + off = query.value(3) + short_name = query.value(4) + long_name = query.value(5) + build_id = query.value(6) + sym_start = query.value(7) + ip = query.value(8) + + QueryExec(query, "SELECT samples.dso_id, symbol_id, sym_offset, sym_start" + " FROM samples" + " INNER JOIN symbols ON samples.symbol_id = symbols.id" + " WHERE samples.id > " + str(self.dbid) + " AND cpu = " + str(cpu) + + " ORDER BY samples.id" + " LIMIT 1") + if not query.next(): + return + if query.value(0) != dso: + # Cannot disassemble from one dso to another + return + bsym = query.value(1) + boff = query.value(2) + bsym_start = query.value(3) + if bsym == 0: + return + tot = bsym_start + boff + 1 - sym_start - off + if tot <= 0 or tot > 16384: + return + + inst = self.glb.disassembler.Instruction() + f = self.glb.FileFromNamesAndBuildId(short_name, long_name, build_id) + if not f: + return + mode = 0 if Is64Bit(f) else 1 + self.glb.disassembler.SetMode(inst, mode) + + buf_sz = tot + 16 + buf = create_string_buffer(tot + 16) + f.seek(sym_start + off) + buf.value = f.read(buf_sz) + buf_ptr = addressof(buf) + i = 0 + while tot > 0: + cnt, text = self.glb.disassembler.DisassembleOne(inst, buf_ptr, buf_sz, ip) + if cnt: + byte_str = tohex(ip).rjust(16) + for k in xrange(cnt): + byte_str += " %02x" % ord(buf[i]) + i += 1 + while k < 15: + byte_str += " " + k += 1 + self.child_items.append(BranchLevelTwoItem(0, byte_str + " " + text, self)) + self.child_count += 1 + else: + return + buf_ptr += cnt + tot -= cnt + buf_sz -= cnt + ip += cnt + + def childCount(self): + if not self.query_done: + self.Select() + if not self.child_count: + return -1 + return self.child_count + + def hasChildren(self): + if not self.query_done: + return True + return self.child_count > 0 + + def getData(self, column): + return self.data[column] + +# Brance data model root item + +class BranchRootItem(): + + def __init__(self): + self.child_count = 0 + self.child_items = [] + self.level = 0 + + def getChildItem(self, row): + return self.child_items[row] + + def getParentItem(self): + return None + + def getRow(self): + return 0 + + def childCount(self): + return self.child_count + + def hasChildren(self): + return self.child_count > 0 + + def getData(self, column): + return "" + +# Branch data preparation + +def BranchDataPrep(query): + data = [] + for i in xrange(0, 8): + data.append(query.value(i)) + data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + + " (" + dsoname(query.value(11)) + ")" + " -> " + + tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + + " (" + dsoname(query.value(15)) + ")") + return data + +# Branch data model + +class BranchModel(TreeModel): + + progress = Signal(object) + + def __init__(self, glb, event_id, where_clause, parent=None): + super(BranchModel, self).__init__(BranchRootItem(), parent) + self.glb = glb + self.event_id = event_id + self.more = True + self.populated = 0 + sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name," + " CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END," + " ip, symbols.name, sym_offset, dsos.short_name," + " to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name" + " FROM samples" + " INNER JOIN comms ON comm_id = comms.id" + " INNER JOIN threads ON thread_id = threads.id" + " INNER JOIN branch_types ON branch_type = branch_types.id" + " INNER JOIN symbols ON symbol_id = symbols.id" + " INNER JOIN symbols to_symbols ON to_symbol_id = to_symbols.id" + " INNER JOIN dsos ON samples.dso_id = dsos.id" + " INNER JOIN dsos AS to_dsos ON samples.to_dso_id = to_dsos.id" + " WHERE samples.id > $$last_id$$" + where_clause + + " AND evsel_id = " + str(self.event_id) + + " ORDER BY samples.id" + " LIMIT " + str(glb_chunk_sz)) + self.fetcher = SQLFetcher(glb, sql, BranchDataPrep, self.AddSample) + self.fetcher.done.connect(self.Update) + self.fetcher.Fetch(glb_chunk_sz) + + def columnCount(self, parent=None): + return 8 + + def columnHeader(self, column): + return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column] + + def columnFont(self, column): + if column != 7: + return None + return QFont("Monospace") + + def DisplayData(self, item, index): + if item.level == 1: + self.FetchIfNeeded(item.row) + return item.getData(index.column()) + + def AddSample(self, data): + child = BranchLevelOneItem(self.glb, self.populated, data, self.root) + self.root.child_items.append(child) + self.populated += 1 + + def Update(self, fetched): + if not fetched: + self.more = False + self.progress.emit(0) + child_count = self.root.child_count + count = self.populated - child_count + if count > 0: + parent = QModelIndex() + self.beginInsertRows(parent, child_count, child_count + count - 1) + self.insertRows(child_count, count, parent) + self.root.child_count += count + self.endInsertRows() + self.progress.emit(self.root.child_count) + + def FetchMoreRecords(self, count): + current = self.root.child_count + if self.more: + self.fetcher.Fetch(count) + else: + self.progress.emit(0) + return current + + def HasMoreRecords(self): + return self.more + +# Branch window + +class BranchWindow(QMdiSubWindow): + + def __init__(self, glb, event_id, name, where_clause, parent=None): + super(BranchWindow, self).__init__(parent) + + model_name = "Branch Events " + str(event_id) + if len(where_clause): + model_name = where_clause + " " + model_name + + self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, where_clause)) + + self.view = QTreeView() + self.view.setUniformRowHeights(True) + self.view.setModel(self.model) + + self.ResizeColumnsToContents() + + self.find_bar = FindBar(self, self, True) + + self.finder = ChildDataItemFinder(self.model.root) + + self.fetch_bar = FetchMoreRecordsBar(self.model, self) + + self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget()) + + self.setWidget(self.vbox.Widget()) + + AddSubWindow(glb.mainwindow.mdi_area, self, name + " Branch Events") + + def ResizeColumnToContents(self, column, n): + # Using the view's resizeColumnToContents() here is extrememly slow + # so implement a crude alternative + mm = "MM" if column else "MMMM" + font = self.view.font() + metrics = QFontMetrics(font) + max = 0 + for row in xrange(n): + val = self.model.root.child_items[row].data[column] + len = metrics.width(str(val) + mm) + max = len if len > max else max + val = self.model.columnHeader(column) + len = metrics.width(str(val) + mm) + max = len if len > max else max + self.view.setColumnWidth(column, max) + + def ResizeColumnsToContents(self): + n = min(self.model.root.child_count, 100) + if n < 1: + # No data yet, so connect a signal to notify when there is + self.model.rowsInserted.connect(self.UpdateColumnWidths) + return + columns = self.model.columnCount() + for i in xrange(columns): + self.ResizeColumnToContents(i, n) + + def UpdateColumnWidths(self, *x): + # This only needs to be done once, so disconnect the signal now + self.model.rowsInserted.disconnect(self.UpdateColumnWidths) + self.ResizeColumnsToContents() + + def Find(self, value, direction, pattern, context): + self.view.setFocus() + self.find_bar.Busy() + self.finder.Find(value, direction, pattern, context, self.FindDone) + + def FindDone(self, row): + self.find_bar.Idle() + if row >= 0: + self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex())) + else: + self.find_bar.NotFound() + +# Event list + +def GetEventList(db): + events = [] + query = QSqlQuery(db) + QueryExec(query, "SELECT name FROM selected_events WHERE id > 0 ORDER BY id") + while query.next(): + events.append(query.value(0)) + return events + +# SQL data preparation + +def SQLTableDataPrep(query, count): + data = [] + for i in xrange(count): + data.append(query.value(i)) + return data + +# SQL table data model item + +class SQLTableItem(): + + def __init__(self, row, data): + self.row = row + self.data = data + + def getData(self, column): + return self.data[column] + +# SQL table data model + +class SQLTableModel(TableModel): + + progress = Signal(object) + + def __init__(self, glb, sql, column_count, parent=None): + super(SQLTableModel, self).__init__(parent) + self.glb = glb + self.more = True + self.populated = 0 + self.fetcher = SQLFetcher(glb, sql, lambda x, y=column_count: SQLTableDataPrep(x, y), self.AddSample) + self.fetcher.done.connect(self.Update) + self.fetcher.Fetch(glb_chunk_sz) + + def DisplayData(self, item, index): + self.FetchIfNeeded(item.row) + return item.getData(index.column()) + + def AddSample(self, data): + child = SQLTableItem(self.populated, data) + self.child_items.append(child) + self.populated += 1 + + def Update(self, fetched): + if not fetched: + self.more = False + self.progress.emit(0) + child_count = self.child_count + count = self.populated - child_count + if count > 0: + parent = QModelIndex() + self.beginInsertRows(parent, child_count, child_count + count - 1) + self.insertRows(child_count, count, parent) + self.child_count += count + self.endInsertRows() + self.progress.emit(self.child_count) + + def FetchMoreRecords(self, count): + current = self.child_count + if self.more: + self.fetcher.Fetch(count) + else: + self.progress.emit(0) + return current + + def HasMoreRecords(self): + return self.more + +# SQL automatic table data model + +class SQLAutoTableModel(SQLTableModel): + + def __init__(self, glb, table_name, parent=None): + sql = "SELECT * FROM " + table_name + " WHERE id > $$last_id$$ ORDER BY id LIMIT " + str(glb_chunk_sz) + if table_name == "comm_threads_view": + # For now, comm_threads_view has no id column + sql = "SELECT * FROM " + table_name + " WHERE comm_id > $$last_id$$ ORDER BY comm_id LIMIT " + str(glb_chunk_sz) + self.column_headers = [] + query = QSqlQuery(glb.db) + if glb.dbref.is_sqlite3: + QueryExec(query, "PRAGMA table_info(" + table_name + ")") + while query.next(): + self.column_headers.append(query.value(1)) + if table_name == "sqlite_master": + sql = "SELECT * FROM " + table_name + else: + if table_name[:19] == "information_schema.": + sql = "SELECT * FROM " + table_name + select_table_name = table_name[19:] + schema = "information_schema" + else: + select_table_name = table_name + schema = "public" + QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'") + while query.next(): + self.column_headers.append(query.value(0)) + super(SQLAutoTableModel, self).__init__(glb, sql, len(self.column_headers), parent) + + def columnCount(self, parent=None): + return len(self.column_headers) + + def columnHeader(self, column): + return self.column_headers[column] + +# Base class for custom ResizeColumnsToContents + +class ResizeColumnsToContentsBase(QObject): + + def __init__(self, parent=None): + super(ResizeColumnsToContentsBase, self).__init__(parent) + + def ResizeColumnToContents(self, column, n): + # Using the view's resizeColumnToContents() here is extrememly slow + # so implement a crude alternative + font = self.view.font() + metrics = QFontMetrics(font) + max = 0 + for row in xrange(n): + val = self.data_model.child_items[row].data[column] + len = metrics.width(str(val) + "MM") + max = len if len > max else max + val = self.data_model.columnHeader(column) + len = metrics.width(str(val) + "MM") + max = len if len > max else max + self.view.setColumnWidth(column, max) + + def ResizeColumnsToContents(self): + n = min(self.data_model.child_count, 100) + if n < 1: + # No data yet, so connect a signal to notify when there is + self.data_model.rowsInserted.connect(self.UpdateColumnWidths) + return + columns = self.data_model.columnCount() + for i in xrange(columns): + self.ResizeColumnToContents(i, n) + + def UpdateColumnWidths(self, *x): + # This only needs to be done once, so disconnect the signal now + self.data_model.rowsInserted.disconnect(self.UpdateColumnWidths) + self.ResizeColumnsToContents() + +# Table window + +class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase): + + def __init__(self, glb, table_name, parent=None): + super(TableWindow, self).__init__(parent) + + self.data_model = LookupCreateModel(table_name + " Table", lambda: SQLAutoTableModel(glb, table_name)) + + self.model = QSortFilterProxyModel() + self.model.setSourceModel(self.data_model) + + self.view = QTableView() + self.view.setModel(self.model) + self.view.setEditTriggers(QAbstractItemView.NoEditTriggers) + self.view.verticalHeader().setVisible(False) + self.view.sortByColumn(-1, Qt.AscendingOrder) + self.view.setSortingEnabled(True) + + self.ResizeColumnsToContents() + + self.find_bar = FindBar(self, self, True) + + self.finder = ChildDataItemFinder(self.data_model) + + self.fetch_bar = FetchMoreRecordsBar(self.data_model, self) + + self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget()) + + self.setWidget(self.vbox.Widget()) + + AddSubWindow(glb.mainwindow.mdi_area, self, table_name + " Table") + + def Find(self, value, direction, pattern, context): + self.view.setFocus() + self.find_bar.Busy() + self.finder.Find(value, direction, pattern, context, self.FindDone) + + def FindDone(self, row): + self.find_bar.Idle() + if row >= 0: + self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex())) + else: + self.find_bar.NotFound() + +# Table list + +def GetTableList(glb): + tables = [] + query = QSqlQuery(glb.db) + if glb.dbref.is_sqlite3: + QueryExec(query, "SELECT name FROM sqlite_master WHERE type IN ( 'table' , 'view' ) ORDER BY name") + else: + QueryExec(query, "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type IN ( 'BASE TABLE' , 'VIEW' ) ORDER BY table_name") + while query.next(): + tables.append(query.value(0)) + if glb.dbref.is_sqlite3: + tables.append("sqlite_master") + else: + tables.append("information_schema.tables") + tables.append("information_schema.views") + tables.append("information_schema.columns") + return tables + +# Action Definition + +def CreateAction(label, tip, callback, parent=None, shortcut=None): + action = QAction(label, parent) + if shortcut != None: + action.setShortcuts(shortcut) + action.setStatusTip(tip) + action.triggered.connect(callback) + return action + +# Typical application actions + +def CreateExitAction(app, parent=None): + return CreateAction("&Quit", "Exit the application", app.closeAllWindows, parent, QKeySequence.Quit) + +# Typical MDI actions + +def CreateCloseActiveWindowAction(mdi_area): + return CreateAction("Cl&ose", "Close the active window", mdi_area.closeActiveSubWindow, mdi_area) + +def CreateCloseAllWindowsAction(mdi_area): + return CreateAction("Close &All", "Close all the windows", mdi_area.closeAllSubWindows, mdi_area) + +def CreateTileWindowsAction(mdi_area): + return CreateAction("&Tile", "Tile the windows", mdi_area.tileSubWindows, mdi_area) + +def CreateCascadeWindowsAction(mdi_area): + return CreateAction("&Cascade", "Cascade the windows", mdi_area.cascadeSubWindows, mdi_area) + +def CreateNextWindowAction(mdi_area): + return CreateAction("Ne&xt", "Move the focus to the next window", mdi_area.activateNextSubWindow, mdi_area, QKeySequence.NextChild) + +def CreatePreviousWindowAction(mdi_area): + return CreateAction("Pre&vious", "Move the focus to the previous window", mdi_area.activatePreviousSubWindow, mdi_area, QKeySequence.PreviousChild) + +# Typical MDI window menu + +class WindowMenu(): + + def __init__(self, mdi_area, menu): + self.mdi_area = mdi_area + self.window_menu = menu.addMenu("&Windows") + self.close_active_window = CreateCloseActiveWindowAction(mdi_area) + self.close_all_windows = CreateCloseAllWindowsAction(mdi_area) + self.tile_windows = CreateTileWindowsAction(mdi_area) + self.cascade_windows = CreateCascadeWindowsAction(mdi_area) + self.next_window = CreateNextWindowAction(mdi_area) + self.previous_window = CreatePreviousWindowAction(mdi_area) + self.window_menu.aboutToShow.connect(self.Update) + + def Update(self): + self.window_menu.clear() + sub_window_count = len(self.mdi_area.subWindowList()) + have_sub_windows = sub_window_count != 0 + self.close_active_window.setEnabled(have_sub_windows) + self.close_all_windows.setEnabled(have_sub_windows) + self.tile_windows.setEnabled(have_sub_windows) + self.cascade_windows.setEnabled(have_sub_windows) + self.next_window.setEnabled(have_sub_windows) + self.previous_window.setEnabled(have_sub_windows) + self.window_menu.addAction(self.close_active_window) + self.window_menu.addAction(self.close_all_windows) + self.window_menu.addSeparator() + self.window_menu.addAction(self.tile_windows) + self.window_menu.addAction(self.cascade_windows) + self.window_menu.addSeparator() + self.window_menu.addAction(self.next_window) + self.window_menu.addAction(self.previous_window) + if sub_window_count == 0: + return + self.window_menu.addSeparator() + nr = 1 + for sub_window in self.mdi_area.subWindowList(): + label = str(nr) + " " + sub_window.name + if nr < 10: + label = "&" + label + action = self.window_menu.addAction(label) + action.setCheckable(True) + action.setChecked(sub_window == self.mdi_area.activeSubWindow()) + action.triggered.connect(lambda x=nr: self.setActiveSubWindow(x)) + self.window_menu.addAction(action) + nr += 1 + + def setActiveSubWindow(self, nr): + self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1]) + +# Font resize + +def ResizeFont(widget, diff): + font = widget.font() + sz = font.pointSize() + font.setPointSize(sz + diff) + widget.setFont(font) + +def ShrinkFont(widget): + ResizeFont(widget, -1) + +def EnlargeFont(widget): + ResizeFont(widget, 1) + +# Unique name for sub-windows + +def NumberedWindowName(name, nr): + if nr > 1: + name += " <" + str(nr) + ">" + return name + +def UniqueSubWindowName(mdi_area, name): + nr = 1 + while True: + unique_name = NumberedWindowName(name, nr) + ok = True + for sub_window in mdi_area.subWindowList(): + if sub_window.name == unique_name: + ok = False + break + if ok: + return unique_name + nr += 1 + +# Add a sub-window + +def AddSubWindow(mdi_area, sub_window, name): + unique_name = UniqueSubWindowName(mdi_area, name) + sub_window.setMinimumSize(200, 100) + sub_window.resize(800, 600) + sub_window.setWindowTitle(unique_name) + sub_window.setAttribute(Qt.WA_DeleteOnClose) + sub_window.setWindowIcon(sub_window.style().standardIcon(QStyle.SP_FileIcon)) + sub_window.name = unique_name + mdi_area.addSubWindow(sub_window) + sub_window.show() + +# Main window + +class MainWindow(QMainWindow): + + def __init__(self, glb, parent=None): + super(MainWindow, self).__init__(parent) + + self.glb = glb + + self.setWindowTitle("Exported SQL Viewer: " + glb.dbname) + self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon)) + self.setMinimumSize(200, 100) + + self.mdi_area = QMdiArea() + self.mdi_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded) + self.mdi_area.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded) + + self.setCentralWidget(self.mdi_area) + + menu = self.menuBar() + + file_menu = menu.addMenu("&File") + file_menu.addAction(CreateExitAction(glb.app, self)) + + edit_menu = menu.addMenu("&Edit") + edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find)) + edit_menu.addAction(CreateAction("Fetch &more records...", "Fetch more records", self.FetchMoreRecords, self, [QKeySequence(Qt.Key_F8)])) + edit_menu.addAction(CreateAction("&Shrink Font", "Make text smaller", self.ShrinkFont, self, [QKeySequence("Ctrl+-")])) + edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")])) + + reports_menu = menu.addMenu("&Reports") + reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self)) + + self.EventMenu(GetEventList(glb.db), reports_menu) + + self.TableMenu(GetTableList(glb), menu) + + self.window_menu = WindowMenu(self.mdi_area, menu) + + def Find(self): + win = self.mdi_area.activeSubWindow() + if win: + try: + win.find_bar.Activate() + except: + pass + + def FetchMoreRecords(self): + win = self.mdi_area.activeSubWindow() + if win: + try: + win.fetch_bar.Activate() + except: + pass + + def ShrinkFont(self): + win = self.mdi_area.activeSubWindow() + ShrinkFont(win.view) + + def EnlargeFont(self): + win = self.mdi_area.activeSubWindow() + EnlargeFont(win.view) + + def EventMenu(self, events, reports_menu): + branches_events = 0 + for event in events: + event = event.split(":")[0] + if event == "branches": + branches_events += 1 + dbid = 0 + for event in events: + dbid += 1 + event = event.split(":")[0] + if event == "branches": + label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")" + reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self)) + + def TableMenu(self, tables, menu): + table_menu = menu.addMenu("&Tables") + for table in tables: + table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda t=table: self.NewTableView(t), self)) + + def NewCallGraph(self): + CallGraphWindow(self.glb, self) + + def NewBranchView(self, event_id): + BranchWindow(self.glb, event_id, "", "", self) + + def NewTableView(self, table_name): + TableWindow(self.glb, table_name, self) + +# XED Disassembler + +class xed_state_t(Structure): + + _fields_ = [ + ("mode", c_int), + ("width", c_int) + ] + +class XEDInstruction(): + + def __init__(self, libxed): + # Current xed_decoded_inst_t structure is 192 bytes. Use 512 to allow for future expansion + xedd_t = c_byte * 512 + self.xedd = xedd_t() + self.xedp = addressof(self.xedd) + libxed.xed_decoded_inst_zero(self.xedp) + self.state = xed_state_t() + self.statep = addressof(self.state) + # Buffer for disassembled instruction text + self.buffer = create_string_buffer(256) + self.bufferp = addressof(self.buffer) + +class LibXED(): + + def __init__(self): + self.libxed = CDLL("libxed.so") + + self.xed_tables_init = self.libxed.xed_tables_init + self.xed_tables_init.restype = None + self.xed_tables_init.argtypes = [] + + self.xed_decoded_inst_zero = self.libxed.xed_decoded_inst_zero + self.xed_decoded_inst_zero.restype = None + self.xed_decoded_inst_zero.argtypes = [ c_void_p ] + + self.xed_operand_values_set_mode = self.libxed.xed_operand_values_set_mode + self.xed_operand_values_set_mode.restype = None + self.xed_operand_values_set_mode.argtypes = [ c_void_p, c_void_p ] + + self.xed_decoded_inst_zero_keep_mode = self.libxed.xed_decoded_inst_zero_keep_mode + self.xed_decoded_inst_zero_keep_mode.restype = None + self.xed_decoded_inst_zero_keep_mode.argtypes = [ c_void_p ] + + self.xed_decode = self.libxed.xed_decode + self.xed_decode.restype = c_int + self.xed_decode.argtypes = [ c_void_p, c_void_p, c_uint ] + + self.xed_format_context = self.libxed.xed_format_context + self.xed_format_context.restype = c_uint + self.xed_format_context.argtypes = [ c_int, c_void_p, c_void_p, c_int, c_ulonglong, c_void_p, c_void_p ] + + self.xed_tables_init() + + def Instruction(self): + return XEDInstruction(self) + + def SetMode(self, inst, mode): + if mode: + inst.state.mode = 4 # 32-bit + inst.state.width = 4 # 4 bytes + else: + inst.state.mode = 1 # 64-bit + inst.state.width = 8 # 8 bytes + self.xed_operand_values_set_mode(inst.xedp, inst.statep) + + def DisassembleOne(self, inst, bytes_ptr, bytes_cnt, ip): + self.xed_decoded_inst_zero_keep_mode(inst.xedp) + err = self.xed_decode(inst.xedp, bytes_ptr, bytes_cnt) + if err: + return 0, "" + # Use AT&T mode (2), alternative is Intel (3) + ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0) + if not ok: + return 0, "" + # Return instruction length and the disassembled instruction text + # For now, assume the length is in byte 166 + return inst.xedd[166], inst.buffer.value + +def TryOpen(file_name): + try: + return open(file_name, "rb") + except: + return None + +def Is64Bit(f): + result = sizeof(c_void_p) + # ELF support only + pos = f.tell() + f.seek(0) + header = f.read(7) + f.seek(pos) + magic = header[0:4] + eclass = ord(header[4]) + encoding = ord(header[5]) + version = ord(header[6]) + if magic == chr(127) + "ELF" and eclass > 0 and eclass < 3 and encoding > 0 and encoding < 3 and version == 1: + result = True if eclass == 2 else False + return result + +# Global data + +class Glb(): + + def __init__(self, dbref, db, dbname): + self.dbref = dbref + self.db = db + self.dbname = dbname + self.home_dir = os.path.expanduser("~") + self.buildid_dir = os.getenv("PERF_BUILDID_DIR") + if self.buildid_dir: + self.buildid_dir += "/.build-id/" + else: + self.buildid_dir = self.home_dir + "/.debug/.build-id/" + self.app = None + self.mainwindow = None + self.instances_to_shutdown_on_exit = weakref.WeakSet() + try: + self.disassembler = LibXED() + self.have_disassembler = True + except: + self.have_disassembler = False + + def FileFromBuildId(self, build_id): + file_name = self.buildid_dir + build_id[0:2] + "/" + build_id[2:] + "/elf" + return TryOpen(file_name) + + def FileFromNamesAndBuildId(self, short_name, long_name, build_id): + # Assume current machine i.e. no support for virtualization + if short_name[0:7] == "[kernel" and os.path.basename(long_name) == "kcore": + file_name = os.getenv("PERF_KCORE") + f = TryOpen(file_name) if file_name else None + if f: + return f + # For now, no special handling if long_name is /proc/kcore + f = TryOpen(long_name) + if f: + return f + f = self.FileFromBuildId(build_id) + if f: + return f + return None + + def AddInstanceToShutdownOnExit(self, instance): + self.instances_to_shutdown_on_exit.add(instance) + + # Shutdown any background processes or threads + def ShutdownInstances(self): + for x in self.instances_to_shutdown_on_exit: + try: + x.Shutdown() + except: + pass + +# Database reference + +class DBRef(): + + def __init__(self, is_sqlite3, dbname): + self.is_sqlite3 = is_sqlite3 + self.dbname = dbname + + def Open(self, connection_name): + dbname = self.dbname + if self.is_sqlite3: + db = QSqlDatabase.addDatabase("QSQLITE", connection_name) + else: + db = QSqlDatabase.addDatabase("QPSQL", connection_name) + opts = dbname.split() + for opt in opts: + if "=" in opt: + opt = opt.split("=") + if opt[0] == "hostname": + db.setHostName(opt[1]) + elif opt[0] == "port": + db.setPort(int(opt[1])) + elif opt[0] == "username": + db.setUserName(opt[1]) + elif opt[0] == "password": + db.setPassword(opt[1]) + elif opt[0] == "dbname": + dbname = opt[1] + else: + dbname = opt + + db.setDatabaseName(dbname) + if not db.open(): + raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) + return db, dbname + +# Main + +def Main(): + if (len(sys.argv) < 2): + print >> sys.stderr, "Usage is: exported-sql-viewer.py <database name>" + raise Exception("Too few arguments") + + dbname = sys.argv[1] + + is_sqlite3 = False + try: + f = open(dbname) + if f.read(15) == "SQLite format 3": + is_sqlite3 = True + f.close() + except: + pass + + dbref = DBRef(is_sqlite3, dbname) + db, dbname = dbref.Open("main") + glb = Glb(dbref, db, dbname) + app = QApplication(sys.argv) + glb.app = app + mainwindow = MainWindow(glb) + glb.mainwindow = mainwindow + mainwindow.show() + err = app.exec_() + glb.ShutdownInstances() + db.close() + sys.exit(err) + +if __name__ == "__main__": + Main() diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build @@ -5,6 +5,7 @@ ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) libperf-y += ioctl.o endif libperf-y += kcmp.o +libperf-y += mount_flags.o libperf-y += pkey_alloc.o libperf-y += prctl.o libperf-y += sockaddr.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h @@ -24,6 +24,7 @@ struct strarray { } size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val); +size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags); struct trace; struct thread; @@ -122,6 +123,12 @@ size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_ar size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg); #define SCA_KCMP_IDX syscall_arg__scnprintf_kcmp_idx +unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg, unsigned long flags); +#define SCAMV_MOUNT_FLAGS syscall_arg__mask_val_mount_flags + +size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_MOUNT_FLAGS syscall_arg__scnprintf_mount_flags + size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg); #define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/cone.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/drm_ioctl.sh b/tools/perf/trace/beauty/drm_ioctl.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/drm/ diff --git a/tools/perf/trace/beauty/eventfd.c b/tools/perf/trace/beauty/eventfd.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #ifndef EFD_SEMAPHORE #define EFD_SEMAPHORE 1 #endif diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/fcntl.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include "trace/beauty/beauty.h" #include <linux/kernel.h> diff --git a/tools/perf/trace/beauty/futex_op.c b/tools/perf/trace/beauty/futex_op.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <linux/futex.h> #ifndef FUTEX_WAIT_BITSET diff --git a/tools/perf/trace/beauty/futex_val3.c b/tools/perf/trace/beauty/futex_val3.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <linux/futex.h> #ifndef FUTEX_BITSET_MATCH_ANY diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/ioctl.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/kcmp.c b/tools/perf/trace/beauty/kcmp.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/kcmp.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/kcmp_type.sh b/tools/perf/trace/beauty/kcmp_type.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ diff --git a/tools/perf/trace/beauty/kvm_ioctl.sh b/tools/perf/trace/beauty/kvm_ioctl.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ diff --git a/tools/perf/trace/beauty/madvise_behavior.sh b/tools/perf/trace/beauty/madvise_behavior.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/ diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c @@ -1,5 +1,6 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <uapi/linux/mman.h> +#include <linux/log2.h> static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, struct syscall_arg *arg) @@ -30,50 +31,23 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot +static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size) +{ +#include "trace/beauty/generated/mmap_flags_array.c" + static DEFINE_STRARRAY(mmap_flags); + + return strarray__scnprintf_flags(&strarray__mmap_flags, bf, size, flags); +} + static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, struct syscall_arg *arg) { - int printed = 0, flags = arg->val; + unsigned long flags = arg->val; if (flags & MAP_ANONYMOUS) arg->mask |= (1 << 4) | (1 << 5); /* Mask 4th ('fd') and 5th ('offset') args, ignored */ -#define P_MMAP_FLAG(n) \ - if (flags & MAP_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~MAP_##n; \ - } - - P_MMAP_FLAG(SHARED); - P_MMAP_FLAG(PRIVATE); -#ifdef MAP_32BIT - P_MMAP_FLAG(32BIT); -#endif - P_MMAP_FLAG(ANONYMOUS); - P_MMAP_FLAG(DENYWRITE); - P_MMAP_FLAG(EXECUTABLE); - P_MMAP_FLAG(FILE); - P_MMAP_FLAG(FIXED); -#ifdef MAP_FIXED_NOREPLACE - P_MMAP_FLAG(FIXED_NOREPLACE); -#endif - P_MMAP_FLAG(GROWSDOWN); - P_MMAP_FLAG(HUGETLB); - P_MMAP_FLAG(LOCKED); - P_MMAP_FLAG(NONBLOCK); - P_MMAP_FLAG(NORESERVE); - P_MMAP_FLAG(POPULATE); - P_MMAP_FLAG(STACK); - P_MMAP_FLAG(UNINITIALIZED); -#ifdef MAP_SYNC - P_MMAP_FLAG(SYNC); -#endif -#undef P_MMAP_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; + return mmap__scnprintf_flags(flags, bf, size); } #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 2 ] ; then + [ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/` + header_dir=tools/include/uapi/asm-generic + arch_header_dir=tools/arch/${hostarch}/include/uapi/asm +else + header_dir=$1 + arch_header_dir=$2 +fi + +arch_mman=${arch_header_dir}/mman.h + +# those in egrep -vw are flags, we want just the bits + +printf "static const char *mmap_flags[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' +egrep -q $regex ${arch_mman} && \ +(egrep $regex ${arch_mman} | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") +egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman} && +(egrep $regex ${header_dir}/mman-common.h | \ + egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") +egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.h>.*' ${arch_mman} && +(egrep $regex ${header_dir}/mman.h | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") +printf "};\n" diff --git a/tools/perf/trace/beauty/mode_t.c b/tools/perf/trace/beauty/mode_t.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> diff --git a/tools/perf/trace/beauty/mount_flags.c b/tools/perf/trace/beauty/mount_flags.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * trace/beauty/mount_flags.c + * + * Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + */ + +#include "trace/beauty/beauty.h" +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <sys/mount.h> + +static size_t mount__scnprintf_flags(unsigned long flags, char *bf, size_t size) +{ +#include "trace/beauty/generated/mount_flags_array.c" + static DEFINE_STRARRAY(mount_flags); + + return strarray__scnprintf_flags(&strarray__mount_flags, bf, size, flags); +} + +unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg __maybe_unused, unsigned long flags) +{ + // do_mount in fs/namespace.c: + /* + * Pre-0.97 versions of mount() didn't have a flags word. When the + * flags word was introduced its top half was required to have the + * magic value 0xC0ED, and this remained so until 2.4.0-test9. + * Therefore, if this magic number is present, it carries no + * information and must be discarded. + */ + if ((flags & MS_MGC_MSK) == MS_MGC_VAL) + flags &= ~MS_MGC_MSK; + + return flags; +} + +size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long flags = arg->val; + + return mount__scnprintf_flags(flags, bf, size); +} diff --git a/tools/perf/trace/beauty/mount_flags.sh b/tools/perf/trace/beauty/mount_flags.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ + +printf "static const char *mount_flags[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*' +egrep $regex ${header_dir}/fs.h | egrep -v '(MSK|VERBOSE|MGC_VAL)\>' | \ + sed -r "s/$regex/\2 \2 \1/g" | sort -n | \ + xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+\(1<<([[:digit:]]+)\)[[:space:]]*.*' +egrep $regex ${header_dir}/fs.h | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[%s + 1] = \"%s\",\n" +printf "};\n" diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <sys/types.h> #include <sys/socket.h> diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #ifndef PERF_FLAG_FD_NO_GROUP # define PERF_FLAG_FD_NO_GROUP (1UL << 0) #endif diff --git a/tools/perf/trace/beauty/perf_ioctl.sh b/tools/perf/trace/beauty/perf_ioctl.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c @@ -1,4 +1,5 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 + size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg) { int pid = arg->val; diff --git a/tools/perf/trace/beauty/pkey_alloc.c b/tools/perf/trace/beauty/pkey_alloc.c @@ -1,40 +1,36 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/pkey_alloc.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" #include <linux/kernel.h> #include <linux/log2.h> -static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size) +size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags) { int i, printed = 0; -#include "trace/beauty/generated/pkey_alloc_access_rights_array.c" - static DEFINE_STRARRAY(pkey_alloc_access_rights); - - if (access_rights == 0) { - const char *s = strarray__pkey_alloc_access_rights.entries[0]; + if (flags == 0) { + const char *s = sa->entries[0]; if (s) return scnprintf(bf, size, "%s", s); return scnprintf(bf, size, "%d", 0); } - for (i = 1; i < strarray__pkey_alloc_access_rights.nr_entries; ++i) { - int bit = 1 << (i - 1); + for (i = 1; i < sa->nr_entries; ++i) { + unsigned long bit = 1UL << (i - 1); - if (!(access_rights & bit)) + if (!(flags & bit)) continue; if (printed != 0) printed += scnprintf(bf + printed, size - printed, "|"); - if (strarray__pkey_alloc_access_rights.entries[i] != NULL) - printed += scnprintf(bf + printed, size - printed, "%s", strarray__pkey_alloc_access_rights.entries[i]); + if (sa->entries[i] != NULL) + printed += scnprintf(bf + printed, size - printed, "%s", sa->entries[i]); else printed += scnprintf(bf + printed, size - printed, "0x%#", bit); } @@ -42,6 +38,14 @@ static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, s return printed; } +static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size) +{ +#include "trace/beauty/generated/pkey_alloc_access_rights_array.c" + static DEFINE_STRARRAY(pkey_alloc_access_rights); + + return strarray__scnprintf_flags(&strarray__pkey_alloc_access_rights, bf, size, access_rights); +} + size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg) { unsigned long cmd = arg->val; diff --git a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/ diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/prctl.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ diff --git a/tools/perf/trace/beauty/sched_policy.c b/tools/perf/trace/beauty/sched_policy.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <sched.h> /* diff --git a/tools/perf/trace/beauty/seccomp.c b/tools/perf/trace/beauty/seccomp.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #ifndef SECCOMP_SET_MODE_STRICT #define SECCOMP_SET_MODE_STRICT 0 #endif diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <signal.h> static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) diff --git a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/ diff --git a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/ diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 // Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/socket.c b/tools/perf/trace/beauty/socket.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/socket.c * diff --git a/tools/perf/trace/beauty/socket_ipproto.sh b/tools/perf/trace/beauty/socket_ipproto.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ diff --git a/tools/perf/trace/beauty/socket_type.c b/tools/perf/trace/beauty/socket_type.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <sys/types.h> #include <sys/socket.h> diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/statx.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <sys/types.h> #include <sys/wait.h> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c @@ -139,6 +139,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" +#include "arch/sparc/annotate/instructions.c" static struct arch architectures[] = { { @@ -170,6 +171,13 @@ static struct arch architectures[] = { .comment_char = '#', }, }, + { + .name = "sparc", + .init = sparc__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, }; static void ins__delete(struct ins_operands *ops) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c @@ -962,16 +962,23 @@ s64 perf_event__process_auxtrace(struct perf_session *session, #define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64 #define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024 -void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) +void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, + bool no_sample) { - synth_opts->instructions = true; synth_opts->branches = true; synth_opts->transactions = true; synth_opts->ptwrites = true; synth_opts->pwr_events = true; synth_opts->errors = true; - synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; - synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; + if (no_sample) { + synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; + synth_opts->period = 1; + synth_opts->calls = true; + } else { + synth_opts->instructions = true; + synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; + synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; + } synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; synth_opts->initial_skip = 0; @@ -999,7 +1006,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, } if (!str) { - itrace_synth_opts__set_default(synth_opts); + itrace_synth_opts__set_default(synth_opts, false); return 0; } diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h @@ -58,6 +58,7 @@ enum itrace_period_type { /** * struct itrace_synth_opts - AUX area tracing synthesis options. * @set: indicates whether or not options have been set + * @default_no_sample: Default to no sampling. * @inject: indicates the event (not just the sample) must be fully synthesized * because 'perf inject' will write it out * @instructions: whether to synthesize 'instructions' events @@ -82,6 +83,7 @@ enum itrace_period_type { */ struct itrace_synth_opts { bool set; + bool default_no_sample; bool inject; bool instructions; bool branches; @@ -528,7 +530,8 @@ int perf_event__process_auxtrace_error(struct perf_session *session, union perf_event *event); int itrace_parse_synth_opts(const struct option *opt, const char *str, int unset); -void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts); +void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, + bool no_sample); size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp); void perf_session__auxtrace_error_inc(struct perf_session *session, diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c @@ -244,6 +244,27 @@ static void cs_etm__free(struct perf_session *session) zfree(&aux); } +static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) +{ + struct machine *machine; + + machine = etmq->etm->machine; + + if (address >= etmq->etm->kernel_start) { + if (machine__is_host(machine)) + return PERF_RECORD_MISC_KERNEL; + else + return PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (machine__is_host(machine)) + return PERF_RECORD_MISC_USER; + else if (perf_guest) + return PERF_RECORD_MISC_GUEST_USER; + else + return PERF_RECORD_MISC_HYPERVISOR; + } +} + static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, size_t size, u8 *buffer) { @@ -258,10 +279,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, return -1; machine = etmq->etm->machine; - if (address >= etmq->etm->kernel_start) - cpumode = PERF_RECORD_MISC_KERNEL; - else - cpumode = PERF_RECORD_MISC_USER; + cpumode = cs_etm__cpu_mode(etmq, address); thread = etmq->thread; if (!thread) { @@ -653,7 +671,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, struct perf_sample sample = {.ip = 0,}; event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); event->sample.header.size = sizeof(struct perf_event_header); sample.ip = addr; @@ -665,7 +683,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, sample.cpu = etmq->packet->cpu; sample.flags = 0; sample.insn_len = 1; - sample.cpumode = event->header.misc; + sample.cpumode = event->sample.header.misc; if (etm->synth_opts.last_branch) { cs_etm__copy_last_branch_rb(etmq); @@ -706,12 +724,15 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) u64 nr; struct branch_entry entries; } dummy_bs; + u64 ip; + + ip = cs_etm__last_executed_instr(etmq->prev_packet); event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); event->sample.header.size = sizeof(struct perf_event_header); - sample.ip = cs_etm__last_executed_instr(etmq->prev_packet); + sample.ip = ip; sample.pid = etmq->pid; sample.tid = etmq->tid; sample.addr = cs_etm__first_executed_instr(etmq->packet); @@ -720,7 +741,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) sample.period = 1; sample.cpu = etmq->packet->cpu; sample.flags = 0; - sample.cpumode = PERF_RECORD_MISC_USER; + sample.cpumode = event->sample.header.misc; /* * perf report cannot handle events without a branch stack @@ -1432,7 +1453,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (session->itrace_synth_opts && session->itrace_synth_opts->set) { etm->synth_opts = *session->itrace_synth_opts; } else { - itrace_synth_opts__set_default(&etm->synth_opts); + itrace_synth_opts__set_default(&etm->synth_opts, + session->itrace_synth_opts->default_no_sample); etm->synth_opts.callchain = false; } diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h @@ -63,6 +63,7 @@ struct perf_env { struct numa_node *numa_nodes; struct memory_node *memory_nodes; unsigned long long memory_bsize; + u64 clockid_res_ns; }; extern struct perf_env perf_env; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c @@ -308,6 +308,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, event->fork.pid = tgid; event->fork.tid = pid; event->fork.header.type = PERF_RECORD_FORK; + event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC; event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c @@ -358,7 +358,7 @@ void perf_evlist__disable(struct perf_evlist *evlist) struct perf_evsel *pos; evlist__for_each_entry(evlist, pos) { - if (!perf_evsel__is_group_leader(pos) || !pos->fd) + if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd) continue; perf_evsel__disable(pos); } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c @@ -232,6 +232,7 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->leader = evsel; evsel->unit = ""; evsel->scale = 1.0; + evsel->max_events = ULONG_MAX; evsel->evlist = NULL; evsel->bpf_fd = -1; INIT_LIST_HEAD(&evsel->node); @@ -793,6 +794,9 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_MAX_STACK: max_stack = term->val.max_stack; break; + case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS: + evsel->max_events = term->val.max_events; + break; case PERF_EVSEL__CONFIG_TERM_INHERIT: /* * attr->inherit should has already been set by @@ -1203,16 +1207,27 @@ int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter) int perf_evsel__enable(struct perf_evsel *evsel) { - return perf_evsel__run_ioctl(evsel, - PERF_EVENT_IOC_ENABLE, - 0); + int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0); + + if (!err) + evsel->disabled = false; + + return err; } int perf_evsel__disable(struct perf_evsel *evsel) { - return perf_evsel__run_ioctl(evsel, - PERF_EVENT_IOC_DISABLE, - 0); + int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0); + /* + * We mark it disabled here so that tools that disable a event can + * ignore events after they disable it. I.e. the ring buffer may have + * already a few more events queued up before the kernel got the stop + * request. + */ + if (!err) + evsel->disabled = true; + + return err; } int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h @@ -46,6 +46,7 @@ enum term_type { PERF_EVSEL__CONFIG_TERM_STACK_USER, PERF_EVSEL__CONFIG_TERM_INHERIT, PERF_EVSEL__CONFIG_TERM_MAX_STACK, + PERF_EVSEL__CONFIG_TERM_MAX_EVENTS, PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_BRANCH, @@ -65,6 +66,7 @@ struct perf_evsel_config_term { bool inherit; bool overwrite; char *branch; + unsigned long max_events; } val; bool weak; }; @@ -99,6 +101,8 @@ struct perf_evsel { struct perf_counts *prev_raw_counts; int idx; u32 ids; + unsigned long max_events; + unsigned long nr_events_printed; char *name; double scale; const char *unit; @@ -119,6 +123,7 @@ struct perf_evsel { bool snapshot; bool supported; bool needs_swap; + bool disabled; bool no_aux_samples; bool immediate; bool system_wide; diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h @@ -29,6 +29,12 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #elif defined(__powerpc__) #define GEN_ELF_ARCH EM_PPC #define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__sparc__) && defined(__arch64__) +#define GEN_ELF_ARCH EM_SPARCV9 +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__sparc__) +#define GEN_ELF_ARCH EM_SPARC +#define GEN_ELF_CLASS ELFCLASS32 #else #error "unsupported architecture" #endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c @@ -1034,6 +1034,13 @@ static int write_auxtrace(struct feat_fd *ff, return err; } +static int write_clockid(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) +{ + return do_write(ff, &ff->ph->env.clockid_res_ns, + sizeof(ff->ph->env.clockid_res_ns)); +} + static int cpu_cache_level__sort(const void *a, const void *b) { struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a; @@ -1508,6 +1515,12 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp) fprintf(fp, "# Core ID and Socket ID information is not available\n"); } +static void print_clockid(struct feat_fd *ff, FILE *fp) +{ + fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n", + ff->ph->env.clockid_res_ns * 1000); +} + static void free_event_desc(struct perf_evsel *events) { struct perf_evsel *evsel; @@ -2531,6 +2544,15 @@ out: return ret; } +static int process_clockid(struct feat_fd *ff, + void *data __maybe_unused) +{ + if (do_read_u64(ff, &ff->ph->env.clockid_res_ns)) + return -1; + + return 0; +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2590,6 +2612,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(CACHE, cache, true), FEAT_OPR(SAMPLE_TIME, sample_time, false), FEAT_OPR(MEM_TOPOLOGY, mem_topology, true), + FEAT_OPR(CLOCKID, clockid, false) }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h @@ -38,6 +38,7 @@ enum { HEADER_CACHE, HEADER_SAMPLE_TIME, HEADER_MEM_TOPOLOGY, + HEADER_CLOCKID, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c @@ -269,6 +269,13 @@ static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue, return 0; } +static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip) +{ + return machine__kernel_ip(bts->machine, ip) ? + PERF_RECORD_MISC_KERNEL : + PERF_RECORD_MISC_USER; +} + static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, struct branch *branch) { @@ -281,12 +288,8 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, bts->num_events++ <= bts->synth_opts.initial_skip) return 0; - event.sample.header.type = PERF_RECORD_SAMPLE; - event.sample.header.misc = PERF_RECORD_MISC_USER; - event.sample.header.size = sizeof(struct perf_event_header); - - sample.cpumode = PERF_RECORD_MISC_USER; sample.ip = le64_to_cpu(branch->from); + sample.cpumode = intel_bts_cpumode(bts, sample.ip); sample.pid = btsq->pid; sample.tid = btsq->tid; sample.addr = le64_to_cpu(branch->to); @@ -298,6 +301,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, sample.insn_len = btsq->intel_pt_insn.length; memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ); + event.sample.header.type = PERF_RECORD_SAMPLE; + event.sample.header.misc = sample.cpumode; + event.sample.header.size = sizeof(struct perf_event_header); + if (bts->synth_opts.inject) { event.sample.header.size = bts->branches_event_size; ret = perf_event__synthesize_sample(&event, @@ -910,7 +917,8 @@ int intel_bts_process_auxtrace_info(union perf_event *event, if (session->itrace_synth_opts && session->itrace_synth_opts->set) { bts->synth_opts = *session->itrace_synth_opts; } else { - itrace_synth_opts__set_default(&bts->synth_opts); + itrace_synth_opts__set_default(&bts->synth_opts, + session->itrace_synth_opts->default_no_sample); if (session->itrace_synth_opts) bts->synth_opts.thread_stack = session->itrace_synth_opts->thread_stack; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c @@ -407,6 +407,13 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) return auxtrace_cache__lookup(dso->auxtrace_cache, offset); } +static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip) +{ + return ip >= pt->kernel_start ? + PERF_RECORD_MISC_KERNEL : + PERF_RECORD_MISC_USER; +} + static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, @@ -429,10 +436,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, if (to_ip && *ip == to_ip) goto out_no_cache; - if (*ip >= ptq->pt->kernel_start) - cpumode = PERF_RECORD_MISC_KERNEL; - else - cpumode = PERF_RECORD_MISC_USER; + cpumode = intel_pt_cpumode(ptq->pt, *ip); thread = ptq->thread; if (!thread) { @@ -759,7 +763,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, if (pt->synth_opts.callchain) { size_t sz = sizeof(struct ip_callchain); - sz += pt->synth_opts.callchain_sz * sizeof(u64); + /* Add 1 to callchain_sz for callchain context */ + sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); ptq->chain = zalloc(sz); if (!ptq->chain) goto out_free; @@ -1058,15 +1063,11 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) { - event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; - event->sample.header.size = sizeof(struct perf_event_header); - if (!pt->timeless_decoding) sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); - sample->cpumode = PERF_RECORD_MISC_USER; sample->ip = ptq->state->from_ip; + sample->cpumode = intel_pt_cpumode(pt, sample->ip); sample->pid = ptq->pid; sample->tid = ptq->tid; sample->addr = ptq->state->to_ip; @@ -1075,6 +1076,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, sample->flags = ptq->flags; sample->insn_len = ptq->insn_len; memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = sample->cpumode; + event->sample.header.size = sizeof(struct perf_event_header); } static int intel_pt_inject_event(union perf_event *event, @@ -1160,7 +1165,8 @@ static void intel_pt_prep_sample(struct intel_pt *pt, if (pt->synth_opts.callchain) { thread_stack__sample(ptq->thread, ptq->chain, - pt->synth_opts.callchain_sz, sample->ip); + pt->synth_opts.callchain_sz + 1, + sample->ip, pt->kernel_start); sample->callchain = ptq->chain; } @@ -2559,7 +2565,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (session->itrace_synth_opts && session->itrace_synth_opts->set) { pt->synth_opts = *session->itrace_synth_opts; } else { - itrace_synth_opts__set_default(&pt->synth_opts); + itrace_synth_opts__set_default(&pt->synth_opts, + session->itrace_synth_opts->default_no_sample); if (use_browser != -1) { pt->synth_opts.branches = false; pt->synth_opts.callchain = true; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c @@ -1708,6 +1708,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event struct thread *parent = machine__findnew_thread(machine, event->fork.ppid, event->fork.ptid); + bool do_maps_clone = true; int err = 0; if (dump_trace) @@ -1736,9 +1737,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); + /* + * When synthesizing FORK events, we are trying to create thread + * objects for the already running tasks on the machine. + * + * Normally, for a kernel FORK event, we want to clone the parent's + * maps because that is what the kernel just did. + * + * But when synthesizing, this should not be done. If we do, we end up + * with overlapping maps as we process the sythesized MMAP2 events that + * get delivered shortly thereafter. + * + * Use the FORK event misc flags in an internal way to signal this + * situation, so we can elide the map clone when appropriate. + */ + if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC) + do_maps_clone = false; if (thread == NULL || parent == NULL || - thread__fork(thread, parent, sample->time) < 0) { + thread__fork(thread, parent, sample->time, do_maps_clone) < 0) { dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); err = -1; } @@ -2140,6 +2157,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread, return 0; } +static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, + struct callchain_cursor *cursor, + struct symbol **parent, + struct addr_location *root_al, + u8 *cpumode, int ent) +{ + int err = 0; + + while (--ent >= 0) { + u64 ip = chain->ips[ent]; + + if (ip >= PERF_CONTEXT_MAX) { + err = add_callchain_ip(thread, cursor, parent, + root_al, cpumode, ip, + false, NULL, NULL, 0); + break; + } + } + return err; +} + static int thread__resolve_callchain_sample(struct thread *thread, struct callchain_cursor *cursor, struct perf_evsel *evsel, @@ -2246,6 +2284,12 @@ static int thread__resolve_callchain_sample(struct thread *thread, } check_calls: + if (callchain_param.order != ORDER_CALLEE) { + err = find_prev_cpumode(chain, thread, cursor, parent, root_al, + &cpumode, chain->nr - first_call); + if (err) + return (err < 0) ? err : 0; + } for (i = first_call, nr_entries = 0; i < chain_nr && nr_entries < max_stack; i++) { u64 ip; @@ -2260,9 +2304,15 @@ check_calls: continue; #endif ip = chain->ips[j]; - if (ip < PERF_CONTEXT_MAX) ++nr_entries; + else if (callchain_param.order != ORDER_CALLEE) { + err = find_prev_cpumode(chain, thread, cursor, parent, + root_al, &cpumode, j); + if (err) + return (err < 0) ? err : 0; + continue; + } err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c @@ -926,6 +926,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack", + [PARSE_EVENTS__TERM_TYPE_MAX_EVENTS] = "nr", [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite", [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite", [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", @@ -1037,6 +1038,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_MAX_STACK: CHECK_TYPE_VAL(NUM); break; + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: + CHECK_TYPE_VAL(NUM); + break; default: err->str = strdup("unknown term"); err->idx = term->err_term; @@ -1084,6 +1088,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_INHERIT: case PARSE_EVENTS__TERM_TYPE_NOINHERIT: case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: case PARSE_EVENTS__TERM_TYPE_OVERWRITE: case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: return config_term_common(attr, term, err); @@ -1162,6 +1167,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_MAX_STACK: ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num); break; + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: + ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num); + break; case PARSE_EVENTS__TERM_TYPE_OVERWRITE: ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0); break; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h @@ -71,6 +71,7 @@ enum { PARSE_EVENTS__TERM_TYPE_NOINHERIT, PARSE_EVENTS__TERM_TYPE_INHERIT, PARSE_EVENTS__TERM_TYPE_MAX_STACK, + PARSE_EVENTS__TERM_TYPE_MAX_EVENTS, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE, PARSE_EVENTS__TERM_TYPE_OVERWRITE, PARSE_EVENTS__TERM_TYPE_DRV_CFG, diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l @@ -269,6 +269,7 @@ time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); } +nr { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); } inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); } no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c @@ -324,7 +324,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) plt_entry_size = 16; break; - default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */ + case EM_SPARC: + plt_header_size = 48; + plt_entry_size = 12; + break; + + case EM_SPARCV9: + plt_header_size = 128; + plt_entry_size = 32; + break; + + default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */ plt_header_size = shdr_plt.sh_entsize; plt_entry_size = shdr_plt.sh_entsize; break; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h @@ -123,7 +123,8 @@ struct symbol_conf { const char *vmlinux_name, *kallsyms_name, *source_prefix, - *field_sep; + *field_sep, + *graph_function; const char *default_guest_vmlinux_name, *default_guest_kallsyms, *default_guest_modules; diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c @@ -310,20 +310,46 @@ void thread_stack__free(struct thread *thread) } } +static inline u64 callchain_context(u64 ip, u64 kernel_start) +{ + return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL; +} + void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, - size_t sz, u64 ip) + size_t sz, u64 ip, u64 kernel_start) { - size_t i; + u64 context = callchain_context(ip, kernel_start); + u64 last_context; + size_t i, j; - if (!thread || !thread->ts) - chain->nr = 1; - else - chain->nr = min(sz, thread->ts->cnt + 1); + if (sz < 2) { + chain->nr = 0; + return; + } - chain->ips[0] = ip; + chain->ips[0] = context; + chain->ips[1] = ip; + + if (!thread || !thread->ts) { + chain->nr = 2; + return; + } + + last_context = context; + + for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) { + ip = thread->ts->stack[thread->ts->cnt - j].ret_addr; + context = callchain_context(ip, kernel_start); + if (context != last_context) { + if (i >= sz - 1) + break; + chain->ips[i++] = context; + last_context = context; + } + chain->ips[i] = ip; + } - for (i = 1; i < chain->nr; i++) - chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr; + chain->nr = i; } struct call_return_processor * diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h @@ -84,7 +84,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, u64 to_ip, u16 insn_len, u64 trace_nr); void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr); void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, - size_t sz, u64 ip); + size_t sz, u64 ip, u64 kernel_start); int thread_stack__flush(struct thread *thread); void thread_stack__free(struct thread *thread); size_t thread_stack__depth(struct thread *thread); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c @@ -330,7 +330,8 @@ static int thread__prepare_access(struct thread *thread) } static int thread__clone_map_groups(struct thread *thread, - struct thread *parent) + struct thread *parent, + bool do_maps_clone) { /* This is new thread, we share map groups for process. */ if (thread->pid_ == parent->pid_) @@ -341,15 +342,11 @@ static int thread__clone_map_groups(struct thread *thread, thread->pid_, thread->tid, parent->pid_, parent->tid); return 0; } - /* But this one is new process, copy maps. */ - if (map_groups__clone(thread, parent->mg) < 0) - return -ENOMEM; - - return 0; + return do_maps_clone ? map_groups__clone(thread, parent->mg) : 0; } -int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone) { if (parent->comm_set) { const char *comm = thread__comm_str(parent); @@ -362,7 +359,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) } thread->ppid = parent->tid; - return thread__clone_map_groups(thread, parent); + return thread__clone_map_groups(thread, parent, do_maps_clone); } void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h @@ -42,6 +42,8 @@ struct thread { void *addr_space; struct unwind_libunwind_ops *unwind_libunwind_ops; #endif + bool filter; + int filter_entry_depth; }; struct machine; @@ -87,7 +89,7 @@ struct comm *thread__comm(const struct thread *thread); struct comm *thread__exec_comm(const struct thread *thread); const char *thread__comm_str(const struct thread *thread); int thread__insert_map(struct thread *thread, struct map *map); -int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone); size_t thread__fprintf(struct thread *thread, FILE *fp); struct thread *thread__main_thread(struct machine *machine, struct thread *thread); diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c @@ -45,13 +45,13 @@ static int __report_module(struct addr_location *al, u64 ip, Dwarf_Addr s; dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); - if (s != al->map->start) + if (s != al->map->start - al->map->pgoff) mod = 0; } if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, - (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start, + (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff, false); return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;