862 lines
32 KiB
Python
Executable File
862 lines
32 KiB
Python
Executable File
#!/usr/bin/python3
|
|
|
|
# Collect information about a crash and create a report in the directory
|
|
# specified by apport.fileutils.report_dir.
|
|
# See https://wiki.ubuntu.com/Apport for details.
|
|
#
|
|
# Copyright (c) 2006 - 2016 Canonical Ltd.
|
|
# Author: Martin Pitt <martin.pitt@ubuntu.com>
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License as published by the
|
|
# Free Software Foundation; either version 2 of the License, or (at your
|
|
# option) any later version. See http://www.gnu.org/copyleft/gpl.html for
|
|
# the full text of the license.
|
|
|
|
import sys, os, os.path, subprocess, time, traceback, pwd, io
|
|
import signal, inspect, grp, fcntl, socket, atexit, array, struct
|
|
import errno, argparse, stat
|
|
|
|
import apport, apport.fileutils
|
|
|
|
#################################################################
|
|
#
|
|
# functions
|
|
#
|
|
#################################################################
|
|
|
|
|
|
def check_lock():
    '''Exit unless this process can become the only running apport instance.

    An exclusive lock on /var/run/apport.lock is requested; if it cannot be
    obtained within 30 seconds (or locking fails), an error is logged and the
    process exits with status 1.  This keeps a burst of crashes from bringing
    the system to its knees.

    The lock file descriptor is intentionally left open so that the lock is
    held for the rest of the process lifetime.
    '''

    def bail_out(*_unused):
        # used both as SIGALRM handler and as a plain function
        error_log('another apport instance is already running, aborting')
        sys.exit(1)

    # create a lock file
    open_flags = os.O_WRONLY | os.O_CREAT | os.O_NOFOLLOW
    try:
        lock_fd = os.open("/var/run/apport.lock", open_flags, mode=0o600)
    except OSError as err:
        error_log('cannot create lock file (uid %i): %s' % (os.getuid(), str(err)))
        sys.exit(1)

    previous_handler = signal.signal(signal.SIGALRM, bail_out)
    signal.alarm(30)  # Timeout after that many seconds
    try:
        fcntl.lockf(lock_fd, fcntl.LOCK_EX)
    except IOError:
        bail_out()
    finally:
        # always disarm the timer and put the old handler back
        signal.alarm(0)
        signal.signal(signal.SIGALRM, previous_handler)
|
|
|
|
|
|
# State describing the crashed process; populated by get_pid_info().
pidstat = crash_uid = crash_gid = cwd = proc_pid_fd = None


def proc_pid_opener(path, flags):
    '''Opener for open() which resolves path relative to proc_pid_fd.

    Return the new file descriptor.
    '''
    fd = os.open(path, flags, dir_fd=proc_pid_fd)
    return fd
|
|
|
|
|
|
def get_pid_info(pid):
    '''Collect /proc information about pid into the module globals.

    Sets proc_pid_fd (directory fd of /proc/<pid>), pidstat (stat result of
    /proc/<pid>/stat), crash_uid/crash_gid (parsed from /proc/<pid>/status)
    and cwd (directory fd of the process' working directory).
    '''

    global pidstat, crash_uid, crash_gid, cwd, proc_pid_fd

    dir_flags = os.O_RDONLY | os.O_PATH | os.O_DIRECTORY
    proc_pid_fd = os.open('/proc/%s' % pid, dir_flags)

    # unhandled exceptions on missing or invalidly formatted files are okay
    # here -- we want to know in the log file
    pidstat = os.stat('stat', dir_fd=proc_pid_fd)

    # determine UID and GID of the target process; do *not* use the owner of
    # /proc/pid/stat, as that will be root for setuid or unreadable programs!
    # (this matters when suid_dumpable is enabled)
    with open('status', opener=proc_pid_opener) as status_file:
        status_text = status_file.read()
    crash_uid, crash_gid = apport.fileutils.get_uid_and_gid(status_text)

    assert crash_uid is not None, 'failed to parse Uid'
    assert crash_gid is not None, 'failed to parse Gid'

    cwd = os.open('cwd', dir_flags, dir_fd=proc_pid_fd)
|
|
|
|
|
|
def get_process_starttime():
    '''Return the start time of the crashed process.

    The "stat" file is read relative to proc_pid_fd and handed to
    apport.fileutils.get_starttime().
    '''

    with open("stat", opener=proc_pid_opener) as stat_file:
        stat_text = stat_file.read()
    return apport.fileutils.get_starttime(stat_text)
|
|
|
|
|
|
def get_apport_starttime():
    '''Return the start time of this Apport process.

    Reads /proc/<own pid>/stat and hands its contents to
    apport.fileutils.get_starttime().
    '''

    own_stat_path = "/proc/%s/stat" % os.getpid()
    with open(own_stat_path) as stat_file:
        stat_text = stat_file.read()
    return apport.fileutils.get_starttime(stat_text)
|
|
|
|
|
|
def drop_privileges():
    '''Change effective user and group to crash_[ug]id

    When running as root, all supplemental groups are dropped first.

    Raises AssertionError if the supplemental groups or the effective ids
    do not have the expected values afterwards.  The checks are explicit
    "if ...: raise" statements rather than assert statements, so that they
    are still performed when Python runs with -O.
    '''
    # Drop any supplemental groups, or we'll still be in the root group
    if os.getuid() == 0:
        os.setgroups([])
        if os.getgroups() != []:
            raise AssertionError('failed to drop supplemental groups')
    os.setregid(-1, crash_gid)
    os.setreuid(-1, crash_uid)
    if os.getegid() != crash_gid:
        raise AssertionError('effective gid was not changed to crash gid')
    if os.geteuid() != crash_uid:
        raise AssertionError('effective uid was not changed to crash uid')
|
|
|
|
|
|
def recover_privileges():
    '''Change effective user and group back to real uid and gid

    Raises AssertionError if the effective ids do not match the real ids
    afterwards.  The checks are explicit "if ...: raise" statements rather
    than assert statements, so that they are still performed when Python
    runs with -O.
    '''
    os.setregid(-1, os.getgid())
    os.setreuid(-1, os.getuid())
    if os.getegid() != os.getgid():
        raise AssertionError('effective gid was not reset to real gid')
    if os.geteuid() != os.getuid():
        raise AssertionError('effective uid was not reset to real uid')
|
|
|
|
|
|
def init_error_log():
    '''Redirect stdout and stderr to a log file unless stderr is a tty.

    The log file is $APPORT_LOG_FILE, defaulting to /var/log/apport.log.
    If group "adm" exists, the log is handed to that group and made group
    readable.  If the log cannot be opened or adjusted (OSError), stdout and
    stderr are left untouched.
    '''

    if os.isatty(2):
        return

    log_path = os.environ.get('APPORT_LOG_FILE', '/var/log/apport.log')
    try:
        log_fd = os.open(log_path, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
        try:
            adm_gid = grp.getgrnam('adm').gr_gid
        except KeyError:
            pass  # if group adm doesn't exist, just leave it as root
        else:
            os.chown(log_path, -1, adm_gid)
            os.chmod(log_path, 0o640)
    except OSError:  # on a permission error, don't touch stderr
        return

    for std_fd in (1, 2):
        os.dup2(log_fd, std_fd)

    stderr_stream = os.fdopen(2, 'wb')
    if sys.version_info.major >= 3:
        stderr_stream = io.TextIOWrapper(stderr_stream)
    sys.stderr = stderr_stream
    sys.stdout = sys.stderr
|
|
|
|
|
|
def error_log(msg):
    '''Report msg through apport.error(), tagged with our pid and the time.'''

    own_pid = os.getpid()
    timestamp = time.asctime()
    apport.error('apport (pid %s) %s: %s', own_pid, timestamp, msg)
|
|
|
|
|
|
def _log_signal_handler(sgn, frame):
|
|
'''Internal apport signal handler. Just log the signal handler and exit.'''
|
|
|
|
# reset handler so that we do not get stuck in loops
|
|
signal.signal(sgn, signal.SIG_IGN)
|
|
try:
|
|
error_log('Got signal %i, aborting; frame:' % sgn)
|
|
for s in inspect.stack():
|
|
error_log(str(s))
|
|
except Exception:
|
|
pass
|
|
sys.exit(1)
|
|
|
|
|
|
def setup_signals():
    '''Route all crash-like signals to _log_signal_handler, so that apport
    is not called on itself when apport crashed.'''

    crash_like_signals = (
        signal.SIGILL,
        signal.SIGABRT,
        signal.SIGFPE,
        signal.SIGSEGV,
        signal.SIGPIPE,
        signal.SIGBUS,
    )
    for crash_signal in crash_like_signals:
        signal.signal(crash_signal, _log_signal_handler)
|
|
|
|
|
|
def _write_core_from_report(core_file, core_name, limit, from_report):
    '''Write the CoreDump field of the report file from_report to core_file.

    Return True if the whole core was written, False if it exceeds limit or
    a write came up short (both cases are logged).
    '''
    r = apport.Report()
    r.load(from_report)
    core = r['CoreDump']
    core_size = len(core)
    if limit > 0 and core_size > limit:
        error_log('aborting core dump writing, size %i exceeds current limit' % core_size)
        return False
    error_log('writing core dump %s of size %i' % (core_name, core_size))

    # A single os.write() may write less than requested (in particular for
    # very large buffers), so write in blocks and check every result.
    view = memoryview(core)
    for offset in range(0, core_size, 1048576):
        block = view[offset:offset + 1048576]
        if os.write(core_file, block) != len(block):
            error_log('aborting core dump writing, could not write')
            return False
    return True


def _write_core_from_fd(core_file, limit, coredump_fd):
    '''Copy coredump_fd to core_file until EOF.

    Return True if everything was copied, False if the data exceeds limit or
    a write came up short (both cases are logged).
    '''
    written = 0
    while True:
        block = os.read(coredump_fd, 1048576)
        size = len(block)
        if size == 0:
            return True
        written += size
        if limit > 0 and written > limit:
            error_log('aborting core dump writing, size exceeds current limit %i' % limit)
            return False
        if os.write(core_file, block) != size:
            error_log('aborting core dump writing, could not write')
            return False


def write_user_coredump(
    pid, timestamp, limit, coredump_fd=None, from_report=None
):
    '''Write the core into a directory if ulimit requests it.

    pid and timestamp are passed to apport.fileutils.get_core_path() to
    determine the core file name.  limit is the core size limit of the
    crashed process (see below).  The core data is taken from the report
    file object from_report if given, otherwise it is read from the file
    descriptor coredump_fd.

    Nothing is written if limit is 0, if the process ran a
    suid/sgid/unreadable executable, or if the core file cannot be created.
    If the core turns out to be bigger than a positive limit, or writing
    fails, the partially written file is removed again.
    '''

    # three cases:
    # limit == 0: do not write anything
    # limit < 0: unlimited, write out everything
    # limit nonzero: crashed process' core size ulimit in bytes

    if limit == 0:
        return

    # don't write a core dump for suid/sgid/unreadable or otherwise
    # protected executables, in accordance with core(5)
    # (suid_dumpable==2 and core_pattern restrictions); when this happens,
    # /proc/pid/stat is owned by root (or the user suid'ed to), but we already
    # changed to the crashed process' uid
    assert pidstat, 'pidstat not initialized'
    if pidstat.st_uid != crash_uid or pidstat.st_gid != crash_gid:
        error_log('disabling core dump for suid/sgid/unreadable executable')
        return

    (core_name, core_path) = apport.fileutils.get_core_path(pid,
                                                            options.executable_path,
                                                            crash_uid,
                                                            timestamp,
                                                            proc_pid_fd)

    try:
        # Limit number of core files to prevent DoS
        apport.fileutils.clean_core_directory(crash_uid)
        core_file = os.open(core_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, mode=0o400, dir_fd=cwd)
    except (OSError, IOError):
        return

    error_log('writing core dump to %s (limit: %s)' % (core_name, str(limit)))

    complete = False
    try:
        if from_report:
            complete = _write_core_from_report(core_file, core_name, limit, from_report)
        else:
            complete = _write_core_from_fd(core_file, limit, coredump_fd)

        if complete:
            # Make sure the user can read it
            os.fchown(core_file, crash_uid, -1)
    finally:
        # never leak the descriptor, and never leave a truncated core file
        # behind (this also covers exceptions raised while writing)
        os.close(core_file)
        if not complete:
            os.unlink(core_path, dir_fd=cwd)
|
|
|
|
|
|
def usable_ram():
    '''Return the number of bytes of RAM that can currently be allocated
    without causing major thrashing.

    This is computed from /proc/meminfo as MemFree + Cached - Writeback,
    multiplied by 1024.'''

    # abuse our excellent RFC822 parser to parse /proc/meminfo
    meminfo = apport.Report()
    with open('/proc/meminfo', 'rb') as meminfo_file:
        meminfo.load(meminfo_file)

    def leading_number(field):
        # a value looks like "<number> <unit>"; only the number is wanted
        return int(meminfo[field].split()[0])

    free = leading_number('MemFree')
    page_cache = leading_number('Cached')
    in_writeback = leading_number('Writeback')

    return 1024 * (free + page_cache - in_writeback)
|
|
|
|
|
|
def _run_with_output_limit_and_timeout(args, output_limit, timeout, close_fds=True, env=None):
|
|
'''Run command like subprocess.run() but with output limit and timeout.
|
|
|
|
Return (stdout, stderr).'''
|
|
|
|
stdout = b""
|
|
stderr = b""
|
|
|
|
process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
|
close_fds=close_fds, env=env)
|
|
try:
|
|
# Don't block so we don't deadlock
|
|
os.set_blocking(process.stdout.fileno(), False)
|
|
os.set_blocking(process.stderr.fileno(), False)
|
|
|
|
for _ in range(timeout):
|
|
alive = process.poll() is None
|
|
|
|
while len(stdout) < output_limit and len(stderr) < output_limit:
|
|
tempout = process.stdout.read(100)
|
|
if tempout:
|
|
stdout += tempout
|
|
temperr = process.stderr.read(100)
|
|
if temperr:
|
|
stderr += temperr
|
|
if not tempout and not temperr:
|
|
break
|
|
|
|
if not alive or len(stdout) >= output_limit or len(stderr) >= output_limit:
|
|
break
|
|
time.sleep(1)
|
|
finally:
|
|
process.kill()
|
|
|
|
return stdout, stderr
|
|
|
|
|
|
def is_closing_session():
    '''Check if pid is in a closing user session.

    During that, crashes are common as the session D-BUS and X.org are going
    away, etc. These crash reports are mostly noise, so should be ignored.

    The D-BUS session address is taken from the environment of the crashed
    process (read relative to proc_pid_fd), and org.gnome.SessionManager is
    asked through gdbus whether the session is running.

    Return True only if gdbus answers that the session is not running.
    Return False in all other cases: for root processes, and whenever the
    session state cannot be determined.
    '''
    # Sanity check, don't do anything for root processes
    if crash_uid == 0 or crash_gid == 0:
        return False

    with open('environ', 'rb', opener=proc_pid_opener) as environ_file:
        env = environ_file.read().split(b'\0')
    for entry in env:
        if entry.startswith(b'DBUS_SESSION_BUS_ADDRESS='):
            raw_dbus_addr = entry.split(b'=', 1)[1]
            break
    else:
        error_log('is_closing_session(): no DBUS_SESSION_BUS_ADDRESS in environment')
        return False

    # The environment is under the control of the crashed process, so the
    # value can be arbitrary bytes; do not let that abort crash handling.
    try:
        dbus_addr = raw_dbus_addr.decode()
    except UnicodeDecodeError:
        error_log('is_closing_session(): DBUS_SESSION_BUS_ADDRESS is not valid UTF-8')
        return False

    dbus_socket = apport.fileutils.get_dbus_socket(dbus_addr)
    if not dbus_socket:
        error_log('is_closing_session(): Could not determine DBUS socket.')
        return False

    if not os.path.exists(dbus_socket):
        error_log("is_closing_session(): DBUS socket doesn't exist.")
        return False

    # We need to drop both the real and effective uid/gid before calling
    # gdbus because DBUS_SESSION_BUS_ADDRESS is untrusted and may allow
    # reading arbitrary files as a noncefile. We can't just drop effective
    # uid/gid as gdbus has a check to make sure it's not running in a
    # setuid environment and it does so by comparing the real and effective
    # ids. We don't need to drop supplemental groups here, as the privilege
    # dropping code elsewhere has already done so.
    real_uid = os.getuid()
    real_gid = os.getgid()
    try:
        os.setresgid(crash_gid, crash_gid, real_gid)
        os.setresuid(crash_uid, crash_uid, real_uid)
        out, err = _run_with_output_limit_and_timeout(['/usr/bin/gdbus', 'call', '-e', '-d',
                                                       'org.gnome.SessionManager', '-o', '/org/gnome/SessionManager', '-m',
                                                       'org.gnome.SessionManager.IsSessionRunning', '-t', '5'],
                                                      1000, 5, env={'DBUS_SESSION_BUS_ADDRESS': dbus_addr})

        if err:
            # the output is cut at the output limit, possibly in the middle
            # of a multi-byte character, hence errors='replace'
            error_log('gdbus call error: ' + err.decode('UTF-8', errors='replace'))
    except OSError as error:
        error_log('gdbus call failed, cannot determine running session: ' + str(error))
        return False
    finally:
        # the saved ids set above allow switching back
        os.setresuid(real_uid, real_uid, -1)
        os.setresgid(real_gid, real_gid, -1)

    error_log('debug: session gdbus call: ' + out.decode('UTF-8', errors='replace'))
    if out.startswith(b'(false,'):
        return True

    return False
|
|
|
|
|
|
def is_systemd_watchdog_restart(signum):
    '''Check if this is a restart by systemd's watchdog

    signum is the signal number as a decimal string, as passed on the
    command line.

    Return True if the signal is SIGABRT, the system runs systemd, and the
    journal of the last five minutes for the systemd unit of the process
    contains "Watchdog timeout".  Return False otherwise, also if this
    cannot be determined.
    '''

    # signal.SIGABRT is an enum member; str() of it is "Signals.SIGABRT" on
    # Python < 3.11, which never equals a number string like "6", so go
    # through int() explicitly.
    if signum != str(int(signal.SIGABRT)) or not os.path.isdir('/run/systemd/system'):
        return False

    try:
        with open('cgroup', opener=proc_pid_opener) as f:
            for line in f:
                if 'name=systemd:' in line:
                    # the unit is the last component of the cgroup path
                    unit = line.split('/')[-1].strip()
                    break
            else:
                return False

        journalctl = subprocess.Popen(['/bin/journalctl', '--output=cat', '--since=-5min', '--priority=warning',
                                       '--unit', unit], stdout=subprocess.PIPE)
        out = journalctl.communicate()[0]
        return b'Watchdog timeout' in out
    except (IOError, OSError) as e:
        error_log('cannot determine if this crash is from systemd watchdog: %s' % e)
        return False
|
|
|
|
|
|
def is_same_ns(pid, ns):
    '''Check if process pid shares the namespace ns with this process.

    ns is the name of an entry in /proc/<pid>/ns, e. g. "pid" or "mnt".

    Return True if either namespace link does not exist, if both links have
    the same target, or if the process belongs to a cgroup whose path starts
    with /system.slice; return False otherwise.  OSError from reading the
    links is propagated, except for ENOENT, which counts as "same".
    '''
    own_ns_link = '/proc/self/ns/%s' % ns
    pid_ns_link = '/proc/%s/ns/%s' % (pid, ns)

    if not os.path.exists(own_ns_link) or not os.path.exists(pid_ns_link):
        # If the namespace doesn't exist, then it's obviously shared
        return True

    try:
        if os.readlink(pid_ns_link) == os.readlink(own_ns_link):
            # Check that the inode for both namespaces is the same
            return True
    except OSError as e:
        if e.errno == errno.ENOENT:
            return True
        raise

    # check to see if the process is part of the system.slice (LP: #1870060)
    if os.path.exists('/proc/%s/cgroup' % pid):
        # Use a private directory fd which is closed again, rather than
        # overwriting the module-wide proc_pid_fd and leaking one fd per call.
        pid_dir_fd = os.open('/proc/%s' % pid, os.O_RDONLY | os.O_PATH | os.O_DIRECTORY)
        try:
            def pid_dir_opener(path, flags):
                return os.open(path, flags, dir_fd=pid_dir_fd)

            with open('cgroup', opener=pid_dir_opener) as cgroup:
                for line in cgroup:
                    # the cgroup path is the last ":"-separated field
                    if line.split(':')[-1].startswith('/system.slice'):
                        return True
        finally:
            os.close(pid_dir_fd)

    return False
|
|
|
|
|
|
def parse_arguments():
    '''Parse sys.argv and return the options as argparse.Namespace.

    The returned namespace has the attributes pid, signal_number,
    core_ulimit, dump_mode, global_pid, uid, gid and executable_path.

    If no --pid is given and there are exactly four arguments, these are
    taken as legacy command line (pid, signal number, core ulimit, dump
    mode).  If no pid is available at all, the usage is printed and the
    process exits with status 1.
    '''
    parser = argparse.ArgumentParser(epilog="""
Alternatively, the following command line is understood for legacy hosts:
<pid> <signal number> <core file ulimit> <dump mode> [global pid] [exe path]
""")

    # TODO: Use type=int
    argument_specs = (
        (("-p", "--pid"), {"help": "process id (%%p)"}),
        (("-s", "--signal-number"), {"help": "signal number (%%s)"}),
        (("-c", "--core-ulimit"), {"help": "core ulimit (%%c)"}),
        (("-d", "--dump-mode"), {"help": "dump mode (%%d)"}),
        (("-P", "--global-pid"), {"help": "pid in root namespace (%%P)"}),
        (("-u", "--uid"), {"type": int, "help": "real UID (%%u)"}),
        (("-g", "--gid"), {"type": int, "help": "real GID (%%g)"}),
        (("executable_path",), {"nargs": '*', "help": "path of executable (%%E)"}),
    )
    for names, settings in argument_specs:
        parser.add_argument(*names, **settings)

    options, unknown = parser.parse_known_args()

    if options.pid is None:
        if len(sys.argv) != 5:
            parser.print_usage()
            sys.exit(1)

        # Legacy command line needs to remain for the scenario where a more
        # recent apport is running inside a container with an older apport
        # running on the host.
        legacy_pid, legacy_signal, legacy_ulimit, legacy_mode = sys.argv[1:5]
        return argparse.Namespace(
            pid=legacy_pid,
            signal_number=legacy_signal,
            core_ulimit=legacy_ulimit,
            dump_mode=legacy_mode,
            global_pid=None,
            uid=None,
            gid=None,
            executable_path=None,
        )

    for arg in unknown:
        error_log("Unknown argument: %s" % arg)

    # In kernels before 5.3.0, an executable path with spaces may be split
    # into separate arguments. If options.executable_path is a list, join
    # it back into a string. Also restore directory separators.
    exe_path = options.executable_path
    if isinstance(exe_path, list):
        exe_path = " ".join(exe_path)
    exe_path = exe_path.replace('!', '/')

    # Sanity check to prevent trickery later on
    options.executable_path = None if '../' in exe_path else exe_path

    return options
|
|
|
|
|
|
#################################################################
|
|
#
|
|
# main
|
|
#
|
|
#################################################################
|
|
|
|
# Set up the error log first, so that everything below can report problems.
init_error_log()

# systemd socket activation
#
# In this mode the crash does not arrive through the command line and stdin,
# but as one message on a socket: the arguments as message data, the core
# dump file descriptor as SCM_RIGHTS and the pid as SCM_CREDENTIALS (the
# forwarding code further down is the sending side). sys.argv and sys.stdin
# are rewritten accordingly, so that the rest of the script does not need to
# care.
if 'LISTEN_FDS' in os.environ:
    try:
        from systemd.daemon import listen_fds
    except ImportError:
        error_log('Received a crash via apport-forward.socket, but systemd python module is not installed')
        sys.exit(0)

    # Extract and validate the fd
    fds = listen_fds()
    if len(fds) < 1:
        error_log('Invalid socket activation, no fd provided')
        sys.exit(1)

    # Open the socket
    sock = socket.fromfd(int(fds[0]), socket.AF_UNIX, socket.SOCK_STREAM)
    atexit.register(sock.shutdown, socket.SHUT_RDWR)

    # Replace stdin by the socket activation fd
    sys.stdin.close()

    # from here on "fds" holds the descriptors received over the socket
    fds = array.array('i')
    ucreds = array.array('i')
    msg, ancdata, flags, addr = sock.recvmsg(4096, 4096)
    for cmsg_level, cmsg_type, cmsg_data in ancdata:
        # the data is cut down to a multiple of the item size before it is
        # converted, so that trailing partial items are ignored
        if (cmsg_level == socket.SOL_SOCKET and cmsg_type == socket.SCM_RIGHTS):
            fds.frombytes(cmsg_data[:len(cmsg_data) - (len(cmsg_data) % fds.itemsize)])
        elif (cmsg_level == socket.SOL_SOCKET and cmsg_type == socket.SCM_CREDENTIALS):
            ucreds.frombytes(cmsg_data[:len(cmsg_data) - (len(cmsg_data) % ucreds.itemsize)])

    # NOTE(review): fds[0] raises IndexError if no SCM_RIGHTS data was
    # received -- confirm that this cannot happen, or handle it.
    sys.stdin = os.fdopen(int(fds[0]), 'r')

    # Replace argv by the arguments received over the socket
    sys.argv = [sys.argv[0]]
    sys.argv += msg.decode().split()
    if len(ucreds) >= 3:
        # prefer the pid from the received credentials over the one in the
        # message text
        # NOTE(review): raises IndexError if the message contained no
        # arguments, as this runs before the length check below.
        sys.argv[1] = "%d" % ucreds[0]

    if len(sys.argv) != 5:
        error_log('Received a bad number of arguments from forwarder, received %d, expected 5, aborting.' % len(sys.argv))
        sys.exit(1)
|
|
|
|
|
|
options = parse_arguments()

# Check if we received a valid global PID (kernel >= 3.12). If we do,
# then compare it with the local PID. If they don't match, it's an
# indication that the crash originated from another PID namespace.
# Simply log an entry in the host error log and exit 0.
if options.global_pid is not None:
    host_pid = int(options.global_pid)

    if not is_same_ns(host_pid, "pid") and not is_same_ns(host_pid, "mnt"):
        # If the crash came from a container, don't attempt to handle
        # locally as that would just result in wrong system information.

        # Instead, attempt to find apport inside the container and
        # forward the process information there.

        # All following /proc lookups go through this directory fd
        proc_host_pid_fd = os.open('/proc/%d' % host_pid, os.O_RDONLY | os.O_PATH | os.O_DIRECTORY)

        def proc_host_pid_opener(path, flags):
            # opener for open(), resolves path relative to /proc/<host_pid>
            return os.open(path, flags, dir_fd=proc_host_pid_fd)

        # Validate that the target socket is owned by the user namespace of the process
        try:
            # "root" in /proc/<pid> leads to the root directory as seen by
            # the process, i. e. this is /run/apport.socket of the container
            sock_fd = os.open("root/run/apport.socket", os.O_RDONLY | os.O_PATH, dir_fd=proc_host_pid_fd)
            socket_uid = os.fstat(sock_fd).st_uid
        except FileNotFoundError:
            error_log('host pid %s crashed in a container without apport support' %
                      options.global_pid)
            sys.exit(0)

        try:
            with open("uid_map", "r", opener=proc_host_pid_opener) as fd:
                if not apport.fileutils.search_map(fd, socket_uid):
                    error_log("user is trying to trick apport into accessing a socket that doesn't belong to the container")
                    sys.exit(0)
        except FileNotFoundError:
            # without an uid_map file the check is skipped
            pass

        # Validate that the crashed binary is owned by the user namespace of the process
        task_uid = os.stat("exe", dir_fd=proc_host_pid_fd).st_uid
        try:
            with open("uid_map", "r", opener=proc_host_pid_opener) as fd:
                if not apport.fileutils.search_map(fd, task_uid):
                    error_log("host pid %s crashed in a container with no access to the binary"
                              % options.global_pid)
                    sys.exit(0)
        except FileNotFoundError:
            pass

        # the same check for the group of the binary
        task_gid = os.stat("exe", dir_fd=proc_host_pid_fd).st_gid
        try:
            with open("gid_map", "r", opener=proc_host_pid_opener) as fd:
                if not apport.fileutils.search_map(fd, task_gid):
                    error_log("host pid %s crashed in a container with no access to the binary"
                              % options.global_pid)
                    sys.exit(0)
        except FileNotFoundError:
            pass

        # Now open the socket
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            # connect through the fd which was validated above, instead of
            # looking up the socket path a second time
            sock.connect('/proc/self/fd/%s' % sock_fd)
        except Exception:
            error_log('host pid %s crashed in a container with a broken apport' %
                      options.global_pid)
            sys.exit(0)

        # Send main arguments only
        # Older apport in containers doesn't support positional arguments
        args = "%s %s %s %s" % (options.pid,
                                options.signal_number,
                                options.core_ulimit,
                                options.dump_mode)
        try:
            sock.sendmsg([args.encode()], [
                # Send a ucred containing the global pid
                (socket.SOL_SOCKET, socket.SCM_CREDENTIALS, struct.pack("3i", host_pid, 0, 0)),

                # Send fd 0 (the coredump)
                (socket.SOL_SOCKET, socket.SCM_RIGHTS, array.array('i', [0]))])
            sock.shutdown(socket.SHUT_RDWR)
        except Exception:
            # NOTE(review): every failure of sending is logged with this
            # message; no timeout is set in this block -- confirm where the
            # 30s come from.
            error_log('Container apport failed to process crash within 30s')

        # the crash is never handled locally in this branch
        sys.exit(0)
    elif not is_same_ns(host_pid, "mnt"):
        error_log('host pid %s crashed in a separate mount namespace, ignoring' % host_pid)
        sys.exit(0)
    else:
        # If it doesn't look like the crash originated from within a
        # full container or if the is_same_ns() function fails open (returning
        # True), then take the global pid and replace the local pid with it,
        # then move on to normal handling.

        # This bit is needed because some software like the chrome
        # sandbox will use container namespaces as a security measure but are
        # still otherwise host processes. When that's the case, we need to keep
        # handling those crashes locally using the global pid.
        options.pid = str(host_pid)
|
|
|
|
# Only one apport instance handles a crash at a time.
check_lock()

# Main crash handling: validate the crashed process, decide whether the crash
# is to be reported at all, write the report to report_dir, and write a core
# file for the user if the core ulimit of the crashed process asks for it.
try:
    setup_signals()

    pid = options.pid
    signum = options.signal_number
    core_ulimit = options.core_ulimit
    dump_mode = options.dump_mode
    coredump_fd = sys.stdin.fileno()

    get_pid_info(pid)

    # Sanity check to make sure the process wasn't replaced after the crash
    # happened. The start time isn't fine-grained enough to be an adequate
    # security check.
    apport_start = get_apport_starttime()
    process_start = get_process_starttime()
    if process_start > apport_start:
        error_log('process was replaced after Apport started, ignoring')
        sys.exit(0)

    # Make sure the process uid/gid match the ones provided by the kernel
    # if available, if not, it may have been replaced
    if (options.uid is not None) and (options.gid is not None):
        if (options.uid != crash_uid) or (options.gid != crash_gid):
            error_log("process uid/gid doesn't match expected, ignoring")
            sys.exit(0)

    # check if the executable was modified after the process started (e. g.
    # package got upgraded in between).
    exe_mtime = os.stat('exe', dir_fd=proc_pid_fd).st_mtime
    process_mtime = os.lstat('cmdline', dir_fd=proc_pid_fd).st_mtime
    if not os.path.exists(os.readlink('exe', dir_fd=proc_pid_fd)) or exe_mtime > process_mtime:
        error_log('executable was modified after program start, ignoring')
        sys.exit(0)

    error_log('called for pid %s, signal %s, core limit %s, dump mode %s' % (pid, signum, core_ulimit, dump_mode))

    try:
        core_ulimit = int(core_ulimit)
    except ValueError:
        error_log('core limit is invalid, disabling core files')
        core_ulimit = 0
    # clamp core_ulimit to a sensible size, for -1 the kernel reports something
    # absurdly big
    if core_ulimit > 9223372036854775807:
        error_log('ignoring implausibly big core limit, treating as unlimited')
        core_ulimit = -1

    if dump_mode == '2':
        error_log('not creating core for pid with dump mode of %s' % (dump_mode))
        # a report should be created but not a core file
        core_ulimit = 0

    # ignore SIGQUIT (it's usually deliberately generated by users)
    if signum == str(int(signal.SIGQUIT)):
        write_user_coredump(pid, process_start, core_ulimit, coredump_fd)
        sys.exit(0)

    info = apport.Report('Crash')
    info['Signal'] = signum
    core_size_limit = usable_ram() * 3 / 4
    if sys.version_info.major < 3:
        info['CoreDump'] = (sys.stdin, True, core_size_limit, True)
    else:
        # read binary data from stdio
        info['CoreDump'] = (sys.stdin.detach(), True, core_size_limit, True)

    # We already need this here to figure out the ExecutableName (for scripts,
    # etc).
    if options.executable_path is not None and os.path.exists(options.executable_path):
        info['ExecutablePath'] = options.executable_path
    else:
        info['ExecutablePath'] = os.readlink('exe', dir_fd=proc_pid_fd)

    # Drop privileges temporarily to make sure that we don't
    # include information in the crash report that the user should
    # not be allowed to access.
    drop_privileges()

    info.add_proc_info(proc_pid_fd=proc_pid_fd)

    if 'ExecutablePath' not in info:
        error_log('could not determine ExecutablePath, aborting')
        sys.exit(1)

    # A "<executable>.<uid>.<pid>.hanging" file in the report directory,
    # which is newer than /proc/uptime, turns this report into a "Hang"
    # report; the marker file is removed in any case.
    subject = info['ExecutablePath'].replace('/', '_')
    base = '%s.%s.%s.hanging' % (subject, str(pidstat.st_uid), pid)
    hanging = os.path.join(apport.fileutils.report_dir, base)

    if os.path.exists(hanging):
        if (os.stat('/proc/uptime').st_ctime < os.stat(hanging).st_mtime):
            info['ProblemType'] = 'Hang'
        os.unlink(hanging)

    if 'InterpreterPath' in info:
        error_log('script: %s, interpreted by %s (command line "%s")' %
                  (info['ExecutablePath'], info['InterpreterPath'],
                   info['ProcCmdline']))
    else:
        error_log('executable: %s (command line "%s")' %
                  (info['ExecutablePath'], info['ProcCmdline']))

    # ignore non-package binaries (unless configured otherwise)
    if not apport.fileutils.likely_packaged(info['ExecutablePath']):
        if not apport.fileutils.get_config('main', 'unpackaged', False, bool=True):
            error_log('executable does not belong to a package, ignoring')
            # check if the user wants a core dump
            recover_privileges()
            write_user_coredump(pid, process_start, core_ulimit, coredump_fd)
            sys.exit(0)

    # ignore SIGXCPU and SIGXFSZ since this indicates some external
    # influence changing soft RLIMIT values when running programs.
    # (signum is a number string; str() of a signal enum member is
    # "Signals.SIGXCPU" on Python < 3.11, so convert through int(), like for
    # SIGQUIT above)
    if signum in [str(int(signal.SIGXCPU)), str(int(signal.SIGXFSZ))]:
        error_log('Ignoring signal %s (caused by exceeding soft RLIMIT)' % signum)
        recover_privileges()
        write_user_coredump(pid, process_start, core_ulimit, coredump_fd)
        sys.exit(0)

    # ignore blacklisted binaries
    if info.check_ignored():
        error_log('executable version is blacklisted, ignoring')
        sys.exit(0)

    # We can now recover privileges to create the crash report file and
    # write out the user coredumps
    recover_privileges()

    if is_closing_session():
        error_log('happens for shutting down session, ignoring')
        sys.exit(0)

    # ignore systemd watchdog kills; most often they don't tell us the actual
    # reason (kernel hang, etc.), LP #1433320
    if is_systemd_watchdog_restart(signum):
        error_log('Ignoring systemd watchdog restart')
        sys.exit(0)

    crash_counter = 0

    # Create crash report file descriptor for writing the report into
    # report_dir
    try:
        report = '%s/%s.%i.crash' % (apport.fileutils.report_dir, info['ExecutablePath'].replace('/', '_'), pidstat.st_uid)
        if os.path.exists(report):
            if apport.fileutils.seen_report(report):
                # do not flood the logs and the user with repeated crashes
                # and make sure the file isn't a FIFO or symlink
                fd = os.open(report, os.O_NOFOLLOW | os.O_RDONLY | os.O_NONBLOCK)
                st = os.fstat(fd)
                if stat.S_ISREG(st.st_mode):
                    with os.fdopen(fd, 'rb') as f:
                        crash_counter = apport.fileutils.get_recent_crashes(f)
                    crash_counter += 1
                    if crash_counter > 1:
                        write_user_coredump(
                            pid, process_start, core_ulimit, coredump_fd
                        )
                        error_log('this executable already crashed %i times, ignoring' % crash_counter)
                        sys.exit(0)
                # remove the old file, so that we can create the new one with
                # os.O_CREAT|os.O_EXCL
                os.unlink(report)
            else:
                error_log('apport: report %s already exists and unseen, skipping to avoid disk usage DoS' % report)
                write_user_coredump(
                    pid, process_start, core_ulimit, coredump_fd
                )
                sys.exit(0)
        # we prefer having a file mode of 0 while writing;
        fd = os.open(report, os.O_RDWR | os.O_CREAT | os.O_EXCL, 0)
        reportfile = os.fdopen(fd, 'w+b')
        assert reportfile.fileno() > sys.stderr.fileno()

        # Make sure the crash reporting daemon can read this report
        try:
            gid = pwd.getpwnam('whoopsie').pw_gid
            os.fchown(fd, pidstat.st_uid, gid)
        except (OSError, KeyError):
            os.fchown(fd, pidstat.st_uid, pidstat.st_gid)
    except (OSError, IOError) as e:
        error_log('Could not create report file: %s' % str(e))
        sys.exit(1)

    # Drop privileges before writing out the reportfile.
    drop_privileges()

    info.add_user_info()
    info.add_os_info()

    if crash_counter > 0:
        info['CrashCounter'] = '%i' % crash_counter

    try:
        info.write(reportfile)
    except IOError:
        os.unlink(report)
        raise
    if 'CoreDump' not in info:
        error_log('core dump exceeded %i MiB, dropped from %s to avoid memory overflow'
                  % (core_size_limit / 1048576, report))

    # Get privileges back so the core file can be written to root-owned
    # corefile directory
    recover_privileges()

    # make the report writable now, when it's completely written
    os.fchmod(fd, 0o640)
    error_log('wrote report %s' % report)

    # Check if the user wants a core file. We need to create that from the
    # written report, as we can only read stdin once and write_user_coredump()
    # might abort reading from stdin and remove the written core file when
    # core_ulimit is > 0 and smaller than the core size.
    reportfile.seek(0)
    write_user_coredump(pid, process_start, core_ulimit, from_report=reportfile)

except (SystemExit, KeyboardInterrupt):
    raise
except Exception:
    # log as much context as possible about the failure
    error_log('Unhandled exception:')
    traceback.print_exc()
    error_log('pid: %i, uid: %i, gid: %i, euid: %i, egid: %i' % (
        os.getpid(), os.getuid(), os.getgid(), os.geteuid(), os.getegid()))
    error_log('environment: %s' % str(os.environ))
|