3 # Copyright (C) 2010 The Android Open Source Project
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
19 # Default product ID in crash report (used if GOOGLE_CRASH_* is undefined).
22 # Base directory that contains any crash reporter state files.
23 CRASH_STATE_DIR="/data/misc/crash_reporter"
25 # File containing crash_reporter's anonymized guid.
26 GUID_FILE="${CRASH_STATE_DIR}/guid"
28 # Crash sender lock in case the sender is already running.
29 CRASH_SENDER_LOCK="${CRASH_STATE_DIR}/lock/crash_sender"
31 # Path to file that indicates a crash test is currently running.
32 CRASH_TEST_IN_PROGRESS_FILE="${CRASH_STATE_DIR}/tmp/crash-test-in-progress"
34 # Set this to 1 in the environment to allow uploading crash reports
35 # for unofficial versions.
36 FORCE_OFFICIAL=${FORCE_OFFICIAL:-0}
38 # Path to hardware class description.
39 HWCLASS_PATH="/sys/devices/platform/chromeos_acpi/HWID"
41 # Path to file that indicates this is a developer image.
42 LEAVE_CORE_FILE="${CRASH_STATE_DIR}/.leave_core"
44 # Path to list_proxies.
45 LIST_PROXIES="list_proxies"
47 # Maximum crashes to send per day.
48 MAX_CRASH_RATE=${MAX_CRASH_RATE:-32}
50 # File whose existence mocks crash sending. If empty we pretend the
51 # crash sending was successful, otherwise unsuccessful.
52 MOCK_CRASH_SENDING="${CRASH_STATE_DIR}/tmp/mock-crash-sending"
54 # Set this to 1 in the environment to pretend to have booted in developer
55 # mode. This is used by autotests.
56 MOCK_DEVELOPER_MODE=${MOCK_DEVELOPER_MODE:-0}
58 # Ignore PAUSE_CRASH_SENDING file if set.
59 OVERRIDE_PAUSE_SENDING=${OVERRIDE_PAUSE_SENDING:-0}
61 # File whose existence causes crash sending to be delayed (for testing).
62 # Must be stateful to enable testing kernel crashes.
63 PAUSE_CRASH_SENDING="${CRASH_STATE_DIR}/lock/crash_sender_paused"
65 # Path to a directory of restricted certificates which includes
66 # a certificate for the crash server.
67 RESTRICTED_CERTIFICATES_PATH="/system/etc/security/cacerts"
68 RESTRICTED_CERTIFICATES_PATH_GOOGLE="/system/etc/security/cacerts_google"
70 # File whose existence implies we're running and not to start again.
71 RUN_FILE="${CRASH_STATE_DIR}/run/crash_sender.pid"
73 # Maximum time to sleep between sends.
74 SECONDS_SEND_SPREAD=${SECONDS_SEND_SPREAD:-600}
76 # Flag file whose existence allows uploading of device coredumps.
77 DEVCOREDUMP_UPLOAD_FLAG_FILE="${CRASH_STATE_DIR}/device_coredump_upload_allowed"
79 # The weave configuration file.
80 WEAVE_CONF_FILE="/etc/weaved/weaved.conf"
82 # The os-release.d folder.
83 OSRELEASED_FOLDER="/etc/os-release.d"
85 # The syslog tag for all logging we emit.
86 TAG="$(basename $0)[$$]"
88 # Directory of timestamp files, one per upload made in the past 24 hours.
90 TIMESTAMPS_DIR="${CRASH_STATE_DIR}/crash_sender"
92 # Temp directory for this process.
95 # Crash report log file.
96 CRASH_LOG="${CRASH_STATE_DIR}/log/uploads.log"
103 lecho -psyslog.warn "$@"
106 # Returns true if mock is enabled.
108 [ -f "${MOCK_CRASH_SENDING}" ] && return 0
# Returns 0 if the mocked send should be treated as successful.
# Globals: MOCK_CRASH_SENDING (read) - path of the mock control file.
# Returns: 0 when the file's contents are empty (trailing newlines are
#          dropped by the command substitution), 1 when it holds anything
#          else.
is_mock_successful() {
  # Declare first and quote the substitution: dash word-splits an unquoted
  # value handed to the "local" builtin (see
  # https://bugs.launchpad.net/ubuntu/+source/dash/+bug/139097), so file
  # contents with spaces could be misparsed as extra variable names.
  local mock_in
  mock_in="$(cat "${MOCK_CRASH_SENDING}")"
  # Empty file means success; any content means a mocked failure.
  [ -z "${mock_in}" ]
}
119 if [ -n "${TMP_DIR}" ]; then
123 if [ -n "${CRASH_SENDER_LOCK}" ]; then
124 rm -rf "${CRASH_SENDER_LOCK}"
131 # For testing purposes, emit a message to log so that we
132 # know when the test has received all the messages from this run.
133 lecho "crash_sender done."
# Decides whether this build counts as an official image.
# A non-zero FORCE_OFFICIAL short-circuits to success (status 0); otherwise
# the decision depends on whether the ro.secure system property reads "1".
# NOTE(review): the branch body and the fall-through result are not visible
# in this listing - confirm the return values there.
137 is_official_image() {
138 [ ${FORCE_OFFICIAL} -ne 0 ] && return 0
139 if [ "$(getprop ro.secure)" = "1" ]; then
146 # Returns 0 if a crash test is currently running. NOTE: Mirrors
147 # crash_collector.cc:CrashCollector::IsCrashTestInProgress().
# A test counts as running when ${CRASH_TEST_IN_PROGRESS_FILE} exists as a
# regular file.
148 is_crash_test_in_progress() {
149 [ -f "${CRASH_TEST_IN_PROGRESS_FILE}" ] && return 0
153 # Returns 0 if we should consider ourselves to be running on a developer
154 # image. NOTE: Mirrors crash_collector.cc:CrashCollector::IsDeveloperImage().
# The marker is the ${LEAVE_CORE_FILE} file; it is only consulted when no
# crash test is in progress (an in-progress test returns 1 immediately).
155 is_developer_image() {
156 # If we're testing crash reporter itself, we don't want to special-case
157 # for developer images.
158 is_crash_test_in_progress && return 1
159 [ -f "${LEAVE_CORE_FILE}" ] && return 0
163 # Returns 0 if we should consider ourselves to be running on a test image.
165 # If we're testing crash reporter itself, we don't want to special-case
167 is_crash_test_in_progress && return 1
168 case $(get_channel) in
174 # Returns 0 if the machine booted up in developer mode.
# Precedence: a non-zero MOCK_DEVELOPER_MODE always reports developer mode;
# next, an in-progress crash test always reports non-developer mode; only
# then is the ro.debuggable system property compared against "1".
# NOTE(review): the branch body is not visible in this listing - confirm
# the return values that follow the property check.
175 is_developer_mode() {
176 [ ${MOCK_DEVELOPER_MODE} -ne 0 ] && return 0
177 # If we're testing crash reporter itself, we don't want to special-case
178 # for developer mode.
179 is_crash_test_in_progress && return 1
180 if [ "$(getprop ro.debuggable)" = "1" ]; then
187 # Returns the path of the certificates directory to be used when sending
188 # reports to the crash server.
189 # If crash_reporter.full_certs=1, return the full certificates path.
190 # Otherwise return the Google-specific certificates path.
# The path is written to stdout. crash_reporter.full_certs is read as a
# system property via getprop; the full path is
# ${RESTRICTED_CERTIFICATES_PATH} and the Google-specific one is
# ${RESTRICTED_CERTIFICATES_PATH_GOOGLE}.
191 get_certificates_path() {
192 if [ "$(getprop crash_reporter.full_certs)" = "1" ]; then
193 echo "${RESTRICTED_CERTIFICATES_PATH}"
195 echo "${RESTRICTED_CERTIFICATES_PATH_GOOGLE}"
199 # Return 0 if the uploading of device coredumps is allowed.
# Uploading is allowed when ${DEVCOREDUMP_UPLOAD_FLAG_FILE} exists as a
# regular file; its contents are not inspected.
200 is_device_coredump_upload_allowed() {
201 [ -f "${DEVCOREDUMP_UPLOAD_FLAG_FILE}" ] && return 0
# Generate a uniform random number in 0..max-1.
# Arguments: $1 - max, the exclusive upper bound.
# Outputs:   the random number on stdout; 0 when max is not a positive
#            integer (there is no valid range to draw from, and the modulo
#            would otherwise divide by zero).
#
# POSIX arithmetic expansion requires support of at least signed long integers.
# On 32-bit systems, that may mean 32-bit signed integers, in which case the
# 32-bit random number read from /dev/urandom may be interpreted as negative
# when used inside an arithmetic expansion (since the high bit might be set).
# mksh at least is known to behave this way.
# For this case, simply clear the high/sign bit, which will still give a
# roughly uniform random distribution for the modulo. Masking is used rather
# than negation because negating the most negative 32-bit value overflows and
# stays negative, which would yield a negative result.
# See corresponding Arithmetic Expansion and Arithmetic Expression sections:
# POSIX: http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_04
# mksh: http://linux.die.net/man/1/mksh
generate_uniform_random() {
  local max="${1:-}"
  local random

  # Reject empty, non-numeric, zero or negative bounds up front.
  if ! [ "${max}" -gt 0 ] 2>/dev/null; then
    echo 0
    return 0
  fi

  # Four random bytes rendered as one unsigned decimal integer.
  random="$(od -An -N4 -tu /dev/urandom)"
  echo $(((random & 0x7fffffff) % max))
}
223 # Check if sending a crash now does not exceed the maximum 24hr rate and
224 # commit to doing so, if not.
226 mkdir -p ${TIMESTAMPS_DIR}
227 # Only consider uploads made in the past 24 hours by removing all older
# timestamp files.
228 find "${TIMESTAMPS_DIR}" -mindepth 1 -mtime +1 \
230 local sends_in_24hrs=$(echo "${TIMESTAMPS_DIR}"/* | wc -w)
231 lecho "Current send rate: ${sends_in_24hrs}sends/24hrs"
232 if [ ${sends_in_24hrs} -ge ${MAX_CRASH_RATE} ]; then
233 lecho "Cannot send more crashes:"
234 lecho " current ${sends_in_24hrs}send/24hrs >= " \
235 "max ${MAX_CRASH_RATE}send/24hrs"
238 mktemp "${TIMESTAMPS_DIR}"/XXXXXX > /dev/null
242 # Gets the base part of a crash report file, such as name.01234.5678.9012 from
243 # name.01234.5678.9012.meta or name.01234.5678.9012.log.tar.xz. We make sure
244 # "name" is sanitized in CrashCollector::Sanitize to not include any periods.
246 echo "$1" | cut -d. -f-4
250 local extension="${1##*.}"
251 local filename="${1%.*}"
252 # For gzipped file, we ignore .gz and get the real extension
253 if [ "${extension}" = "gz" ]; then
254 echo "${filename##*.}"
260 # Return which kind of report the given metadata file relates to
262 local payload="$(get_key_value "$1" "payload")"
263 if [ ! -r "${payload}" ]; then
264 lecho "Missing payload: ${payload}"
268 local kind="$(get_extension "${payload}")"
269 if [ "${kind}" = "dmp" ]; then
277 local file="$1" key="$2" value
279 if [ -f "${file}/${key}" ]; then
280 # Get the value from a folder where each key is its own file. The key
281 # file's entire contents is the value.
282 value=$(cat "${file}/${key}")
283 elif [ -f "${file}" ]; then
284 # Get the value from a file that has multiple key=value combinations.
285 # Return the first entry. There shouldn't be more than one anyways.
286 # The sed script prints the first matching line with everything up to and
287 # including the first = sign removed, so embedded = characters are preserved.
288 value=$(sed -n "/^${key}[[:space:]]*=/{s:^[^=]*=::p;q}" "${file}")
291 echo "${value:-undefined}"
295 local file="$1" regex="$2"
297 cut -d '=' -f1 "${file}" | grep --color=never "${regex}"
300 # Return the channel name (sans "-channel" suffix).
302 getprop ro.product.channel | sed 's:-channel$::'
305 # Return the hardware class or "undefined".
# The hardware class is the content of ${HWCLASS_PATH}, written to stdout
# when that file is readable.
306 get_hardware_class() {
307 if [ -r "${HWCLASS_PATH}" ]; then
308 cat "${HWCLASS_PATH}"
# Return the log string filtered with only JSON-safe white-listed characters.
# Arguments: $1 - raw string (e.g. a product or executable name).
# Outputs:   $1 with every character outside the set [:alnum:] _ . - : ;
#            deleted, written to stdout without a trailing newline.
filter_log_string() {
  # printf rather than echo: echo treats values such as "-n" or "-e" as
  # options (and some shells interpret backslashes), which would silently
  # alter the string before it is filtered.
  printf '%s' "$1" | tr -cd '[:alnum:]_.\-:;'
}
321 local report_payload="$(get_key_value "${meta_path}" "payload")"
322 local kind="$(get_kind "${meta_path}")"
323 local exec_name="$(get_key_value "${meta_path}" "exec_name")"
324 local url="$(get_key_value "${OSRELEASED_FOLDER}" "crash_server")"
325 local bdk_version="$(get_key_value "${meta_path}" "bdk_version")"
326 local hwclass="$(get_hardware_class)"
327 local write_payload_size="$(get_key_value "${meta_path}" "payload_size")"
328 local log="$(get_key_value "${meta_path}" "log")"
329 local sig="$(get_key_value "${meta_path}" "sig")"
330 local send_payload_size="$(stat -c "%s" "${report_payload}" 2>/dev/null)"
331 local product="$(get_key_value "${meta_path}" "product_id")"
332 local version="$(get_key_value "${meta_path}" "product_version")"
333 local upload_prefix="$(get_key_value "${meta_path}" "upload_prefix")"
335 local model_manifest_id="$(get_key_value "${WEAVE_CONF_FILE}" "model_id")"
337 # If no crash server URL is configured (the crash_server key read from
# ${OSRELEASED_FOLDER}), return with an error.
338 if [ -z "${url}" ]; then
339 lecho "Configuration error: crash_reporter.server not set."
344 -F "write_payload_size=${write_payload_size}" \
345 -F "send_payload_size=${send_payload_size}"
346 if [ "${sig}" != "undefined" ]; then
351 if [ -r "${report_payload}" ]; then
353 -F "upload_file_${kind}=@${report_payload}"
355 if [ "${log}" != "undefined" -a -r "${log}" ]; then
360 if [ "${upload_prefix}" = "undefined" ]; then
364 # Grab any variable that begins with upload_.
366 for k in $(get_keys "${meta_path}" "^upload_"); do
367 v="$(get_key_value "${meta_path}" "${k}")"
369 # Product & version are handled separately.
373 set -- "$@" -F "${upload_prefix}${k#upload_var_}=${v}"
376 if [ -r "${v}" ]; then
377 set -- "$@" -F "${upload_prefix}${k#upload_file_}=@${v}"
383 # If product_id or product_version is undefined in the metadata, we use
384 # the default product name and the report's bdk_version instead.
385 if [ "${product}" = "undefined" ]; then
386 product="${BRILLO_PRODUCT}"
388 if [ "${version}" = "undefined" ]; then
389 version="${bdk_version}"
393 if is_test_image; then
395 elif is_developer_image; then
397 elif [ ${FORCE_OFFICIAL} -ne 0 ]; then
398 image_type="force-official"
399 elif is_mock && ! is_mock_successful; then
400 image_type="mock-fail"
404 if is_developer_mode; then
408 # Need to strip dashes ourselves as Chrome preserves it in the file
409 # nowadays. This is also what the Chrome breakpad client does.
410 guid=$(tr -d '-' < "${GUID_FILE}")
412 local error_type="$(get_key_value "${meta_path}" "error_type")"
413 [ "${error_type}" = "undefined" ] && error_type=
415 lecho "Sending crash:"
416 if [ "${product}" != "${BRILLO_PRODUCT}" ]; then
417 lecho " Sending crash report on behalf of ${product}"
419 lecho " Metadata: ${meta_path} (${kind})"
420 lecho " Payload: ${report_payload}"
421 lecho " Version: ${version}"
422 lecho " Bdk Version: ${bdk_version}"
423 [ -n "${image_type}" ] && lecho " Image type: ${image_type}"
424 [ -n "${boot_mode}" ] && lecho " Boot mode: ${boot_mode}"
426 lecho " Product: ${product}"
428 lecho " HWClass: ${hwclass}"
429 lecho " write_payload_size: ${write_payload_size}"
430 lecho " send_payload_size: ${send_payload_size}"
431 if [ "${log}" != "undefined" ]; then
432 lecho " log: @${log}"
434 if [ "${sig}" != "undefined" ]; then
438 lecho " Exec name: ${exec_name}"
439 [ -n "${error_type}" ] && lecho " Error type: ${error_type}"
441 if ! is_mock_successful; then
442 lecho "Mocking unsuccessful send"
445 lecho "Mocking successful send"
449 # Read in the first proxy, if any, for a given URL. NOTE: The
450 # double-quotes are necessary due to a bug in dash with the "local"
451 # builtin command and values that have spaces in them (see
452 # "https://bugs.launchpad.net/ubuntu/+source/dash/+bug/139097").
453 if [ -f "${LIST_PROXIES}" ]; then
455 proxy=$("${LIST_PROXIES}" --quiet "${url}")
457 if [ ${ret} -ne 0 ]; then
459 lwarn "Listing proxies failed with exit code ${ret}"
461 proxy=$(echo "${proxy}" | head -1)
464 # if a direct connection should be used, unset the proxy variable.
465 [ "${proxy}" = "direct://" ] && proxy=
466 local report_id="${TMP_DIR}/report_id"
467 local curl_stderr="${TMP_DIR}/curl_stderr"
470 curl "${url}" -f -v ${proxy:+--proxy "$proxy"} \
471 --capath "$(get_certificates_path)" --ciphers HIGH \
472 -F "prod=${product}" \
473 -F "ver=${version}" \
474 -F "bdk_version=${bdk_version}" \
475 -F "hwclass=${hwclass}" \
476 -F "exec_name=${exec_name}" \
477 -F "model_manifest_id=${model_manifest_id}" \
478 ${image_type:+-F "image_type=${image_type}"} \
479 ${boot_mode:+-F "boot_mode=${boot_mode}"} \
480 ${error_type:+-F "error_type=${error_type}"} \
488 if [ ${curl_result} -eq 0 ]; then
489 local id="$(cat "${report_id}")"
490 local timestamp="$(date +%s)"
491 local filter_prod="$(filter_log_string "${product}")"
492 local filter_exec="$(filter_log_string "${exec_name}")"
493 if [ "${filter_prod}" != "${product}" ]; then
494 lwarn "Product name filtered to: ${filter_prod}."
496 if [ "${filter_exec}" != "${exec_name}" ]; then
497 lwarn "Exec name filtered to: ${filter_exec}."
499 printf "{'time':%s,'id':'%s','product':'%s','exec_name':'%s'}\n" \
500 "${timestamp}" "${id}" "${filter_prod}" "${filter_exec}" >> "${CRASH_LOG}"
501 lecho "Crash report receipt ID ${id}"
503 lecho "Crash sending failed with exit code ${curl_result}: " \
504 "$(cat "${curl_stderr}")"
509 return ${curl_result}
# *.meta files always end with done=1 so we can tell if they are complete.
# Arguments: $1 - path to the .meta file.
# Returns:   0 if the file contains a line that is exactly "done=1",
#            non-zero otherwise (including when the file cannot be read).
is_complete_metadata() {
  # Match the whole line as a fixed string: an unanchored "done=1" is also
  # satisfied by unrelated entries (e.g. a key ending in "...abandoned=1" or
  # a payload path containing the text), which would mark a half-written
  # report as complete.
  grep -q -x -F -e "done=1" -- "$1"
}
517 # Remove the given report path.
523 # Send all crashes from the given directory. This applies even when we're on a
524 # 3G connection (see crosbug.com/3304 for discussion).
527 lecho "Sending crashes for ${dir}"
529 if [ ! -d "${dir}" ]; then
533 # Consider any old files which still have no corresponding meta file
534 # as orphaned, and remove them.
535 for old_file in $(find "${dir}" -mindepth 1 \
536 -mtime +1 -type f); do
537 if [ ! -e "$(get_base "${old_file}").meta" ]; then
538 lecho "Removing old orphaned file: ${old_file}."
539 rm -f -- "${old_file}"
543 # Look through all metadata (*.meta) files, oldest first. That way, the rate
544 # limit does not stall old crashes if there's a high amount of new crashes
546 # For each crash report, first evaluate conditions that might lead to its
547 # removal to honor user choice and to free disk space as soon as possible,
548 # then decide whether it should be sent right now or kept for later sending.
549 for meta_path in $(ls -1tr "${dir}"/*.meta 2>/dev/null); do
550 lecho "Considering metadata ${meta_path}."
552 local kind=$(get_kind "${meta_path}")
553 if [ "${kind}" != "minidump" ] && \
554 [ "${kind}" != "kcrash" ] && \
555 [ "${kind}" != "log" ] &&
556 [ "${kind}" != "devcore" ]; then
557 lecho "Unknown report kind ${kind}. Removing report."
558 remove_report "${meta_path}"
562 if ! is_complete_metadata "${meta_path}"; then
563 # This report is incomplete, so if it's old, just remove it.
564 local old_meta=$(find "${dir}" -mindepth 1 -name \
565 $(basename "${meta_path}") -mtime +1 -type f)
566 if [ -n "${old_meta}" ]; then
567 lecho "Removing old incomplete metadata."
568 remove_report "${meta_path}"
570 lecho "Ignoring recent incomplete metadata."
575 # Ignore device coredump if device coredump uploading is not allowed.
576 if [ "${kind}" = "devcore" ] && ! is_device_coredump_upload_allowed; then
577 lecho "Ignoring device coredump. Device coredump upload not allowed."
581 if ! is_mock && ! is_official_image; then
582 lecho "Not an official OS version. Removing crash."
583 remove_report "${meta_path}"
587 # Remove existing crashes in case user consent has not (yet) been given or
588 # has been revoked. This must come after the guest mode check because
589 # metrics_client always returns "not consented" in guest mode.
590 if ! metrics_client -c; then
591 lecho "Crash reporting is disabled. Removing crash."
592 remove_report "${meta_path}"
596 # Skip report if the upload rate is exceeded. (Don't exit right now because
597 # subsequent reports may be candidates for deletion.)
598 if ! check_rate; then
599 lecho "Sending ${meta_path} would exceed rate. Leaving for later."
603 # The .meta file should be written *after* all to-be-uploaded files that it
604 # references. Nevertheless, as a safeguard, a hold-off time of thirty
605 # seconds after writing the .meta file is ensured. Also, sending of crash
606 # reports is spread out randomly by up to SECONDS_SEND_SPREAD. Thus, for
607 # the sleep call the greater of the two delays is used.
608 local now=$(date +%s)
609 local holdoff_time=$(($(stat -c "%Y" "${meta_path}") + 30 - ${now}))
610 local spread_time=$(generate_uniform_random "${SECONDS_SEND_SPREAD}")
612 if [ ${spread_time} -gt ${holdoff_time} ]; then
613 sleep_time="${spread_time}"
615 sleep_time="${holdoff_time}"
617 lecho "Scheduled to send in ${sleep_time}s."
619 if ! sleep "${sleep_time}"; then
626 if ! send_crash "${meta_path}"; then
627 lecho "Problem sending ${meta_path}, not removing."
631 # Send was successful, now remove.
632 lecho "Successfully sent crash ${meta_path} and removing."
633 remove_report "${meta_path}"
639 Usage: crash_sender [options]
642 -e <var>=<val> Set env |var| to |val| (only some vars)
648 # Parse the command line arguments.
649 while [ $# -gt 0 ]; do
656 MOCK_DEVELOPER_MODE=*|\
657 OVERRIDE_PAUSE_SENDING=*|\
658 SECONDS_SEND_SPREAD=*)
662 lecho "Unknown var passed to -e: $1"
671 lecho "Unknown options: $*"
682 if [ -e "${PAUSE_CRASH_SENDING}" ] && \
683 [ ${OVERRIDE_PAUSE_SENDING} -eq 0 ]; then
684 lecho "Exiting early due to ${PAUSE_CRASH_SENDING}."
688 if is_test_image; then
689 lecho "Exiting early due to test image."
693 # We don't perform checks on this because we have a master lock with the
694 # CRASH_SENDER_LOCK file. This pid file is for the system to keep track
695 # (like with autotests) that we're still running.
696 echo $$ > "${RUN_FILE}"
698 for dependency in "$(get_certificates_path)"; do
699 if [ ! -x "${dependency}" ]; then
700 lecho "Fatal: Crash sending disabled: ${dependency} not found."
705 TMP_DIR="$(mktemp -d "${CRASH_STATE_DIR}/tmp/crash_sender.XXXXXX")"
707 # Send system-wide crashes
708 send_crashes "${CRASH_STATE_DIR}/crash"
711 trap cleanup EXIT INT TERM
713 #TODO(http://b/23937249): Change the locking logic back to using flock.
714 if ! mkdir "${CRASH_SENDER_LOCK}" 2>/dev/null; then
715 lecho "Already running; quitting."