From 4a7884384b65fdc8a3b261316f2a8656b4d0ac7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ege=20G=C3=BCne=C5=9F?= Date: Mon, 12 May 2025 13:12:53 +0300 Subject: [PATCH] K8SPSMDB-1308: Improve physical restore logs --- build/physical-restore-ps-entry.sh | 35 ++++++++++++++++++----- e2e-tests/functions | 46 ++++++++++++++++++++++++------ 2 files changed, 66 insertions(+), 15 deletions(-) diff --git a/build/physical-restore-ps-entry.sh b/build/physical-restore-ps-entry.sh index 8927c16f1d..41b627537e 100755 --- a/build/physical-restore-ps-entry.sh +++ b/build/physical-restore-ps-entry.sh @@ -1,15 +1,36 @@ #!/bin/bash -set -Eeuo pipefail +set -e set -o xtrace -log=/tmp/pbm-agent.log +PBM_AGENT_LOG=/tmp/pbm-agent.log +MONGOD_LOG=/tmp/mongod.log +PHYSICAL_RESTORE_DIR=/data/db/pbm-restore-logs + +function handle_sigterm() { + echo "Received SIGTERM, cleaning up..." + + mkdir ${PHYSICAL_RESTORE_DIR} + mv pbm.restore.log.* ${PBM_AGENT_LOG} ${MONGOD_LOG} ${PHYSICAL_RESTORE_DIR}/ + + echo "Restore finished, you can find logs in ${PHYSICAL_RESTORE_DIR}" + exit 0 +} + +trap 'handle_sigterm' 15 touch /opt/percona/restore-in-progress -/opt/percona/pbm-agent 1>&2 2>${log} & -/opt/percona/ps-entry.sh "$@" 1>&2 2>/tmp/mongod.log +/opt/percona/pbm-agent >${PBM_AGENT_LOG} 2>&1 & +pbm_pid=$! + +/opt/percona/ps-entry.sh "$@" >${MONGOD_LOG} 2>&1 & +mongod_pid=$! -echo "Physical restore in progress" -tail -n +1 -f ${log} -sleep infinity +set +o xtrace +echo "Physical restore in progress... pbm-agent logs: ${PBM_AGENT_LOG} mongod logs: ${MONGOD_LOG}" +echo "Script PID: $$, pbm-agent PID: $pbm_pid, mongod PID: $mongod_pid" +while true; do + echo "Still in progress at $(date)" + sleep 10 +done diff --git a/e2e-tests/functions b/e2e-tests/functions index f2c8dbe627..546bf2cec2 100755 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -337,6 +337,36 @@ simple_data_check() { fi } +get_mongod_pods() { + local cluster=$1 + + kubectl_bin get pod \ + --no-headers \ + -l app.kubernetes.io/instance=${cluster} \ + -l app.kubernetes.io/component=mongod + +} + +collect_physical_restore_logs() { + local cluster=$1 + local restore=$2 + + for pod in $(get_mongod_pods ${cluster}); do + desc "pbm-agent logs from ${pod}" + kubectl_bin exec -it ${pod} -- cat /tmp/pbm-agent.log || true + done +} + +is_physical_backup() { + local backup=$1 + + if [[ $(kubectl_bin get psmdb-backup ${backup} -o jsonpath={.status.type}) == "physical" ]]; then + return 0 + fi + + return 1 +} + wait_restore() { local backup_name=$1 local cluster_name=$2 @@ -359,15 +389,15 @@ wait_restore() { break fi if [[ $retry -ge $wait_time || ${current_state} == 'error' ]]; then - kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \ - | grep -v 'level=info' \ - | grep -v 'level=debug' \ - | grep -v 'Getting tasks for pod' \ - | grep -v 'Getting pods from source' \ - | tail -100 + desc "operator logs:" + kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) | tail -100 + + if is_physical_backup ${backup_name}; then + collect_physical_restore_logs + fi + kubectl_bin get psmdb-restore restore-${backup_name} -o yaml - echo "Restore object restore-${backup_name} is in ${current_state} state." - echo something went wrong with operator or kubernetes cluster + log "Restore object restore-${backup_name} is in ${current_state} state." exit 1 fi done