Skip to content

K8SPSMDB-1308: Improve physical restore logs #1915

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 28 additions & 7 deletions build/physical-restore-ps-entry.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,36 @@
#!/bin/bash

set -Eeuo pipefail
set -e
set -o xtrace

log=/tmp/pbm-agent.log
PBM_AGENT_LOG=/tmp/pbm-agent.log
MONGOD_LOG=/tmp/mongod.log
PHYSICAL_RESTORE_DIR=/data/db/pbm-restore-logs

# SIGTERM handler: moves the restore, pbm-agent and mongod logs into
# ${PHYSICAL_RESTORE_DIR} and exits successfully.
#
# Globals (read): PHYSICAL_RESTORE_DIR, PBM_AGENT_LOG, MONGOD_LOG
# Outputs:        progress messages on stdout
# Exits:          0 once the available logs have been moved
function handle_sigterm() {
	echo "Received SIGTERM, cleaning up..."

	# -p: an already existing directory must not abort the handler
	# when the script runs with `set -e`.
	mkdir -p "${PHYSICAL_RESTORE_DIR}"

	local log_file
	for log_file in pbm.restore.log.* "${PBM_AGENT_LOG}" "${MONGOD_LOG}"; do
		# An unmatched glob stays literal and a log may not have been
		# created yet; skip those instead of failing the whole move.
		[[ -e ${log_file} ]] || continue
		mv -- "${log_file}" "${PHYSICAL_RESTORE_DIR}/"
	done

	echo "Restore finished, you can find logs in ${PHYSICAL_RESTORE_DIR}"
	exit 0
}

trap 'handle_sigterm' 15

touch /opt/percona/restore-in-progress

/opt/percona/pbm-agent 1>&2 2>${log} &
/opt/percona/ps-entry.sh "$@" 1>&2 2>/tmp/mongod.log
/opt/percona/pbm-agent >${PBM_AGENT_LOG} 2>&1 &
pbm_pid=$!

/opt/percona/ps-entry.sh "$@" >${MONGOD_LOG} 2>&1 &
mongod_pid=$!

echo "Physical restore in progress"
tail -n +1 -f ${log}
sleep infinity
set +o xtrace
echo "Physical restore in progress... pbm-agent logs: ${PBM_AGENT_LOG} mongod logs: ${MONGOD_LOG}"
echo "Script PID: $$, pbm-agent PID: $pbm_pid, mongod PID: $mongod_pid"
while true; do
echo "Still in progress at $(date)"
sleep 10
Comment on lines +34 to +35
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[shfmt] reported by reviewdog 🐶

Suggested change
echo "Still in progress at $(date)"
sleep 10
echo "Still in progress at $(date)"
sleep 10

done
46 changes: 38 additions & 8 deletions e2e-tests/functions
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,36 @@ simple_data_check() {
fi
}

# Print the names of the mongod pods, one name per line.
#
# Arguments:
#   $1 - cluster name, matched against the app.kubernetes.io/instance label.
#        When empty or omitted, mongod pods of every instance are listed.
# Outputs:
#   pod names on stdout (no header row, no other columns), so the result
#   can be iterated over directly.
get_mongod_pods() {
	local cluster=${1:-}
	local selector="app.kubernetes.io/component=mongod"

	# Label requirements must be comma-joined in a single selector;
	# a repeated -l flag replaces the earlier one instead of AND-ing with it.
	if [[ -n ${cluster} ]]; then
		selector="app.kubernetes.io/instance=${cluster},${selector}"
	fi

	kubectl_bin get pod \
		--no-headers \
		-l "${selector}" \
		-o custom-columns=NAME:.metadata.name
}

# Dump /tmp/pbm-agent.log from every mongod pod of a cluster (best effort).
#
# Arguments:
#   $1 - cluster name, passed through to get_mongod_pods (may be empty)
#   $2 - restore name; accepted for interface compatibility, currently unused
# Outputs:
#   a desc header followed by the log content for each pod
# Returns:
#   0 even when a pod cannot be reached or has no log file
collect_physical_restore_logs() {
	local cluster=${1:-}
	local pod

	# Only the first column of each listing row is the pod name; the rest
	# (READY, STATUS, ...) must not be treated as pods.
	while read -r pod _; do
		[[ -n ${pod} ]] || continue
		desc "pbm-agent logs from ${pod}"
		# No -it: the test harness has no TTY, and stdin is detached so
		# kubectl cannot consume the pod list feeding this loop.
		kubectl_bin exec "${pod}" -- cat /tmp/pbm-agent.log </dev/null || true
	done < <(get_mongod_pods "${cluster}")
}

# Tell whether a psmdb-backup object is a physical backup.
#
# Arguments:
#   $1 - name of the psmdb-backup object
# Returns:
#   0 when its .status.type equals "physical", 1 otherwise
is_physical_backup() {
	local name=$1

	case "$(kubectl_bin get psmdb-backup ${name} -o jsonpath={.status.type})" in
		physical) return 0 ;;
		*) return 1 ;;
	esac
}

wait_restore() {
local backup_name=$1
local cluster_name=$2
Expand All @@ -359,15 +389,15 @@ wait_restore() {
break
fi
if [[ $retry -ge $wait_time || ${current_state} == 'error' ]]; then
kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
| grep -v 'level=info' \
| grep -v 'level=debug' \
| grep -v 'Getting tasks for pod' \
| grep -v 'Getting pods from source' \
| tail -100
desc "operator logs:"
kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) | tail -100

if is_physical_backup ${backup_name}; then
collect_physical_restore_logs
fi

kubectl_bin get psmdb-restore restore-${backup_name} -o yaml
echo "Restore object restore-${backup_name} is in ${current_state} state."
echo something went wrong with operator or kubernetes cluster
log "Restore object restore-${backup_name} is in ${current_state} state."
exit 1
fi
done
Expand Down
Loading