
Commit 7a70e0d

K8SPSMDB-1080 - Use trap to catch exit status
1 parent 31091a0 commit 7a70e0d

27 files changed: +14 −75 lines changed

e2e-tests/arbiter/run

Lines changed: 0 additions & 1 deletion
@@ -31,7 +31,6 @@ check_cr_config() {
     if [[ $(kubectl_bin get pod \
         --selector=statefulset.kubernetes.io/pod-name="${cluster}-arbiter-0" \
         -o jsonpath='{.items[*].status.containerStatuses[?(@.name == "mongod-arbiter")].restartCount}') -gt 0 ]]; then
-        collect_k8s_logs
         echo "Something went wrong with arbiter. Exiting..."
         exit 1
     fi

e2e-tests/balancer/run

Lines changed: 0 additions & 1 deletion
@@ -15,7 +15,6 @@ check_balancer() {
         | grep -E -v "Percona Server for MongoDB|connecting to:|Implicit session:|versions do not match|Error saving history file:|bye")
 
     if [[ $balancer_running != "$expected" ]]; then
-        collect_k8s_logs
         echo "Unexpected output from \"db.adminCommand({balancerStatus: 1}).mode\": $balancer_running"
         echo "Expected $expected"
         exit 1

e2e-tests/cross-site-sharded/run

Lines changed: 0 additions & 1 deletion
@@ -101,7 +101,6 @@ for i in "rs0" "rs1"; do
 done
 
 if [[ $shards -lt 2 ]]; then
-    collect_k8s_logs
     echo "data is only on some of the shards, maybe sharding is not working"
     exit 1
 fi

e2e-tests/data-at-rest-encryption/run

Lines changed: 0 additions & 2 deletions
@@ -83,7 +83,6 @@ encrypted_cluster_log=$(kubectl_bin logs some-name-rs0-0 -c mongod -n $namespace
 
 echo "$encrypted_cluster_log"
 if [ -z "$encrypted_cluster_log" ]; then
-    collect_k8s_logs
     echo "Cluster is not encrypted"
     exit 1
 fi

@@ -100,7 +99,6 @@ until [ "$retry" -ge 10 ]; do
         echo "Cluster is not encrypted already"
         break
     elif [ $retry == 15 ]; then
-        collect_k8s_logs
         echo "Max retry count $retry reached. Cluster is still encrypted"
         exit 1
     else

e2e-tests/data-sharded/run

Lines changed: 0 additions & 3 deletions
@@ -17,7 +17,6 @@ check_rs_proper_component_deletion() {
     until [[ $(kubectl_bin get sts -l app.kubernetes.io/instance=${cluster},app.kubernetes.io/replset=${rs_name} -ojson | jq '.items | length') -eq 0 ]]; do
         let retry+=1
         if [ $retry -ge 70 ]; then
-            collect_k8s_logs
             sts_count=$(kubectl_bin get sts -l app.kubernetes.io/instance=${cluster},app.kubernetes.io/replset=${rs_name} -ojson | jq '.items | length')
             echo "Replset $rs_name not properly removed, expected sts count of 0 but got $sts_count. Exiting after $retry tries..."
             exit 1

@@ -116,7 +115,6 @@ main() {
     done
 
     if [[ $shards -lt 3 ]]; then
-        collect_k8s_logs
         echo "data is only on some of the shards, maybe sharding is not working"
         exit 1
     fi

@@ -127,7 +125,6 @@ main() {
         "clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
         "--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls")
     if ! echo $res | grep -q '"ok" : 1'; then
-        collect_k8s_logs
         echo "app database not dropped. Exiting.."
         exit 1
     fi

e2e-tests/default-cr/run

Lines changed: 0 additions & 1 deletion
@@ -27,7 +27,6 @@ function stop_cluster() {
         let passed_time="${passed_time}+${sleep_time}"
         sleep ${sleep_time}
         if [[ ${passed_time} -gt ${max_wait_time} ]]; then
-            collect_k8s_logs
             echo "We've been waiting for cluster stop for too long. Exiting..."
             exit 1
         fi

e2e-tests/demand-backup-physical-sharded/run

Lines changed: 0 additions & 3 deletions
@@ -38,7 +38,6 @@ run_recovery_check() {
     wait_restore "${backup_name}" "${cluster}" "ready" "0" "1800"
     kubectl_bin get psmdb ${cluster} -o yaml
     if [ $(kubectl_bin get psmdb ${cluster} -o yaml | yq '.metadata.annotations."percona.com/resync-pbm"') == null ]; then
-        collect_k8s_logs
         echo "psmdb/${cluster} should be annotated with percona.com/resync-pbm after a physical restore"
         exit 1
     fi

@@ -53,7 +52,6 @@ check_exported_mongos_service_endpoint() {
     local host=$1
 
     if [ "$host" != "$(kubectl_bin get psmdb $cluster -o=jsonpath='{.status.host}')" ]; then
-        collect_k8s_logs
         echo "Exported host is not correct after the restore"
         exit 1
     fi

@@ -82,7 +80,6 @@ wait_cluster_consistency ${cluster}
 lbEndpoint=$(kubectl_bin get svc $cluster-mongos -o=jsonpath='{.status}' |
     jq -r 'select(.loadBalancer != null and .loadBalancer.ingress != null and .loadBalancer.ingress != []) | .loadBalancer.ingress[0][]')
 if [ -z $lbEndpoint ]; then
-    collect_k8s_logs
     echo "mongos service not exported correctly"
     exit 1
 fi

e2e-tests/demand-backup-physical/run

Lines changed: 0 additions & 1 deletion
@@ -38,7 +38,6 @@ run_recovery_check() {
     wait_restore "${backup_name}" "${cluster}" "ready" "0" "1800"
     kubectl_bin get psmdb ${cluster} -o yaml
     if [ $(kubectl_bin get psmdb ${cluster} -o yaml | yq '.metadata.annotations."percona.com/resync-pbm"') == null ]; then
-        collect_k8s_logs
         echo "psmdb/${cluster} should be annotated with percona.com/resync-pbm after a physical restore"
         exit 1
     fi

e2e-tests/demand-backup-sharded/run

Lines changed: 0 additions & 1 deletion
@@ -166,7 +166,6 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest
     /usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \
     | grep -c ${backup_dest_minio}_ | cat)
 if [[ $backup_exists -eq 1 ]]; then
-    collect_k8s_logs
     echo "Backup was not removed from bucket -- minio"
     exit 1
 fi

e2e-tests/demand-backup/run

Lines changed: 0 additions & 2 deletions
@@ -135,7 +135,6 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest
     /usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \
     | grep -c ${backup_dest_minio} | cat)
 if [[ $backup_exists -eq 1 ]]; then
-    collect_k8s_logs
     echo "Backup was not removed from bucket -- minio"
     exit 1
 fi

@@ -171,7 +170,6 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest
     /usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \
     | grep -c ${backup_dest_minio} | cat)
 if [[ $backup_exists -eq 1 ]]; then
-    collect_k8s_logs
     echo "Backup was not removed from bucket -- minio"
     exit 1
 fi

e2e-tests/expose-sharded/run

Lines changed: 0 additions & 2 deletions
@@ -23,7 +23,6 @@ function stop_cluster() {
         let passed_time="${passed_time}+${sleep_time}"
         sleep ${passed_time}
         if [[ ${passed_time} -gt ${max_wait_time} ]]; then
-            collect_k8s_logs
             echo "We've been waiting for cluster stop for too long. Exiting..."
             exit 1
         fi

@@ -53,7 +52,6 @@ function compare_mongo_config() {
     rs0_0_endpoint_actual=$(run_mongo 'var host;var x=0;rs.conf().members.forEach(function(d){ if(d.tags.podName=="some-name-rs0-0"){ host=rs.conf().members[x].host;print(host)};x=x+1; })' "clusterAdmin:clusterAdmin123456@${cluster}-rs0.${namespace}" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:|bye')
 
     if [[ $rs0_0_endpoint_actual != "$rs0_0_endpoint:27017" || $cfg_0_endpoint_actual != "$cfg_0_endpoint:27017" ]]; then
-        collect_k8s_logs
         desc "Actual values rs $rs0_0_endpoint_actual and cfg $cfg_0_endpoint_actual do not match expected rs $rs0_0_endpoint:27017 and cfg $cfg_0_endpoint:27017"
         exit 1
     fi

e2e-tests/functions

Lines changed: 9 additions & 19 deletions
@@ -28,6 +28,15 @@ conf_dir=$(realpath $test_dir/../conf || :)
 src_dir=$(realpath $test_dir/../..)
 logs_dir=$(realpath $test_dir/../logs)
 
+trap cleanup EXIT HUP INT QUIT TERM
+cleanup() {
+    exit_code=$?
+    if [[ ${exit_code} -ne 0 ]]; then
+        collect_k8s_logs
+    fi
+    exit ${exit_code}
+}
+
 if [[ ${ENABLE_LOGGING} == "true" ]]; then
     if [ ! -d "${logs_dir}" ]; then
         mkdir "${logs_dir}"

@@ -150,7 +159,6 @@ wait_pod() {
         echo -n .
         let retry+=1
         if [ $retry -ge 360 ]; then
-            collect_k8s_logs
             kubectl_bin describe pod/$pod
             kubectl_bin logs $pod
             kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \

@@ -179,7 +187,6 @@ wait_cron() {
         echo -n .
         let retry+=1
         if [ $retry -ge 360 ]; then
-            collect_k8s_logs
             kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
                 | grep -v 'level=info' \
                 | grep -v 'level=debug' \

@@ -205,7 +212,6 @@ wait_backup_agent() {
         echo -n .
         let retry+=1
         if [ $retry -ge 360 ]; then
-            collect_k8s_logs
             kubectl_bin logs $agent_pod -c backup-agent \
                 | tail -100
 

@@ -230,7 +236,6 @@ wait_backup() {
         let retry+=1
         current_status=$(kubectl_bin get psmdb-backup $backup_name -o jsonpath='{.status.state}')
         if [[ $retry -ge 360 || ${current_status} == 'error' ]]; then
-            collect_k8s_logs
             kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
                 | grep -v 'level=info' \
                 | grep -v 'level=debug' \

@@ -291,7 +296,6 @@ wait_deployment() {
         echo -n .
         let retry+=1
         if [ $retry -ge 360 ]; then
-            collect_k8s_logs
             kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
                 | grep -v 'level=info' \
                 | grep -v 'level=debug' \

@@ -339,7 +343,6 @@ wait_restore() {
         let retry+=1
         current_state=$(kubectl_bin get psmdb-restore restore-$backup_name -o jsonpath='{.status.state}')
         if [[ $retry -ge $wait_time || ${current_state} == 'error' ]]; then
-            collect_k8s_logs
             kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
                 | grep -v 'level=info' \
                 | grep -v 'level=debug' \

@@ -553,7 +556,6 @@ retry() {
 
     until "$@"; do
         if [[ $n -ge $max ]]; then
-            collect_k8s_logs
             echo "The command '$@' has failed after $n attempts."
             exit 1
         fi

@@ -593,7 +595,6 @@ wait_for_running() {
         timeout=$((timeout + 1))
         echo -n '.'
         if [[ ${timeout} -gt 1500 ]]; then
-            collect_k8s_logs
             echo
             echo "Waiting timeout has been reached. Exiting..."
             exit 1

@@ -616,7 +617,6 @@ wait_for_delete() {
         echo -n .
         let retry+=1
         if [ $retry -ge $wait_time ]; then
-            collect_k8s_logs
             kubectl logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
                 | grep -v 'level=info' \
                 | grep -v 'level=debug' \

@@ -639,8 +639,6 @@ compare_generation() {
 
     current_generation="$(kubectl_bin get ${resource_type} "${resource_name}" -o jsonpath='{.metadata.generation}')"
     if [[ ${generation} != "${current_generation}" ]]; then
-        collect_k8s_logs
-
         echo "Generation for ${resource_type}/${resource_name} is: ${current_generation}, but should be: ${generation}"
         exit 1
     fi

@@ -1011,7 +1009,6 @@ get_service_endpoint() {
         return
     fi
 
-    collect_k8s_logs
     exit 1
 }
 

@@ -1150,9 +1147,6 @@ kubectl_bin() {
     cat "$LAST_OUT"
     cat "$LAST_ERR" >&2
     rm "$LAST_OUT" "$LAST_ERR"
-    if [ ${exit_status} != 0 ]; then
-        collect_k8s_logs
-    fi
     return ${exit_status}
 }
 

@@ -1191,7 +1185,6 @@ wait_cluster_consistency() {
     until [[ "$(kubectl_bin get psmdb "${cluster_name}" -o jsonpath='{.status.state}')" == "ready" ]]; do
         let retry+=1
         if [ $retry -ge $wait_time ]; then
-            collect_k8s_logs
             echo max retry count $retry reached. something went wrong with operator or kubernetes cluster
             exit 1
         fi

@@ -1218,7 +1211,6 @@ check_backup_deletion() {
     retry=0
     until [[ $(curl -sw '%{http_code}' -o /dev/null $path) -eq 403 ]] || [[ $(curl -sw '%{http_code}' -o /dev/null $path) -eq 404 ]]; do
         if [ $retry -ge 10 ]; then
-            collect_k8s_logs
             echo max retry count $retry reached. something went wrong with operator or kubernetes cluster
             echo "Backup was not removed from bucket -- $storage_name"
             exit 1

@@ -1280,7 +1272,6 @@ function get_mongod_ver_from_image() {
     version_info=$(run_simple_cli_inside_image ${image} 'mongod --version' | $sed -r 's/^.*db version v(([0-9]+\.){2}[0-9]+-[0-9]+).*$/\1/g')
 
     if [[ ! ${version_info} =~ ^([0-9]+\.){2}[0-9]+-[0-9]+$ ]]; then
-        collect_k8s_logs
         printf "No mongod version obtained from %s. Exiting" ${image}
         exit 1
     fi

@@ -1293,7 +1284,6 @@ function get_pbm_version() {
     local version_info=$(run_simple_cli_inside_image ${image} 'pbm-agent version' | $sed -r 's/^Version:\ (([0-9]+\.){2}[0-9]+)\ .*/\1/g')
 
     if [[ ! ${version_info} =~ ^([0-9]+\.){2}[0-9]+$ ]]; then
-        collect_k8s_logs
         printf "No pbm version obtained from %s. Exiting" ${image}
         exit 1
     fi
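
The heart of the change is the first hunk in e2e-tests/functions: rather than sprinkling collect_k8s_logs over every failure branch, the test library now registers a single cleanup handler that runs on EXIT (and on HUP, INT, QUIT, and TERM) and collects Kubernetes logs only when the script is exiting with a nonzero status. Below is a minimal, self-contained sketch of that pattern; the trap and cleanup lines mirror the committed code, while the collect_k8s_logs stub is hypothetical, added only so the sketch runs standalone.

    #!/bin/bash
    set -o errexit

    # Hypothetical stub for this sketch; the real collect_k8s_logs in
    # e2e-tests/functions gathers logs from the cluster under test.
    collect_k8s_logs() {
        echo "collecting k8s logs..."
    }

    cleanup() {
        exit_code=$? # exit status of the command that triggered the trap
        if [[ ${exit_code} -ne 0 ]]; then
            collect_k8s_logs # log collection now lives in one place
        fi
        exit ${exit_code} # preserve the original exit status
    }
    trap cleanup EXIT HUP INT QUIT TERM

    echo "test body runs here"
    false # with errexit, any failing command ends the script and fires the trap

A clean run exits 0 and skips collection entirely; a failure anywhere in the script reaches cleanup with its original status intact. That is what lets this commit delete the scattered per-call-site invocations shown above while still covering every failure path, including ones that never had an explicit collect_k8s_logs call.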

e2e-tests/init-deploy/run

Lines changed: 0 additions & 1 deletion
@@ -61,7 +61,6 @@ compare_mongo_cmd "find" "myApp:myPass@$cluster-2.$cluster.$namespace"
 desc 'check number of connections'
 conn_count=$(run_mongo 'db.serverStatus().connections.current' "clusterAdmin:clusterAdmin123456@$cluster.$namespace" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|bye')
 if [ ${conn_count} -gt ${max_conn} ]; then
-    collect_k8s_logs
     echo "Mongo connection count ${conn_count} is greater than maximum connection count limit: ${max_conn}"
     exit 1
 fi

e2e-tests/mongod-major-upgrade-sharded/run

Lines changed: 0 additions & 1 deletion
@@ -94,7 +94,6 @@ function main() {
         | grep -E '^\{.*\}$' | jq -r '.featureCompatibilityVersion.version')
 
     if [[ ${currentFCV} != ${version} ]]; then
-        collect_k8s_logs
         echo "FCV at the moment is ${currentFCV} and is not set to ${version} as it should. Exiting..."
         exit 1
     fi

e2e-tests/mongod-major-upgrade/run

Lines changed: 0 additions & 1 deletion
@@ -89,7 +89,6 @@ function main() {
         | grep -E '^\{.*\}$' | jq -r '.featureCompatibilityVersion.version')
 
     if [[ ${currentFCV} != ${version} ]]; then
-        collect_k8s_logs
         echo "FCV at the moment is ${currentFCV} and is not set to ${version} as it should. Exiting..."
         exit 1
     fi

e2e-tests/monitoring-2-0/run

Lines changed: 0 additions & 2 deletions
@@ -37,7 +37,6 @@ until kubectl_bin exec monitoring-0 -- bash -c "ls -l /proc/*/exe 2>/dev/null| g
     sleep 5
     let retry+=1
     if [ $retry -ge 20 ]; then
-        collect_k8s_logs
         echo "Max retry count $retry reached. Pmm-server can't start"
         exit 1
     fi

@@ -151,7 +150,6 @@ if [[ -n ${OPENSHIFT} ]]; then
 fi
 
 if [[ $(kubectl_bin logs monitoring-rs0-0 pmm-client | grep -c 'cannot auto discover databases and collections') != 0 ]]; then
-    collect_k8s_logs
     echo "error: cannot auto discover databases and collections"
     exit 1
 fi

e2e-tests/multi-cluster-service/run

Lines changed: 0 additions & 3 deletions
@@ -23,7 +23,6 @@ wait_mcs_api() {
     until [[ $(kubectl_bin api-resources | grep ServiceExport | wc -l) -eq 1 ]]; do
         let retry+=1
         if [ $retry -ge 64 ]; then
-            collect_k8s_logs
             echo max retry count $retry reached. Something went wrong with MCS, probably a problem on GCP side.
             exit 1
         fi

@@ -41,7 +40,6 @@ wait_service_import() {
     until [[ "$(kubectl_bin get serviceimport --ignore-not-found | grep -v 'NAME' | wc -l)" -eq "9" ]]; do
         let retry+=1
         if [ $retry -ge 64 ]; then
-            collect_k8s_logs
             echo max retry count $retry reached. Something went wrong with MCS, probably a problem in gke-mcs-importer.
             exit 1
         fi

@@ -60,7 +58,6 @@ wait_service_export() {
     until [[ "$(kubectl_bin get serviceexport --ignore-not-found | grep -v 'NAME' | wc -l)" -eq "9" ]]; do
         let retry+=1
         if [ $retry -ge 64 ]; then
-            collect_k8s_logs
             echo max retry count $retry reached. Something went wrong with MCS, probably a problem in gke-mcs-exporter.
             exit 1
         fi

e2e-tests/one-pod/compare/statefulset_one-pod-rs0.yml

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ metadata:
   name: one-pod
 spec:
   podManagementPolicy: OrderedReady
-  replicas: 1
+  replicas: 2
   revisionHistoryLimit: 10
   selector:
     matchLabels:

e2e-tests/rs-shard-migration/run

Lines changed: 0 additions & 4 deletions
@@ -38,12 +38,10 @@ function main() {
     wait_cluster_consistency "${cluster}"
 
     if [[ $(kubectl_bin get statefulset/${cluster}-mongos -o jsonpath='{.status.readyReplicas}') -lt 1 ]]; then
-        collect_k8s_logs
         echo "Mongos hasn't been properly started. Exiting..."
         exit 1
     fi
     if [[ "$(kubectl_bin get sts/${cluster}-cfg -o jsonpath='{.status.replicas}')" != "$(kubectl_bin get sts/${cluster}-cfg -o jsonpath='{.status.readyReplicas}')" ]]; then
-        collect_k8s_logs
         echo "Cfg pods haven't been properly started. Exiting..."
         exit 1
     fi

@@ -56,7 +54,6 @@ function main() {
 
     if [[ -z "$(get_shard_parameter ${cluster} ${namespace} lastCommitedOpTime)" ]] \
         && [[ -z "$(get_shard_parameter ${cluster} ${namespace} '$configServerState.opTime.ts')" ]]; then # for mongo 3.6
-        collect_k8s_logs
         echo "Sharded cluster does not work properly"
         exit 1
     fi

@@ -73,7 +70,6 @@ function main() {
         || [[ -n "$(kubectl_bin get service -o jsonpath='{.items[?(@.metadata.name == "'"${cluster}-mongos"'")].metadata.name}')" ]] \
         || [[ -n "$(kubectl_bin get service -o jsonpath='{.items[?(@.metadata.name == "'"${cluster}-cfg"'")].metadata.name}')" ]] \
         || [[ -n "$(kubectl_bin get statefulset -o jsonpath='{.items[?(@.metadata.name == "'"${cluster}-cfg"'")].metadata.name}')" ]]; then
-        collect_k8s_logs
         echo "Transition to replicaset cluster has not been done well. Cluster does not work properly or some leftovers still exist"
         exit 1
     fi
