diff --git a/br/tests/br_encryption/run.sh b/br/tests/br_encryption/run.sh index 3934dd3b6103c..df2c6206f86f4 100755 --- a/br/tests/br_encryption/run.sh +++ b/br/tests/br_encryption/run.sh @@ -59,39 +59,6 @@ insert_additional_data() { done } -wait_log_checkpoint_advance() { - echo "wait for log checkpoint to advance" - sleep 10 - local current_ts=$(python3 -c "import time; print(int(time.time() * 1000) << 18)") - echo "current ts: $current_ts" - i=0 - while true; do - # extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty - log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name $TASK_NAME --json 2>br.log) - echo "log backup status: $log_backup_status" - local checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') - echo "checkpoint ts: $checkpoint_ts" - - # check whether the checkpoint ts is a number - if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then - if [ $checkpoint_ts -gt $current_ts ]; then - echo "the checkpoint has advanced" - break - fi - echo "the checkpoint hasn't advanced" - i=$((i+1)) - if [ "$i" -gt 50 ]; then - echo 'the checkpoint lag is too large' - exit 1 - fi - sleep 10 - else - echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!" - exit 1 - fi - done -} - calculate_checksum() { local db=$1 local checksum=$(run_sql "USE $db; ADMIN CHECKSUM TABLE $TABLE;" | awk '/CHECKSUM/{print $2}') @@ -170,7 +137,7 @@ run_backup_restore_test() { checksum_ori[${i}]=$(calculate_checksum "$DB${i}") || { echo "Failed to calculate checksum after insertion"; exit 1; } done - wait_log_checkpoint_advance || { echo "Failed to wait for log checkpoint"; exit 1; } + . 
"$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME || { echo "Failed to wait for log checkpoint"; exit 1; } #sanity check pause still works run_br log pause --task-name $TASK_NAME --pd $PD_ADDR || { echo "Failed to pause log backup"; exit 1; } @@ -270,7 +237,7 @@ test_backup_encrypted_restore_unencrypted() { # Insert additional test data insert_additional_data "insert_after_full_backup" || { echo "Failed to insert additional data"; exit 1; } - wait_log_checkpoint_advance || { echo "Failed to wait for log checkpoint"; exit 1; } + . "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME || { echo "Failed to wait for log checkpoint"; exit 1; } # Stop and clean the cluster diff --git a/br/tests/br_pitr/run.sh b/br/tests/br_pitr/run.sh index 0ecc8cfb9d458..4b863a389903a 100644 --- a/br/tests/br_pitr/run.sh +++ b/br/tests/br_pitr/run.sh @@ -21,6 +21,7 @@ CUR=$(cd `dirname $0`; pwd) # const value PREFIX="pitr_backup" # NOTICE: don't start with 'br' because `restart services` would remove file/directory br*. res_file="$TEST_DIR/sql_res.$TEST_NAME.txt" +TASK_NAME="br_pitr" # start a new cluster echo "restart a services" @@ -38,7 +39,7 @@ echo "prepare_delete_range_count: $prepare_delete_range_count" # start the log backup task echo "start log task" -run_br --pd $PD_ADDR log start --task-name integration_test -s "local://$TEST_DIR/$PREFIX/log" +run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$PREFIX/log" # run snapshot backup echo "run snapshot backup" @@ -70,39 +71,8 @@ incremental_delete_range_count=$(run_sql "select count(*) DELETE_RANGE_CNT from echo "incremental_delete_range_count: $incremental_delete_range_count" # wait checkpoint advance -echo "wait checkpoint advance" -sleep 10 current_ts=$(python3 -c "import time; print(int(time.time() * 1000) << 18)") -echo "current ts: $current_ts" -i=0 -while true; do - # extract the checkpoint ts of the log backup task. 
If there is some error, the checkpoint ts should be empty - log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name integration_test --json 2>br.log) - echo "log backup status: $log_backup_status" - checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') - echo "checkpoint ts: $checkpoint_ts" - - # check whether the checkpoint ts is a number - if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then - # check whether the checkpoint has advanced - if [ $checkpoint_ts -gt $current_ts ]; then - echo "the checkpoint has advanced" - break - fi - # the checkpoint hasn't advanced - echo "the checkpoint hasn't advanced" - i=$((i+1)) - if [ "$i" -gt 50 ]; then - echo 'the checkpoint lag is too large' - exit 1 - fi - sleep 10 - else - # unknown status, maybe somewhere is wrong - echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!" - exit 1 - fi -done +. "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME # dump some info from upstream cluster # ... diff --git a/br/tests/br_pitr_failpoint/run.sh b/br/tests/br_pitr_failpoint/run.sh index 8a10e74ab81fe..1b5bf221d0be6 100644 --- a/br/tests/br_pitr_failpoint/run.sh +++ b/br/tests/br_pitr_failpoint/run.sh @@ -17,6 +17,7 @@ set -eu . run_services CUR=$(cd `dirname $0`; pwd) +TASK_NAME="br_pitr_failpoint" # const value PREFIX="pitr_backup_failpoint" # NOTICE: don't start with 'br' because `restart services` would remove file/directory br*. @@ -42,7 +43,7 @@ sql_pid=$! 
# start the log backup task echo "start log task" -run_br --pd $PD_ADDR log start --task-name integration_test -s "local://$TEST_DIR/$PREFIX/log" +run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$PREFIX/log" # wait until the index creation is running retry_cnt=0 @@ -121,42 +122,9 @@ check_contains "Column_name: y" check_contains "Column_name: z" # wait checkpoint advance -echo "wait checkpoint advance" -sleep 10 -current_ts=$(echo $(($(date +%s%3N) << 18))) -echo "current ts: $current_ts" -i=0 -while true; do - # extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty - log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name integration_test --json 2>/dev/null) - echo "log backup status: $log_backup_status" - checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') - echo "checkpoint ts: $checkpoint_ts" - - # check whether the checkpoint ts is a number - if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then - # check whether the checkpoint has advanced - if [ $checkpoint_ts -gt $current_ts ]; then - echo "the checkpoint has advanced" - break - fi - # the checkpoint hasn't advanced - echo "the checkpoint hasn't advanced" - i=$((i+1)) - if [ "$i" -gt 50 ]; then - echo 'the checkpoint lag is too large' - exit 1 - fi - sleep 10 - else - # unknown status, maybe somewhere is wrong - echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!" - exit 1 - fi -done +. 
"$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME # start a new cluster -echo "restart a services" restart_services # PITR restore - 1 diff --git a/br/tests/br_pitr_gc_safepoint/run.sh b/br/tests/br_pitr_gc_safepoint/run.sh index 26b3b533c1d69..ef572a92fef67 100644 --- a/br/tests/br_pitr_gc_safepoint/run.sh +++ b/br/tests/br_pitr_gc_safepoint/run.sh @@ -21,6 +21,7 @@ CUR=$(cd `dirname $0`; pwd) # const value PREFIX="pitr_backup" # NOTICE: don't start with 'br' because `restart services` would remove file/directory br*. res_file="$TEST_DIR/sql_res.$TEST_NAME.txt" +TASK_NAME="br_pitr_gc_safepoint" # start a new cluster echo "restart a services" @@ -28,7 +29,7 @@ restart_services # start the log backup task echo "start log task" -run_br --pd $PD_ADDR log start --task-name integration_test -s "local://$TEST_DIR/$PREFIX/log" +run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$PREFIX/log" # prepare the data echo "prepare the data" @@ -41,41 +42,9 @@ prepare_delete_range_count=$(run_sql "select count(*) DELETE_RANGE_CNT from (sel echo "prepare_delete_range_count: $prepare_delete_range_count" # wait checkpoint advance -echo "wait checkpoint advance" -sleep 10 -current_ts=$(echo $(($(date +%s%3N) << 18))) -echo "current ts: $current_ts" -i=0 -while true; do - # extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty - log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name integration_test --json 2>br.log) - echo "log backup status: $log_backup_status" - checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') - echo "checkpoint ts: $checkpoint_ts" +. 
"$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" - # check whether the checkpoint ts is a number - if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then - # check whether the checkpoint has advanced - if [ $checkpoint_ts -gt $current_ts ]; then - echo "the checkpoint has advanced" - break - fi - # the checkpoint hasn't advanced - echo "the checkpoint hasn't advanced" - i=$((i+1)) - if [ "$i" -gt 50 ]; then - echo 'the checkpoint lag is too large' - exit 1 - fi - sleep 10 - else - # unknown status, maybe somewhere is wrong - echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!" - exit 1 - fi -done - -run_br --pd $PD_ADDR log pause --task-name integration_test +run_br --pd $PD_ADDR log pause --task-name $TASK_NAME safe_point=$(run_pd_ctl -u https://$PD_ADDR service-gc-safepoint) diff --git a/br/tests/br_restore_checkpoint/run.sh b/br/tests/br_restore_checkpoint/run.sh index 2a4b1104916de..da45692cdcb62 100644 --- a/br/tests/br_restore_checkpoint/run.sh +++ b/br/tests/br_restore_checkpoint/run.sh @@ -22,9 +22,9 @@ CUR=$(cd `dirname $0`; pwd) PREFIX="checkpoint" # NOTICE: don't start with 'br' because `restart services` would remove file/directory br*. 
DB=$TEST_NAME res_file="$TEST_DIR/sql_res.$TEST_NAME.txt" +TASK_NAME="br_restore_checkpoint" # start a new cluster -echo "restart a services" restart_services # prepare snapshot data @@ -37,7 +37,7 @@ run_sql "INSERT INTO $DB.tbl2 values (2, 'b');" # start the log backup task echo "start log task" -run_br --pd $PD_ADDR log start --task-name integration_test -s "local://$TEST_DIR/$PREFIX/log" +run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$PREFIX/log" # run snapshot backup echo "run snapshot backup" @@ -53,41 +53,9 @@ run_sql "INSERT INTO $DB.tbl3 values (33, 'cc');" # wait checkpoint advance echo "wait checkpoint advance" -sleep 10 -current_ts=$(echo $(($(date +%s%3N) << 18))) -echo "current ts: $current_ts" -i=0 -while true; do - # extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty - log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name integration_test --json 2>br.log) - echo "log backup status: $log_backup_status" - checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') - echo "checkpoint ts: $checkpoint_ts" - - # check whether the checkpoint ts is a number - if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then - # check whether the checkpoint has advanced - if [ $checkpoint_ts -gt $current_ts ]; then - echo "the checkpoint has advanced" - break - fi - # the checkpoint hasn't advanced - echo "the checkpoint hasn't advanced" - i=$((i+1)) - if [ "$i" -gt 50 ]; then - echo 'the checkpoint lag is too large' - exit 1 - fi - sleep 10 - else - # unknown status, maybe somewhere is wrong - echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!" - exit 1 - fi -done +. 
"$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME
 
 # start a new cluster
-echo "restart a services"
 restart_services
 
 # PITR but failed in the snapshot restore stage
diff --git a/br/tests/br_test_utils.sh b/br/tests/br_test_utils.sh
new file mode 100644
index 0000000000000..9102415a77e14
--- /dev/null
+++ b/br/tests/br_test_utils.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+#
+# Copyright 2024 PingCAP, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Shared helpers for the BR integration tests; meant to be sourced by a run.sh.
+# No `set` here on purpose: shell options set in a sourced file would leak into
+# the calling script (tracing with -x, aborting with -e/-u).
+
+# wait_log_checkpoint_advance [task_name]
+#
+# Polls `br log status` until the task's checkpoint ts is newer than a timestamp
+# taken 10 seconds after the call.
+#   $1        log backup task name; defaults to $TASK_NAME (one of them is required)
+#   globals   reads PD_ADDR and TEST_NAME, calls run_br; stderr of run_br goes to ./br.log
+#   returns   0 once the checkpoint has advanced
+#   exits     the calling shell with status 1 when the status carries errors, the
+#             checkpoint is not a positive number, or it is still behind after more than 50 polls
+wait_log_checkpoint_advance() {
+    local task_name=${1:-${TASK_NAME:?log backup task name is required}}
+    # declared separately from the assignments so `local` cannot mask a failing command
+    local current_ts checkpoint_ts log_backup_status
+    # local so the counter cannot clobber a caller's own loop variable
+    local attempts=0
+
+    echo "wait for log checkpoint to advance for task: $task_name"
+    sleep 10
+    # wall-clock milliseconds shifted left by 18 bits, compared against the checkpoint ts below
+    current_ts=$(python3 -c "import time; print(int(time.time() * 1000) << 18)") || {
+        echo "TEST: [$TEST_NAME] failed to compute the current ts!"
+        exit 1
+    }
+    echo "current ts: $current_ts"
+    while true; do
+        # extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty
+        log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd "$PD_ADDR" log status --task-name "$task_name" --json 2>br.log)
+        echo "log backup status: $log_backup_status"
+        # a jq failure is folded into "empty" so it is reported by the else branch below
+        checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') || checkpoint_ts=""
+        echo "checkpoint ts: $checkpoint_ts"
+
+        # check whether the checkpoint ts is a number
+        if [ "$checkpoint_ts" -gt 0 ] 2>/dev/null; then
+            if [ "$checkpoint_ts" -gt "$current_ts" ]; then
+                echo "the checkpoint has advanced"
+                break
+            fi
+            echo "the checkpoint hasn't advanced"
+            attempts=$((attempts + 1))
+            if [ "$attempts" -gt 50 ]; then
+                echo 'the checkpoint lag is too large'
+                exit 1
+            fi
+            sleep 10
+        else
+            echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!"
+            exit 1
+        fi
+    done
+}
diff --git a/br/tests/br_tiflash_conflict/run.sh b/br/tests/br_tiflash_conflict/run.sh
index f224a1497bf00..5f7e1f7726ae4 100644
--- a/br/tests/br_tiflash_conflict/run.sh
+++ b/br/tests/br_tiflash_conflict/run.sh
@@ -22,6 +22,7 @@ CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 backup_dir=$TEST_DIR/keep/${TEST_NAME}
 pitr_dir=${backup_dir}_pitr
 br_log_file=$TEST_DIR/br.log
+TASK_NAME="br_tiflash_conflict"
 
 # start a new cluster
 echo "restart a services"
@@ -33,7 +34,7 @@ run_sql_file $CUR/prepare_data/prepare_data.sql
 
 #run pitr backup
 echo "run pitr backup"
-run_br log start --task-name integration_test -s "local://$pitr_dir"
+run_br log start --task-name $TASK_NAME -s "local://$pitr_dir"
 
 # run snapshot backup
 echo "run snapshot backup"
@@ -47,39 +48,7 @@ echo "load the incremental data"
 run_sql_file $CUR/prepare_data/insert_data.sql
 
 # wait checkpoint advance
-echo "wait checkpoint advance"
-sleep 10
-current_ts=$(echo $(($(date +%s%3N) << 18)))
-echo "current ts: $current_ts"
-i=0
-while true; do
-    # extract the checkpoint ts of the log backup task.
If there is some error, the checkpoint ts should be empty - log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak log status --task-name integration_test --json 2>br.log) - echo "log backup status: $log_backup_status" - checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') - echo "checkpoint ts: $checkpoint_ts" - - # check whether the checkpoint ts is a number - if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then - # check whether the checkpoint has advanced - if [ $checkpoint_ts -gt $current_ts ]; then - echo "the checkpoint has advanced" - break - fi - # the checkpoint hasn't advanced - echo "the checkpoint hasn't advanced" - i=$((i+1)) - if [ "$i" -gt 50 ]; then - echo 'the checkpoint lag is too large' - exit 1 - fi - sleep 10 - else - # unknown status, maybe somewhere is wrong - echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!" - exit 1 - fi -done +. "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME # start a new cluster echo "restart a services"