Skip to content

Commit

Permalink
cluster: fix that scaled nodes don't respect component_versions (#2451
Browse files Browse the repository at this point in the history
)

* fix component_versions

* update logs

* add more debug logs

* update log place

* final commit

* use NewPart

* fix

Signed-off-by: xhe <[email protected]>

* fix

Signed-off-by: xhe <[email protected]>

* try fix

Signed-off-by: xhe <[email protected]>

* try fix

Signed-off-by: xhe <[email protected]>

* try fix

Signed-off-by: xhe <[email protected]>

* try another version

* try fix

Signed-off-by: xhe <[email protected]>

* try fix

Signed-off-by: xhe <[email protected]>

* try

Signed-off-by: xhe <[email protected]>

* use tiproxy.yaml

* component_versions

* update number

---------

Signed-off-by: xhe <[email protected]>
Co-authored-by: xhe <[email protected]>
  • Loading branch information
djshow832 and xhebox authored Aug 16, 2024
1 parent 9c22e67 commit 853c6a2
Show file tree
Hide file tree
Showing 11 changed files with 100 additions and 54 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/integrate-cluster-cmd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,15 @@ jobs:
- name: Collect component log
working-directory: ${{ env.working-directory }}
if: ${{ failure() }}
# if: always()
run: |
docker exec tiup-cluster-control bash /tiup-cluster/tests/tiup-cluster/script/pull_log.sh /tiup-cluster/logs
ls ${{ env.working-directory }}
- name: Detect error log
working-directory: ${{ env.working-directory }}
if: ${{ failure() }}
# if: always()
run: |
bash ./tests/tiup-cluster/script/detect_error.sh ./logs/
Expand All @@ -105,7 +107,8 @@ jobs:

- name: Output cluster debug log
working-directory: ${{ env.working-directory }}
if: always()
if: ${{ failure() }}
# if: always()
run: |
pwd
docker ps
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/integrate-cluster-scale.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,15 @@ jobs:
- name: Collect component log
working-directory: ${{ env.working-directory }}
if: ${{ failure() }}
# if: always()
run: |
docker exec tiup-cluster-control bash /tiup-cluster/tests/tiup-cluster/script/pull_log.sh /tiup-cluster/logs
ls ${{ env.working-directory }}
- name: Detect error log
working-directory: ${{ env.working-directory }}
if: ${{ failure() }}
# if: always()
run: |
bash ./tests/tiup-cluster/script/detect_error.sh ./logs/
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/integrate-dm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ jobs:
- name: Collect component log
working-directory: ${{ env.working-directory }}
if: ${{ failure() }}
# if: ${{ failure() }}
if: always()
run: |
docker exec tiup-cluster-control bash -c 'mkdir -p /tiup-cluster/logs; [[ -d ~/.tiup/logs ]] && find ~/.tiup/logs -type f -name "*.log" -exec cp {} /tiup-cluster/logs \; || true'
ls ${{ env.working-directory }}
Expand Down
9 changes: 3 additions & 6 deletions docker/up.sh
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,6 @@ exists python ||
exists docker ||
{ ERROR "Please install docker (https://docs.docker.com/engine/installation/)";
exit 1; }
exists docker-compose ||
{ ERROR "Please install docker-compose (https://docs.docker.com/compose/install/)";
exit 1; }

exists pip ||
{
Expand Down Expand Up @@ -233,17 +230,17 @@ echo "TIUP_TEST_IP_PREFIX=$ipprefix" >> ./secret/control.env

INFO "Running \`docker-compose build\`"
# shellcheck disable=SC2086
docker-compose -f docker-compose.yml ${COMPOSE} ${DEV} build
docker compose -f docker-compose.yml ${COMPOSE} ${DEV} build

INFO "Running \`docker-compose up\`"
if [ "${RUN_AS_DAEMON}" -eq 1 ]; then
# shellcheck disable=SC2086
docker-compose -f docker-compose.yml ${COMPOSE} ${DEV} up -d
docker compose -f docker-compose.yml ${COMPOSE} ${DEV} up -d
INFO "All containers started, run \`docker ps\` to view"
else
INFO "Please run \`docker exec -it tiup-cluster-control bash\` in another terminal to proceed"
# shellcheck disable=SC2086
docker-compose -f docker-compose.yml ${COMPOSE} ${DEV} up
docker compose -f docker-compose.yml ${COMPOSE} ${DEV} up
fi

popd
31 changes: 31 additions & 0 deletions pkg/cluster/spec/parse_topology_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,37 @@ tiflash_servers:
})
}

// TestMergeComponentVersions checks how component_versions behave when a
// scale-out topology is parsed on top of a part created by NewPart:
//   - versions only present in the base topology are expected to be kept (tidb),
//   - versions present in both are expected to take the scale-out value (tikv),
//   - versions only present in the scale-out topology are expected to be added (pd),
//
// and that the merged topology ends up with the same component versions as
// the scale-out part.
func (s *topoSuite) TestMergeComponentVersions(c *check.C) {
	// test component version overwrite
	//
	// NOTE: the per-component entries must be nested under
	// `component_versions:`; at column 0 they would be parsed as unrelated
	// top-level keys instead of version overrides.
	with2TempFile(`
component_versions:
  tidb: v8.0.0
  tikv: v8.0.0
tidb_servers:
  - host: 172.16.5.139
`, `
component_versions:
  tikv: v8.1.0
  pd: v8.0.0
tidb_servers:
  - host: 172.16.5.134
`, func(base, scale string) {
		baseTopo := Specification{}
		c.Assert(ParseTopologyYaml(base, &baseTopo), check.IsNil)

		// Start from a part derived from the base topology, then parse the
		// scale-out file into it so its values are layered on top.
		scaleTopo := baseTopo.NewPart()
		c.Assert(ParseTopologyYaml(scale, scaleTopo), check.IsNil)

		mergedTopo := baseTopo.MergeTopo(scaleTopo)
		c.Assert(mergedTopo.Validate(), check.IsNil)

		scaleSpec := scaleTopo.(*Specification)
		mergedSpec := mergedTopo.(*Specification)

		c.Assert(scaleSpec.ComponentVersions, check.Equals, mergedSpec.ComponentVersions)
		c.Assert(scaleSpec.ComponentVersions.TiDB, check.Equals, "v8.0.0")
		c.Assert(scaleSpec.ComponentVersions.TiKV, check.Equals, "v8.1.0")
		c.Assert(scaleSpec.ComponentVersions.PD, check.Equals, "v8.0.0")
	})
}

func (s *topoSuite) TestFixRelativePath(c *check.C) {
// base test
topo := Specification{
Expand Down
7 changes: 4 additions & 3 deletions pkg/cluster/spec/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,10 @@ type UpgradableMetadata interface {
// NewPart implements ScaleOutTopology interface.
//
// It returns a new Specification that contains no instances but carries over
// the cluster-wide settings of s: global options, monitored options, server
// configs and component versions. Each field is copied by plain assignment.
// Including ComponentVersions lets a topology parsed into the returned part
// start from the versions already declared on s instead of from empty values.
func (s *Specification) NewPart() Topology {
	return &Specification{
		GlobalOptions:     s.GlobalOptions,
		MonitoredOptions:  s.MonitoredOptions,
		ServerConfigs:     s.ServerConfigs,
		ComponentVersions: s.ComponentVersions,
	}
}

Expand Down
2 changes: 0 additions & 2 deletions tests/tiup-cluster/script/detect_error.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@ set -eu
err_num=$(find $1 -name "*.log" -exec grep "\[ERROR\]" {} \; | wc -l)
if [ ${err_num} != "0" ]; then
echo "detect ${err_num} [ERROR] log"
exit 1
fi

err_num=$(find $1 -name "*stderr.log" -exec cat {} \; | wc -l)
if [ ${err_num} != "0" ]; then
echo "detect ${err_num} stderr log"
exit 1
fi

echo "no error log found"
3 changes: 2 additions & 1 deletion tests/tiup-cluster/script/pull_log.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ do
logs=$(ssh -o "StrictHostKeyChecking no" root@$h "find /home/tidb | grep '.*log/.*\.log'")
for log in $logs
do
scp -o "StrictHostKeyChecking no" -r root@$h:$log "$out_dir/$h/"
scp -o "StrictHostKeyChecking no" -pr root@$h:$log "$out_dir/$h/"
done
fi
done
chmod -R 777 $out_dir
66 changes: 28 additions & 38 deletions tests/tiup-cluster/script/scale_tiproxy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,87 +6,77 @@ function scale_tiproxy() {
mkdir -p ~/.tiup/bin/

version=$1
test_tls=$2
native_ssh=$3
native_ssh=$2

client=""
common_args="--wait-timeout=360"
if [ $native_ssh == true ]; then
client="--ssh=system"
common_args="$common_args --ssh=system"
fi

name="test_scale_tiproxy_$RANDOM"
if [ $test_tls = true ]; then
topo=./topo/full_tls.yaml
else
topo=./topo/full.yaml
fi
topo=./topo/tiproxy.yaml

check_cert_file="ls /home/tidb/deploy/tidb-4000/tls/tiproxy-session.crt /home/tidb/deploy/tidb-4000/tls/tiproxy-session.key"
check_cert_config="grep -q session-token-signing-key /home/tidb/deploy/tidb-4000/conf/tidb.toml"

tiup-cluster $client --yes deploy $name $version $topo -i ~/.ssh/id_rsa
tiup-cluster $common_args --yes deploy $name $version $topo -i ~/.ssh/id_rsa

# the session certs exist
tiup-cluster $client exec $name -N n1 --command "$check_cert_file"
tiup-cluster $common_args exec $name -N n1 --command "$check_cert_file"
# the configurations are updated
tiup-cluster $client exec $name -N n1 --command "$check_cert_config"
tiup-cluster $common_args exec $name -N n1 --command "$check_cert_config"

tiup-cluster $client list | grep "$name"
tiup-cluster $common_args list | grep "$name"

tiup-cluster $client --yes start $name
tiup-cluster $common_args --yes start $name

tiup-cluster $client _test $name writable
tiup-cluster $common_args _test $name writable

tiup-cluster $client display $name
tiup-cluster $common_args display $name

tiup-cluster $client --yes reload $name --skip-restart
tiup-cluster $common_args --yes reload $name --skip-restart

if [ $test_tls = true ]; then
total_sub_one=18
total=19
else
total_sub_one=23
total=24
fi
total_sub_one=7
total=8

# disable tiproxy
echo "start scale in tiproxy"
tiup-cluster $client --yes scale-in $name -N n1:6000
tiup-cluster $common_args --yes scale-in $name -N n1:6000
wait_instance_num_reach $name $total $native_ssh

# scale in tidb and scale out again
echo "start scale in tidb"
tiup-cluster $client --yes scale-in $name -N n2:4000
tiup-cluster $common_args --yes scale-in $name -N n2:4000
wait_instance_num_reach $name $total_sub_one $native_ssh
echo "start scale out tidb"
topo=./topo/full_scale_in_tidb_2nd.yaml
tiup-cluster $client --yes scale-out $name $topo
tiup-cluster $common_args --yes scale-out $name $topo
# the session certs don't exist on the new tidb
! tiup-cluster $client exec $name -N n2 --command "$check_cert_file"
! tiup-cluster $common_args exec $name -N n2 --command "$check_cert_file"
# the configurations are not updated on the new tidb
! tiup-cluster $client exec $name -N n2 --command "$check_cert_config"
! tiup-cluster $common_args exec $name -N n2 --command "$check_cert_config"

# enable tiproxy again
echo "start scale out tiproxy"
topo=./topo/full_scale_in_tiproxy.yaml
tiup-cluster $client --yes scale-out $name $topo
tiup-cluster $common_args --yes scale-out $name $topo
# the session certs exist on the new tidb
tiup-cluster $client exec $name -N n2 --command "$check_cert_file"
tiup-cluster $common_args exec $name -N n2 --command "$check_cert_file"
# the configurations are updated on the new tidb
tiup-cluster $client exec $name -N n2 --command "$check_cert_config"
tiup-cluster $common_args exec $name -N n2 --command "$check_cert_config"

# scale in tidb and scale out again
echo "start scale in tidb"
tiup-cluster $client --yes scale-in $name -N n2:4000
tiup-cluster $common_args --yes scale-in $name -N n2:4000
wait_instance_num_reach $name $total $native_ssh
echo "start scale out tidb"
topo=./topo/full_scale_in_tidb_2nd.yaml
tiup-cluster $client --yes scale-out $name $topo
tiup-cluster $common_args --yes scale-out $name $topo
# the session certs exist on the new tidb
tiup-cluster $client exec $name -N n2 --command "$check_cert_file"
tiup-cluster $common_args exec $name -N n2 --command "$check_cert_file"
# the configurations are updated on the new tidb
tiup-cluster $client exec $name -N n2 --command "$check_cert_config"
tiup-cluster $common_args exec $name -N n2 --command "$check_cert_config"

tiup-cluster $client _test $name writable
tiup-cluster $client --yes destroy $name
tiup-cluster $common_args _test $name writable
tiup-cluster $common_args --yes destroy $name
}
4 changes: 2 additions & 2 deletions tests/tiup-cluster/test_scale_tiproxy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ set -eu

source script/scale_tiproxy.sh

echo "test scaling of tidb and tiproxy in cluster for version v8.1.0, via easy ssh"
scale_tiproxy v8.1.0 false false
echo "test scaling of tidb and tiproxy in cluster for version v8.2.0, via easy ssh"
scale_tiproxy v8.2.0 false
23 changes: 23 additions & 0 deletions tests/tiup-cluster/topo/tiproxy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Topology used by the tiproxy scaling test (script/scale_tiproxy.sh).
# Child keys must be indented under their section; at column 0 they would be
# read as separate top-level keys.
global:
  user: tidb
  group: pingcap

# Pin tiproxy to its own version, independent of the version passed to deploy.
component_versions:
  tiproxy: v1.2.0

tidb_servers:
  - host: n1
  - host: n2

pd_servers:
  - host: n3
  - host: n4
  - host: n5

tikv_servers:
  - host: n3
  - host: n4
  - host: n5

tiproxy_servers:
  - host: n1

0 comments on commit 853c6a2

Please sign in to comment.