#!/usr/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This file is part of Lustre, http://www.lustre.org/
#
######################################################################
# customize per survey
# Prerequisite: for "stripe_count > 0" the OSTs must be set up and mounted.
#
# How to run test:
# case 1 (stripe_count=0 default):
# $ thrhi=8 dir_count=4 sh mds-survey
# one can also run test with user defined targets as follows,
# $ thrhi=8 dir_count=4 file_count=50000 targets="lustre-MDT0000" sh mds-survey
# case 2 (stripe_count > 0, OSTs must be mounted):
# $ thrhi=8 dir_count=4 file_count=50000 stripe_count=2 \
#       targets="lustre-MDT0000" sh mds-survey
# [ NOTE: passwordless (automated) login to the server(s) is advised ]
# include library: iokit-libecho is looked up in the directory that holds
# this script.  Helpers called below but not defined in this file
# (remote_shell, cleanup, ...) presumably come from it.
# The path is quoted so a script location containing spaces still works.
source "$(dirname "$0")/iokit-libecho"
# Customisation variables
#####################################################################
# One can change variable values in this section as per requirements
# The following variables can be set in the environment, or on the
# command line

# check_positive_int NAME
# Return 0 when the variable called NAME holds a value that evaluates to
# an integer greater than zero; otherwise complain on stderr and return 1.
check_positive_int () {
	local name=$1
	local value=${!name}

	if ! (( value > 0 )) 2> /dev/null; then
		echo "error: $name must be a positive integer (got '$value')" >&2
		return 1
	fi
}

# result file prefix (date/time + hostname makes unique)
# NB ensure path to it exists
rslt_loc=${rslt_loc:-"/tmp"}
rslt=${rslt:-"$rslt_loc/mds_survey_$(date +%F@%R)_$(uname -n)"}
# min and max thread count
thrlo=${thrlo:-4}
thrhi=${thrhi:-32}
# the survey starts at thrlo threads and doubles the count each round, so
# zero or a negative start value would never get past thrhi
check_positive_int thrlo || exit 1
# number of directories to test
dir_count=${dir_count:-$thrlo}
# the thread count is divided by dir_count, so it must not be zero
check_positive_int dir_count || exit 1
# number of files per thread
file_count=${file_count:-100000}
# MDT targets to test; when empty they are auto-detected further down
targets=${targets:-""}
stripe_count=${stripe_count:-0}
# what tests to run (first must be create, and last must be destroy)
# default=(create lookup md_getattr setxattr destroy)
tests_str=${tests_str:-"create lookup md_getattr setxattr destroy"}
# start number for each thread
start_number=${start_number:-2}
# layer to be tested
layer=${layer:-"mdd"}
# End of the customisation variables.
#####################################################################
# Nothing below this line is meant to be tuned per survey.

# Run this script and everything it starts in the POSIX locale.
LC_ALL=POSIX
export LC_ALL
# Test directories are named "<mdtbasedir formatted with the MDT index><basedir>",
# with a numeric suffix on all but the first directory of an MDT.
mdtbasedir='MDT%04x-'
basedir='tests'
# create_directories HOST DEVNO NDIR RFILE MDTIDX DIR_STRIPES
#
# Issue "lctl --device DEVNO test_mkdir" on HOST once per test directory,
# NDIR times in total.  The first directory is named
# "<mdtbasedir formatted with MDTIDX><basedir>", the following ones carry
# the loop index as suffix.  DIR_STRIPES is passed as "-c" and MDTIDX as
# "--stripe_index".
#
# The output of every call overwrites RFILE.  When that output contains
# "error: test_mkdir", RFILE is copied to stderr and the function stops.
#
# Globals:  mdtbasedir, basedir, LCTL (read)
# Outputs:  "ERROR" on stdout after a failed mkdir, otherwise $basedir
create_directories () {
	local host=$1
	local devno=$2
	local ndir=$3
	local rfile=$4
	local mdtidx=$5
	local dir_stripes=$6
	local prefix
	local dir
	local idx

	# the MDT part of the name is the same for every directory
	prefix="$(printf "${mdtbasedir}" $mdtidx)${basedir}"

	for ((idx = 0; idx < ndir; idx++)); do
		dir=$prefix
		if (( idx > 0 )); then
			dir=${prefix}${idx}
		fi

		remote_shell $host $LCTL --device $devno test_mkdir /$dir \
			-c $dir_stripes --stripe_index $mdtidx > "$rfile" 2>&1

		# scan the whole file in one go; this also catches a final
		# line that has no trailing newline
		if grep -q 'error: test_mkdir' "$rfile"; then
			cat "$rfile" >&2
			echo "ERROR: fail test_mkdir" >&2
			echo "ERROR"
			return
		fi
	done

	echo $basedir
}
# destroy_directories HOST DEVNO NDIR RFILE MDTIDX
#
# Issue "lctl --device DEVNO test_rmdir" on HOST for each of the NDIR test
# directories of MDT index MDTIDX, using the same names as
# create_directories.  The output of every call overwrites RFILE; it is
# not inspected, so a failing rmdir is not reported here.
#
# Globals:  mdtbasedir, basedir, LCTL (read)
destroy_directories () {
	local host=$1
	local devno=$2
	local ndir=$3
	local rfile=$4
	local mdtidx=$5
	local prefix
	local dir
	local idx

	# the MDT part of the name is the same for every directory
	prefix="$(printf "${mdtbasedir}" $mdtidx)${basedir}"

	for ((idx = 0; idx < ndir; idx++)); do
		dir=$prefix
		if (( idx > 0 )); then
			dir=${prefix}${idx}
		fi

		remote_shell $host $LCTL --device $devno test_rmdir /$dir > "$rfile" 2>&1
	done
}
# get_stats RFILE
#
# Condense the lctl output stored in RFILE into one line
#   "<count> <average> <min> <max>"
# where <count> is the number of rate lines seen since the last
# "starting" line, or -1 when the output holds an error or an
# unexpected line.  Needs gawk (strtonum).
get_stats () {
	local rfile=$1

	gawk '
	# a "starting" line opens a new run: restart the counter
	/starting/ {
		count = 0
		next
	}
	# any line mentioning an error invalidates the result
	/error/ {
		count = -1
		exit
	}
	# lines reporting a PID with rc=0 carry no rate
	/PID [0-9]+ had rc=0/ {
		next
	}
	# summary line: the rate in field 8 becomes the average
	/^Total: total [0-9]+ threads [0-9]+ sec [0-9\.]+ [0-9]+\.[0-9]+\/second$/ {
		avg = strtonum($8)
		count++
		next
	}
	# "N/M Total:" line: the rate in field 3 feeds min and max
	/^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {
		count++
		rate = strtonum($3)
		if (count == 1 || rate < lo)
			lo = rate
		if (count == 1 || rate > hi)
			hi = rate
		next
	}
	# anything else is tolerated only while nothing has been counted
	count != 0 {
		count = -1
		exit
	}
	END {
		# with a single rate line min and max equal the average
		if (count == 1) {
			lo = avg
			hi = avg
		}
		printf "%d %f %f %f\n", count, avg, lo, hi
	}' < $rfile
}
# get_global_stats RFILE
#
# Combine the per-device lines "<count> <average> <min> <max>" stored in
# RFILE into one line of the same layout: the smallest <count> (so an
# error marker of one device wins), the mean of the averages, the overall
# minimum and the overall maximum.
# An empty RFILE yields "0 0.000000 0.000000 0.000000" instead of a
# division by zero.
get_global_stats () {
	local rfile=$1

	awk '
	NR == 1 {
		err = $1
		sum = $2
		min = $3
		max = $4
		next
	}
	{
		if ($1 < err)
			err = $1
		sum += $2
		if ($3 < min)
			min = $3
		if ($4 > max)
			max = $4
	}
	END {
		# no input at all: report zeros rather than dividing by zero
		if (NR == 0) {
			printf "%d %f %f %f\n", 0, 0, 0, 0
			exit
		}
		printf "%d %f %f %f\n", err, sum / NR, min, max
	}' < "$rfile"
}
# print_summary [-n] TEXT...
#
# Append the arguments, joined by single spaces, to the summary file
# $rsltf and print the same text on stdout.  A leading "-n" suppresses the
# trailing newline in both places.
#
# printf is used instead of echo so that a message that looks like an
# echo option (for example "-e") is printed verbatim.
#
# Globals:  rsltf (read)
print_summary () {
	local fmt='%s\n'

	if [ "$1" = "-n" ]; then
		fmt='%s'
		shift
	fi

	printf "$fmt" "$*" >> "$rsltf"
	printf "$fmt" "$*"
}
# Split the whitespace-separated list in $tests_str into the tests array;
# count ends up as the number of entries stored.
declare -a tests
count=0
for name in $tests_str; do
	tests[count++]=$name
done
# hide a little trick to unset this from the command line:
# lustre_root=" " (a single space) means "not set"
if [ "$lustre_root" == " " ]; then
	unset lustre_root
fi

# find where 'lctl' binary is installed on this system
if [[ -x "$LCTL" ]]; then # full pathname specified
	: # echo "LCTL=$LCTL"
elif [[ -n "$lustre_root" && -x "$lustre_root/utils/lctl" ]]; then
	LCTL=${lustre_root}/utils/lctl
elif [[ -n "$LUSTRE" && -x "$LUSTRE/utils/lctl" ]]; then
	LCTL=$LUSTRE/utils/lctl
else # hope that it is in the PATH
	LCTL=${LCTL:-lctl}
fi

# 'command -v' is a shell builtin, so this check does not depend on the
# external 'which' utility being installed
command -v "$LCTL" > /dev/null 2>&1 || { echo "error: lctl not found"; exit 99; }
# per-target bookkeeping, filled in below
declare -a client_names host_names client_indexes

# Without an explicit target list, take every device that the local
# "lctl device_list" shows as an "mdt" in state "UP".
if [[ -z "$targets" ]]; then
	targets=$($LCTL device_list |
		awk '$2 == "UP" && $3 == "mdt" { print $4 }')
	[[ -n "$targets" ]] || {
		echo "Can't find any MDT to test. Please set targets=..."
		exit 1
	}
fi
# split out hostnames from mdt names
# For every target: remember host and MDT name as returned by
# split_hostname, and derive the MDT index ("0x" + the four hex digits
# that follow "MDT" in the name).
ndevs=0
for trgt in $targets; do
	str=($(split_hostname $trgt))
	host_names[ndevs]=${str[0]}
	client_names[ndevs]=${str[1]}
	client_indexes[ndevs]=0x$(echo ${str[1]} |
		sed 's/.*MDT\([0-9a-f][0-9a-f][0-9a-f][0-9a-f]\).*/\1/')
	(( ndevs += 1 ))
done
# check for ost
# Striped files need object storage: every host under test must list at
# least one "osc" or "osp" device in state "UP".
if (( $stripe_count > 0 )); then
	for ((i = 0; i < $ndevs; i++)); do
		host=${host_names[i]}
		obd=$(remote_shell $host $LCTL device_list |
			awk '$2 == "UP" && ($3 == "osc" || $3 == "osp") { print $4 }')
		[ -n "$obd" ] && continue
		echo "Need obdfilter to test stripe_count"
		exit 1
	done
fi
# check and insert obdecho module: load it on this host unless lsmod
# already lists it
lsmod | grep -q obdecho || modprobe obdecho
# The test list must start with "create" and end with "destroy".
# The checks are separate commands joined by ||, so the last element is
# only looked at when the list is not empty (indexing an empty array with
# -1 makes bash print "bad array subscript").
count=${#tests[@]}
if (( count == 0 )) || [[ "${tests[0]}" != "create" ]] ||
   [[ "${tests[count - 1]}" != "destroy" ]]; then
	# both diagnostic lines go to stderr
	echo "tests: ${tests[*]}" 1>&2
	echo "First test must be 'create', and last test must be 'destroy'" 1>&2
	exit 1
fi
# Output files, all derived from the result prefix $rslt:
#   .summary  one line per thread count (written by print_summary)
#   .detail   raw command output
#   .script   prefix of the per-host command scripts
#   .vmstat   prefix of the per-host vmstat logs
rsltf=${rslt}.summary
workf=${rslt}.detail
cmdsf=${rslt}.script
vmstatf=${rslt}.vmstat

# start with empty summary and detail files
: > $rsltf
: > $workf
# disable portals debug and get obdecho loaded on all relevant hosts
unique_hosts=($(unique ${host_names[@]}))
load_obdechos

# get vmstat started: one background "vmstat 5" per unique host, appending
# to that host's vmstat file; the PIDs of the background jobs are kept in
# vmstatpids
pidcount=0
for host in ${unique_hosts[@]}; do
	host_vmstatf=${vmstatf}_${host}
	: > $host_vmstatf
	remote_shell $host "vmstat 5 >> $host_vmstatf" &> /dev/null &
	pid=$!
	vmstatpids[pidcount]=$pid
	(( pidcount += 1 ))
done
# get all the echo_client device numbers and names
# get_ec_devno is expected to print three words per device; they are
# stored as device number, client name and teardown flag.
# NOTE(review): the early "exit 1" skips cleanup although vmstat has
# already been started above - confirm that this is intended.
for ((i = 0; i < $ndevs; i++)); do
	host=${host_names[i]}
	devno=($(get_ec_devno $host "${client_names[i]}" "${client_names[i]}" "mdt" $layer))
	(( ${#devno[@]} == 3 )) || exit 1
	devnos[i]=${devno[0]}
	client_names[i]=${devno[1]}
	do_teardown_ec[i]=${devno[2]}
done

# nothing to survey: say so and leave through cleanup
if (( $ndevs <= 0 || ${#host_names[@]} <= 0 )); then
	echo "no devices or hosts specified"
	cleanup 0
fi
# first line of the summary: when, which script, from which host
print_summary "$(date) $0 from $(hostname)"

# create directories
# For every device the test directories are first removed (presumably to
# clear leftovers of an earlier run) and then created; the number of
# devices is passed to create_directories as the stripe count argument.
tmpf="${workf}_tmp"
for ((idx = 0; idx < $ndevs; idx++)); do
	host=${host_names[idx]}
	devno=${devnos[idx]}
	mdtidx=${client_indexes[idx]}
	client_name="${host}:${client_names[idx]}"

	echo "=======> Create $dir_count directories on $client_name" >> $workf
	destroy_directories $host $devno $dir_count $tmpf $mdtidx
	ret=$(create_directories $host $devno $dir_count $tmpf $mdtidx $ndevs)

	# keep the command output in the detail log
	cat $tmpf >> $workf
	rm $tmpf

	[[ $ret != "ERROR" ]] && continue
	print_summary "created directories on $client_name failed"
	cleanup 1
done
# Main survey: for every thread count from $thrlo, doubling up to $thrhi,
# run each entry of ${tests[@]} on all MDT devices and print one summary
# row per thread count.
# NOTE(review): with thrlo <= 0 the "thr*=2" step never gets thr past the
# thr_per_dir check below, so the loop would not terminate - confirm that
# thrlo is always >= 1 here.
# snap is handed to lctl as the "-$snap" option further down.
snap=1
# status turns 1 as soon as one test reports an error; it is passed to
# cleanup and used as exit code after the loop.
status=0
for ((thr = $thrlo; thr <= $thrhi; thr*=2)); do
thr_per_dir=$((${thr}/${dir_count}))
# skip thread counts that leave less than one thread per directory
if (( thr_per_dir <= 0 )); then
continue
fi
# file_count is split evenly over the threads (integer division)
file_count_per_thread=$((${file_count}/${thr}))
# row header: number of MDTs, files, directories and threads
str=$(printf 'mdt %1d file %7d dir %4d thr %4d ' \
$ndevs $file_count $dir_count $thr)
echo "=======> $str" >> $workf
print_summary -n "$str"
# run tests
for test in ${tests[@]}; do
declare -a pidarray
# mark the start of this run in every host's vmstat file
# NOTE(review): $config is not assigned anywhere in the visible part of
# this file, so it presumably expands to nothing here.
for host in ${unique_hosts[@]}; do
echo "starting run for config: $config test: $test " \
"file: $file_count threads: $thr " \
"directories: $dir_count" >> ${vmstatf}_${host}
done
print_summary -n "$test "
# create per-host script files
for host in ${unique_hosts[@]}; do
echo -n > ${cmdsf}_${host}
done
# add one lctl command line per device to the script of its host; the
# output of that command is redirected to ${tmpf}_<device index>
for ((idx = 0; idx < $ndevs; idx++)); do
host=${host_names[$idx]}
devno=${devnos[$idx]}
dirname="$(printf "${mdtbasedir}" ${client_indexes[$idx]})$basedir"
tmpfi="${tmpf}_$idx"
# the create test also gets the stripe count; from here on $test holds the
# extended string, which is what the log headers below show
[ "$test" = "create" ] && test="create -c $stripe_count"
echo >> ${cmdsf}_${host} \
"$LCTL > $tmpfi 2>&1 \
--threads $thr -$snap $devno test_$test \
-d /$dirname -D $dir_count \
-b $start_number -n $file_count_per_thread"
done
# terminate every host script with "wait" and pre-fill pidarray with 0
# NOTE(review): the generated lctl lines carry no trailing "&", so they
# appear to run one after another on a host and this "wait" has nothing
# to wait for - confirm that this is intended.
pidcount=0
for host in ${unique_hosts[@]}; do
echo "wait" >> ${cmdsf}_${host}
pidarray[$pidcount]=0
pidcount=$((pidcount+1))
done
# start all host scripts in the background and remember their PIDs
pidcount=0
for host in ${unique_hosts[@]}; do
remote_shell $host bash < ${cmdsf}_${host} &
pidarray[$pidcount]=$!
pidcount=$((pidcount+1))
done
# block until every host script has finished
pidcount=0
for host in ${unique_hosts[@]}; do
wait ${pidarray[$pidcount]}
pidcount=$((pidcount+1))
done
#wait
# clean up per-host script files
for host in ${unique_hosts[@]}; do
rm ${cmdsf}_${host}
done
# collect/check individual MDT stats
# $tmpf gathers one get_stats line per device
echo -n > $tmpf
for ((idx = 0; idx < $ndevs; idx++)); do
client_name="${host_names[$idx]}:${client_names[$idx]}"
tmpfi="${tmpf}_$idx"
echo "=============> $test $client_name" >> $workf
host="${host_names[$idx]}"
# fetch the lctl output from the host into a local copy, log it and
# condense it
remote_shell $host cat $tmpfi > ${tmpfi}_local
cat ${tmpfi}_local >> $workf
get_stats ${tmpfi}_local >> $tmpf
rm -f $tmpfi ${tmpfi}_local
done
# compute/display global min/max stats
echo "=============> $test global" >> $workf
cat $tmpf >> $workf
# stats = (lowest per-device count, mean average, min, max)
stats=($(get_global_stats $tmpf))
rm $tmpf
# a count of zero or less means a device produced no usable result
if ((stats[0] <= 0)); then
str=$(printf "%17s " ERROR)
status=1
else
str=$(awk "BEGIN {printf \"%7.2f [ %7.2f, %7.2f] \", \
${stats[1]}, ${stats[2]}, ${stats[3]}; exit}")
fi
print_summary -n "$str"
done
# end the summary row of this thread count
print_summary ""
done
# destroy directories
# Remove the test directories of every device.  As in the create phase,
# the command output left in the scratch file is appended to the detail
# log and the scratch file is removed, so it does not stay behind next to
# the result files.
tmpf="${workf}_tmp"
for ((idx = 0; idx < $ndevs; idx++)); do
	host=${host_names[$idx]}
	devno=${devnos[$idx]}
	mdtidx=${client_indexes[$idx]}
	client_name="${host}:${client_names[$idx]}"
	echo "====> Destroy $dir_count directories on $client_name" >> $workf
	destroy_directories $host $devno $dir_count $tmpf $mdtidx
	# the scratch file only exists when at least one rmdir was issued
	if [ -f "$tmpf" ]; then
		cat "$tmpf" >> "$workf"
		rm -f "$tmpf"
	fi
done

# tear everything down and report the overall result
cleanup $status
exit $status