Viewing: lst.sh
#!/usr/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This file is part of Lustre, http://www.lustre.org/
#
print_help() {
cat <<EOF
Usage:
${0##*/} -f "nid1[ nid2...]" -t "nidA[ nidB...]" -m read|write|rw|ping [options]
or
${0##*/} -H -f "host1[ host2...]" -t "hostA[ hostB...]" -m read|write|rw|ping [options]
Options:
-b batch_name
Creates a batch test called <batch_name> rather than using the
default.
-c concurrency
The number of requests that are active at one time.
-C simple|full
A data validation check (checksum of data). The default is that no
check is done.
-d <source_count:sink_count>
Determines the ratio of client nodes to server nodes for the
specified test. This allows you to specify a wide range of
topologies, including one-to-one and all-to-all. Distribution divides
the source group into subsets, which are paired with equivalent
subsets from the target group so only nodes in matching subsets
communicate.
-D delay
The interval of the statistics (in seconds). Default is 15.
-e
Lists the number of failed RPCs on test nodes in the current session.
-h
Display this help.
-H
Run in "host mode". Host mode indicates that the arguments to '-t'
and '-f' flags are hostnames rather than LNet nids. This script will
attempt to ssh to each node to ensure the lnet-selftest module is
loaded, and to determine the appropriate LNet NIDs to give to LST.
-f "nid1[ nid2...]"
Space-separated list of LNet NIDs to place in the "clients" group.
When '-H' flag is specified, the '-f' argument is a space-separated
list of hostnames.
PDSH-style expressions are supported for NID arguments, but not for
host mode ('-H').
-g servers|clients
Report stats only from the specified group. Either 'clients' or
'servers'.
-l loops
The number of test loops. Default is -1 (infinite).
-L
Load lnet-selftest module on local and remote hosts. The module will
be unloaded at the end of the test execution. Requires running in
host mode ('-H').
-m read|write|rw|ping
Type of test to run. 'rw' specifies to run simultaneous read and
write test.
-M
Report bandwidth stats in MiB/s (default is MB/s).
-n count
The number of stat RPCs to issue. Default is 1.
-o <offset>
Add off=<offset> to brw tests.
-s iosize
I/O size in bytes, kilobytes, or Megabytes (i.e., -s 1024, -s 4K,
-s 1M). The default is 1 Megabyte.
-S <rate|bw|"rate bw">
By default, only bandwidth stats are displayed for read and write
and only RPC rate stats are shown for ping tests. The '-S' flag can
be used to override the stat output.
Examples:
Show only RPC rate stats:
# lst.sh -S rate ...
Show only bandwidth stats:
# lst.sh -S bw ...
Show both bandwidth and RPC rate stats:
# lst.sh -S "rate bw" ...
or
# lst.sh -S "bw rate" ...
-t "nid1[ nid2...]"
Space-separated list of LNet NIDs to place in the "servers" group.
When '-H' flag is specified, the '-t' argument is a space-separated
list of hostnames.
PDSH-style expressions are supported for NID arguments, but not for
host mode ('-H').
EOF
exit
}
stop_lst() {
local rc=0
if ${LST_BATCH_STARTED}; then
$LCTL mark "lst stop ${BATCH_NAME}"
[[ -n ${ALL_HOSTS} ]] &&
$PDSH "${ALL_HOSTS}" "$LCTL mark 'lst stop ${BATCH_NAME}'"
lst stop "${BATCH_NAME}" || rc=$?
LST_BATCH_STARTED=false
fi
if ${LST_SESSION_CREATED}; then
$LCTL mark "Stop LST $MODE"
echo "Stop LST $MODE - $(date)"
[[ -n ${ALL_HOSTS} ]] &&
$PDSH "${ALL_HOSTS}" "$LCTL mark 'Stop LST $MODE'"
lst end_session || rc=$((rc + $?))
LST_SESSION_CREATED=false
fi
return $rc
}
exit_handler() {
local rc=${1:-0}
trap "" EXIT
stop_lst || rc=$((rc + $?))
if ${LOAD_MODULES}; then
echo "Attempting to 'modprobe -r lnet-selftest' on all hosts (30 second timeout)..."
$PDSH "${ALL_HOSTS}" -u 30 \
"if lsmod | grep -q lnet_selftest; then
modprobe -r lnet-selftest
else
:
fi" | dshbak -c
rc=$((rc + PIPESTATUS[0]))
if lsmod | grep -q lnet_selftest; then
timeout 30 modprobe -r lnet-selftest
rc=$((rc + $?))
fi
fi
return $rc
}
LST_SESSION_CREATED=false # Whether 'lst new_session' was executed
LST_BATCH_STARTED=false # Whether 'lst run <batch>' was executed
PDSH="pdsh -S -Rssh -w"
BATCH_NAME=""
CONCURRENCY=16
CHECK=
DISTRIBUTION="1:1"
CLIENTS=""
LOOPS=""
MODE=""
IOSIZE="1m"
SERVERS=""
COUNT="1"
DELAY="15"
STAT_GROUP=""
SHOW_ERRORS=false
STAT_OPTS=""
STAT_OPT_RATE=false
STAT_OPT_BW=false
BW_UNITS="--mbs"
HOST_MODE=false
LOAD_MODULES=false
BRW_OFFSET=""
while getopts "b:C:c:d:D:ef:g:hHl:Lm:Mn:o:s:S:t:" flag ; do
case $flag in
b) BATCH_NAME="$OPTARG";;
c) CONCURRENCY="$OPTARG";;
C) CHECK="$OPTARG";;
d) DISTRIBUTION="$OPTARG";;
D) DELAY="$OPTARG";;
e) SHOW_ERRORS=true;;
h) print_help;;
H) HOST_MODE=true;;
f) CLIENTS="$OPTARG";;
g) STAT_GROUP="$OPTARG";;
l) LOOPS="$OPTARG";;
L) LOAD_MODULES=true;;
m) MODE="$OPTARG";;
M) BW_UNITS="";;
n) COUNT="$OPTARG";;
o) BRW_OFFSET="$OPTARG";;
s) IOSIZE="$OPTARG";;
S) STAT_OPTS="$OPTARG";;
t) SERVERS="$OPTARG";;
*) echo "Unrecognized option '-$flag'"
exit 1;;
esac
done
# find where 'lctl' binary is installed on this system
if [[ -x "$LCTL" ]]; then # full pathname specified
: # echo "LCTL=$LCTL"
elif [[ -n "$LUSTRE" && -x "$LUSTRE/utils/lctl" ]]; then
LCTL=$LUSTRE/utils/lctl
else # hope that it is in the PATH
LCTL=${LCTL:-lctl}
fi
#echo "using LCTL='$LCTL' lustre_root='$lustre_root' LUSTRE='$LUSTRE'"
[[ -n "$(which $LCTL)" ]] || { echo "error: lctl not found"; exit 99; }
if [[ -z $CLIENTS ]]; then
echo "Must specify \"clients\" group (-f)"
exit 1
elif [[ -z $SERVERS ]]; then
echo "Must specify \"servers\" group (-t)"
exit 1
elif [[ -z $MODE ]]; then
echo "Must specify a mode (-m <read|write|rw|ping>)"
exit 1
elif ! [[ $MODE =~ read|write|rw|ping ]]; then
echo "Invalid mode - \"$MODE\". (-m <read|write|rw|ping>)"
exit 1
elif [[ -z $(which lst 2>/dev/null) ]]; then
echo "Cannot find lst executable in PATH."
exit 1
elif ${LOAD_MODULES} && ! ${HOST_MODE}; then
echo "Module loading ('-L') is only available in host mode ('-H')"
exit 1
fi
for stat_opt in ${STAT_OPTS}; do
if [[ $stat_opt == rate ]]; then
STAT_OPT_RATE=true
elif [[ $stat_opt == bw ]]; then
STAT_OPT_BW=true
else
echo "Invalid stat option \"-S $stat_opt\""
print_help
fi
done
if [[ -z $STAT_GROUP ]]; then
STAT_GROUP="clients servers"
elif ! [[ $STAT_GROUP =~ clients|servers ]]; then
echo "Stat group must be either \"clients\" or \"servers\". Found \"$STAT_GROUP\""
exit 1
fi
if [[ -n ${LOOPS} && ${LOOPS} -eq 0 ]]; then
echo "Loops must be -1 or > 0. Found \"${LOOPS}\""
exit 1
fi
if ! ${LOAD_MODULES} && ! lsmod | grep -q lnet_selftest; then
echo "lnet-selftest module is not loaded on local host."
echo "Please ensure lnet-selftest module is loaded on the local host and all test nodes."
exit 1
fi
ALL_HOSTS=""
if ${HOST_MODE}; then
which pdsh &>/dev/null || { echo "Need pdsh for host mode"; exit; }
which ssh &>/dev/null || { echo "Need ssh for host mode"; exit; }
ALL_HOSTS="${SERVERS} ${CLIENTS}"
ALL_HOSTS=${ALL_HOSTS## }
ALL_HOSTS=${ALL_HOSTS%% }
ALL_HOSTS="${ALL_HOSTS// /,}"
if ${LOAD_MODULES}; then
echo "Loading lnet-selftest on test nodes"
$PDSH "${ALL_HOSTS}" \
"if ! lsmod | grep -q lnet_selftest; then
modprobe lnet-selftest 2>&1
else
true
fi" | dshbak -c
rc=${PIPESTATUS[0]}
if [[ $rc -ne 0 ]]; then
echo "Failed to load lnet-selftest module on test nodes"
exit "$rc"
fi
if ! lsmod | grep -q lnet_selftest; then
modprobe lnet-selftest
rc=$?
if [[ $rc -ne 0 ]]; then
echo "Failed to load lnet-selftest on local host"
exit $rc
fi
fi
fi
idx=0
opts=( -o NumberOfPasswordPrompts=0 -o ConnectTimeout=5 )
for host in ${SERVERS//,/ }; do
s_nids[idx]=$(ssh "${opts[@]}" "$host" "$LCTL list_nids | head -n 1")
if [[ -z ${s_nids[idx]} ]]; then
echo "Failed to determine primary NID of $host"
exit 1
fi
idx=$((idx + 1))
done
idx=0
for host in ${CLIENTS//,/ }; do
c_nids[idx]=$(ssh "${opts[@]}" "${host}" "$LCTL list_nids | head -n 1")
if [[ -z ${c_nids[idx]} ]]; then
echo "Failed to determine primary NID of $host"
exit 1
fi
idx=$((idx + 1))
done
SERVER_NIDS=( "${s_nids[@]}" )
CLIENT_NIDS=( "${c_nids[@]}" )
else
IFS=" " read -r -a SERVER_NIDS <<< "${SERVERS}"
IFS=" " read -r -a CLIENT_NIDS <<< "${CLIENTS}"
fi
if ! grep -q '\[' <<<"${SERVER_NIDS[@]}" && which lnetctl &>/dev/null; then
echo "Discover server NIDs"
lnetctl discover "${SERVER_NIDS[@]}" 1>/dev/null
rc=$?
if [[ $rc -ne 0 ]]; then
echo "Failed to discover all server NIDs"
exit $rc
fi
fi
if ! grep -q '\[' <<<"${CLIENT_NIDS[@]}" && which lnetctl &>/dev/null; then
echo "Discover client NIDs"
lnetctl discover "${CLIENT_NIDS[@]}" 1>/dev/null
rc=$?
if [[ $rc -ne 0 ]]; then
echo "Failed to discover all client NIDs"
exit $rc
fi
fi
[[ -n $ALL_HOSTS ]] &&
$PDSH "$ALL_HOSTS" "$LCTL mark 'Start LST $MODE'"
$LCTL mark "Start LST $MODE"
echo "Start LST $MODE - $(date)"
trap 'exit_handler' EXIT
export LST_SESSION=$$
echo "LST_SESSION=$LST_SESSION"
lst new_session lnet_session || { echo "new_session failed $?"; exit; }
LST_SESSION_CREATED=true
echo "Adding clients: ${CLIENT_NIDS[*]}"
lst add_group clients "${CLIENT_NIDS[@]}" || exit
echo "Adding servers: ${SERVER_NIDS[*]}"
lst add_group servers "${SERVER_NIDS[@]}" || exit
if [[ -z ${BATCH_NAME} ]]; then
BATCH_NAME="brw_${MODE}"
fi
lst add_batch "${BATCH_NAME}" || exit
test_opts+=( --batch "${BATCH_NAME}" --concurrency "${CONCURRENCY}" )
test_opts+=( --from clients --to servers --distribute "${DISTRIBUTION}" )
[[ -n ${LOOPS} ]] &&
test_opts+=( --loop "${LOOPS}" )
if [[ $MODE == ping ]]; then
test_opts+=( ping )
elif [[ $MODE == rw ]]; then
read_opts=( "${test_opts[@]}" brw read size="$IOSIZE" )
write_opts=( "${test_opts[@]}" brw write size="$IOSIZE" )
if [[ -n $CHECK ]]; then
read_opts+=( check="$CHECK" )
write_opts+=( check="$CHECK" )
fi
if [[ -n $BRW_OFFSET ]]; then
read_opts+=( off="$BRW_OFFSET" )
write_opts+=( off="$BRW_OFFSET" )
fi
else
test_opts+=( brw "${MODE}" )
[[ -n $BRW_OFFSET ]] &&
test_opts+=( off="$BRW_OFFSET" )
[[ -n $CHECK ]] &&
test_opts+=( check="$CHECK" )
test_opts+=( size="$IOSIZE" )
fi
stat_opts=( --count "${COUNT}" --delay "${DELAY}" )
if [[ -n $STAT_OPTS ]]; then
if ${STAT_OPT_RATE}; then
stat_opts+=( --rate )
fi
if ${STAT_OPT_BW}; then
stat_opts+=( --bw )
fi
elif [[ $MODE == ping ]]; then
stat_opts+=( --rate )
else
stat_opts+=( --bw "${BW_UNITS}" )
fi
for g in ${STAT_GROUP}; do
stat_opts+=( "${g}" )
done
if [[ $MODE == rw ]]; then
echo "Test: ${read_opts[*]}"
echo "Test: ${write_opts[*]}"
echo "Stat: ${stat_opts[*]}"
lst add_test "${read_opts[@]}" || exit
lst add_test "${write_opts[@]}" || exit
else
echo "Test: ${test_opts[*]}"
echo "Stat: ${stat_opts[*]}"
lst add_test "${test_opts[@]}" || exit
fi
lst run "${BATCH_NAME}" || exit
LST_BATCH_STARTED=true
lst stat "${stat_opts[@]}"
if ${SHOW_ERRORS}; then
lst show_error --session servers clients
fi
exit