Viewing: ost-survey
#!/usr/bin/perl
# SPDX-License-Identifier: GPL-2.0
#
# This file is part of Lustre, http://www.lustre.org/
#
# This script is to be run on a client machine and will test all the
# OSTs to determine which is the fastest and which is the slowest.
# The current test method is as follows:
# -Create one test file for each active OST in a temporary directory
# -Use 'lfs setstripe' to set the Lustre striping such that IO goes to
# only one OST
# -Use 'dd' to write and read a file of a specified size
# -Compute the average and standard deviation
# -Find the slowest OST for read and write
# -Find the fastest OST for read and write
#
# GLOBALS
# Package-wide settings shared by the subroutines and the main program below.
our $pname   = $0;              # program name, as invoked
our $OSTS    = 0;               # number of OSTs we will loop over
our $MBSIZE  = 1024 * 1024;     # bytes in one MB
our $BSIZE   = 1024 * 1024;     # size of one i/o block, in bytes
our $MNT     = "/mnt/lustre";   # location of the Lustre file system
our $CACHESZ = 0;               # max_cached_mb parameter
our $FSIZE   = 30;              # number of i/o blocks per test file
# Usage
# Prints the command-line help (program name and default size are taken
# from $pname and $FSIZE) and terminates the script with exit status 1.
sub usage () {
    print "Usage: $pname [-s <size>] [-h] <Lustre_Path>\n",
          "[OPTIONS]\n",
          " -s: size of test file in MB (default $FSIZE MB)\n",
          " -h: To display this help\n",
          "example : $pname /mnt/lustre\n";
    exit 1;
}
# ost_count subroutine sets global variable $OSTS to the number of OSTs
# reported by numobd (active or not) and stores 1 in @ACTIVEOST_INX at the
# index of every OST whose target_obd status is "ACTIVE".
# Takes no arguments; reads the global $FSNAME.
# Dies if any of the three lctl queries exits with a non-zero status.
sub ost_count () {
    my $active = `lctl get_param -n lov.$FSNAME-clilov-*.activeobd`;
    if ( $? ) {
        die "Read lov.$FSNAME-clilov-*.activeobd error: $?\n";
    }
    chomp($active);
    print "Number of Active OST devices : $active\n";

    my $numost = `lctl get_param -n lov.$FSNAME-clilov-*.numobd`;
    if ( $? ) {
        die "Read lov.$FSNAME-clilov-*.numobd error: $?\n";
    }
    chomp($numost);
    if ( $numost != $active ) {
        printf "Number of non active ost(s): %d\n", ( $numost - $active );
    }
    # Loop over every configured OST; inactive ones are skipped later by
    # consulting @ACTIVEOST_INX.
    $OSTS = $numost;

    my $targets = `lctl get_param -n lov.$FSNAME-clilov-*.target_obd`;
    if ( $? ) {
        die "Read lov.$FSNAME-clilov-*.target_obd error: $?\n";
    }

    my $position = 0;
    foreach my $line (split /\n/, $targets) {
        # split ' ' ignores leading whitespace, so the fields cannot shift.
        my ($ost_num, $ost_name, $ost_status) = split(' ', $line);

        # Prefer the OST number printed on the line (presumably "<n>:" --
        # TODO confirm against the lctl output) so that a gap in the list
        # cannot mark the wrong index; fall back to the line position when
        # the first field does not look like a number.
        my $index = $position;
        if ( defined $ost_num && $ost_num =~ /^(\d+):?$/ ) {
            $index = $1;
        }

        if ( defined $ost_status && $ost_status eq "ACTIVE" ) {
            $ACTIVEOST_INX[$index] = 1;
        }
        $position++;
    }
}
# cache_mod subroutine saves the current max_cached_mb value in the global
# $CACHESZ and then sets max_cached_mb to the size of 256 system pages
# expressed in MB (1 MB with 4096-byte pages).
# Takes no arguments; reads the globals $FSNAME and $MBSIZE.
# Dies if the parameter cannot be read, parsed or set.
sub cache_mod () {
    use POSIX qw(sysconf _SC_PAGESIZE);

    my $raw = `lctl get_param -n llite.$FSNAME-*.max_cached_mb`;
    if ( $? ) {
        die "Read llite.$FSNAME-*.max_cached_mb error: $?\n";
    }

    # Accept both a "max_cached_mb: <n>" line inside a multi-line report and
    # a bare number; parsing here avoids a shell pipeline that silently
    # produced an empty value for the bare-number form.
    if ( $raw =~ /^\s*max_cached_mb:?\s+(\d+)/m ) {
        $CACHESZ = $1;
    } elsif ( $raw =~ /^\s*(\d+)\s*$/ ) {
        $CACHESZ = $1;
    } else {
        # Without a saved value the cache could not be restored afterwards,
        # so stop before changing anything.
        die "Cannot parse llite.$FSNAME-*.max_cached_mb value: $raw\n";
    }

    my $pagesize = sysconf(_SC_PAGESIZE);
    my $req_cache_mb = ($pagesize * 256) / $MBSIZE;
    system("lctl set_param -n llite.$FSNAME-*.max_cached_mb=$req_cache_mb");
    if ( $? ) {
        die "Set llite.$FSNAME-*.max_cached_mb error: $?\n";
    }
}
# cache_return subroutine returns max_cached_mb to the value saved in the
# global $CACHESZ.  Takes no arguments; reads $FSNAME and $CACHESZ.
# Restoring is best effort: a failure is reported on STDERR but does not
# abort the script.
sub cache_return () {
    # Use only the first token of the saved value so that a trailing newline
    # or extra lines never end up inside the shell command.
    my ($value) = defined $CACHESZ ? $CACHESZ =~ /(\S+)/ : ();
    unless ( defined $value ) {
        warn "Warning: no saved max_cached_mb value, llite.$FSNAME-*.max_cached_mb not restored\n";
        return;
    }
    system("lctl set_param -n llite.$FSNAME-*.max_cached_mb=$value");
    if ( $? ) {
        warn "Warning: could not restore llite.$FSNAME-*.max_cached_mb to $value: $?\n";
    }
}
# make_dummy subroutine fills a file with zeroes so that a later read test
# has something to read.
# Arguments: number of blocks to write, path of the file to create.
# The block size comes from the global $BSIZE; dd's diagnostics are discarded.
sub make_dummy () {
    my ($blocks, $path) = @_;
    my $dd_cmd = "dd of=$path if=/dev/zero count=$blocks bs=$BSIZE 2> /dev/null";
    system ($dd_cmd);
}
# run_test subroutine writes data to, or reads data from, a test file with
# dd, measures the wall-clock time between the surrounding sync calls and
# stores the elapsed time and transfer rate in the global result arrays.
# Arguments:
#   $SIZE     - number of blocks to transfer (block size is $BSIZE)
#   $INX      - index under which the results are stored
#   $ACTION   - "write" or "read"; anything else prints an error and exits 1
#   $tempfile - path of the file to write or read
# Results: @wTime/@wMBs (write) or @rTime/@rMBs (read), in seconds and MB/s.
# A failing dd is reported on STDERR; its figures are still recorded.
# The former empty prototype was dropped because the sub takes four
# arguments; existing &run_test(...) calls are unaffected.
sub run_test {
    my ($SIZE, $INX, $ACTION, $tempfile) = @_;

    my $dd_cmd;
    if ( $ACTION eq "write" ) {
        $dd_cmd = "dd of=$tempfile if=/dev/zero count=$SIZE bs=$BSIZE 2> /dev/null";
    } elsif ( $ACTION eq "read" ) {
        $dd_cmd = "dd if=$tempfile of=/dev/null count=$SIZE bs=$BSIZE 2> /dev/null";
    } else {
        print "Action is neither read nor write\n";
        exit 1;
    }

    # A read test needs an existing file; create one if the write pass did not.
    if ( $ACTION eq "read" && !-f $tempfile ) {
        &make_dummy($SIZE, $tempfile);
    }

    system("sync");
    my ($ts0, $tu0) = gettimeofday();
    system($dd_cmd);
    my $dd_status = $?;         # save before the next system() overwrites $?
    system("sync");
    my ($ts1, $tu1) = gettimeofday();

    if ( $dd_status ) {
        warn "Warning: dd $ACTION of $tempfile failed (status $dd_status); its timing is not meaningful\n";
    }

    # gettimeofday returns (seconds, microseconds).
    my $tdelta = ($ts1 - $ts0) + ($tu1 - $tu0) / 1000000;
    # Guard against a zero interval so the rate computation cannot divide by 0.
    my $delta = $tdelta > 0 ? ($SIZE * $BSIZE / $tdelta) / $MBSIZE : 0;

    if ( $ACTION eq "write" ) {
        $wTime[$INX] = $tdelta;
        $wMBs[$INX] = $delta;
    } else {
        $rTime[$INX] = $tdelta;
        $rMBs[$INX] = $delta;
    }
}
# calculate subroutine computes the following things and displays them.
# - Finds worst and best OST for the given operation.
# - Computes the average rate over all active OSTs.
# - Computes the (population) standard deviation over all active OSTs.
# Arguments:
#   $op  - label printed in the report, e.g. "Read" or "Write"
#   rest - per-OST rates in MB/s, indexed by OST number
# Only indexes below $OSTS that are flagged in @ACTIVEOST_INX take part, and
# the average / deviation are divided by the number of those active OSTs
# (dividing by $OSTS would skew both whenever an OST is inactive).
# Prints nothing when there is no active OST.
# The former empty prototype was dropped because the sub takes arguments;
# existing &calculate(...) calls are unaffected.
sub calculate {
    my ($op, @rates) = @_;

    my @active = grep { $ACTIVEOST_INX[$_] } 0 .. $OSTS - 1;
    return unless @active;

    # Start from the first active OST so that ties (and all-zero rates)
    # always report an OST that was really measured.
    my $best_OST  = $active[0];
    my $worst_OST = $active[0];
    my $max_mb = $rates[$best_OST] || 0;
    my $min_mb = $max_mb;
    my $total = 0;
    foreach my $ost (@active) {
        my $rate = $rates[$ost] || 0;
        $total += $rate;
        if ( $rate > $max_mb ) {
            $max_mb = $rate;
            $best_OST = $ost;
        }
        if ( $rate < $min_mb ) {
            $min_mb = $rate;
            $worst_OST = $ost;
        }
    }
    my $avg = $total / @active;

    my $sq_total = 0;
    foreach my $ost (@active) {
        my $diff = ($rates[$ost] || 0) - $avg;
        $sq_total += $diff * $diff;
    }
    my $sd = sqrt($sq_total / @active);

    printf "Worst %s OST indx: %d speed: %f\n", $op, $worst_OST, $min_mb;
    printf "Best %s OST indx: %d speed: %f\n", $op, $best_OST, $max_mb;
    printf "%s Average: %f +/- %f MB/s\n", $op, $avg, $sd;
}
# output_all_data subroutine prints one table row per OST index below $OSTS:
# read and write rate followed by read and write time for OSTs flagged in
# @ACTIVEOST_INX, or an "Inactive ost" marker for the others.
# Takes no arguments; reads @rMBs, @wMBs, @rTime and @wTime.
sub output_all_data () {
    print "Ost# Read(MB/s) Write(MB/s) Read-time Write-time\n";
    print "----------------------------------------------------\n";
    for (my $ost = 0; $ost < $OSTS; $ost++) {
        unless ( $ACTIVEOST_INX[$ost] ) {
            printf "%d Inactive ost\n", $ost;
            next;
        }
        my @row = ($rMBs[$ost], $wMBs[$ost], $rTime[$ost], $wTime[$ost]);
        printf "%d %.3f %.3f %.3f %.3f\n", $ost, @row;
    }
}
# Main program: parse the command line, find the file system name and its
# OSTs, shrink the client cache, run one write pass and one read pass over
# every active OST, print the results, then restore the cache and remove the
# temporary directory.
use Getopt::Std;
use File::Path;
use POSIX qw(strftime);

# Per-OST results filled in by run_test, indexed by OST number.
@rTime = ();
@wTime = ();
@rMBs = ();
@wMBs = ();
# Set to 1 by ost_count at the index of every active OST.
@ACTIVEOST_INX = ();

# Locals
my $filename = "";
my $dirpath = "";
my $flag = 0;           # number of tests (write + read) actually run

# Check number of arguments
usage() unless @ARGV;

# Command line parameter parsing
getopts('s:h') or usage();
usage() if $opt_h;
if ( $opt_s ) {
    # The size is handed to dd as count=, so it must be a whole number.
    unless ( $opt_s =~ /^\d+$/ ) {
        print "ERROR: invalid size $opt_s\n";
        usage();
    }
    $FSIZE = $opt_s;
}
if ( @ARGV > 1 ) {
    print "ERROR: extra argument $ARGV[1]\n";
    usage();
}
$MNT = $ARGV[0] if @ARGV;

# Check for Time::HiRes module and import gettimeofday at run time, so that
# a missing module produces a readable message instead of a compile error.
eval { require Time::HiRes; Time::HiRes->import('gettimeofday'); 1 }
    or die "You need to install the perl-Time-HiRes package to use this script\n";

my $time_v = time();
my $hostname = `lctl list_nids | head -1` or die "You need to install lctl to use this script\n";
chomp($hostname);
print "$pname: ", strftime("%D", localtime($time_v));
print " OST speed survey on $MNT from $hostname\n";

# Get the file system name.
$FSNAME = `lfs getname $MNT`;
if ( $? ) {
    die "`lfs getname $MNT` error: $?\n";
}
# Keep the first word up to its last '-' (presumably the output is
# "<fsname>-<instance> <mount point>" -- TODO confirm).  Restricting the match
# to the first word keeps a '-' later in the output out of the name, and a
# failed match is reported instead of silently reusing a stale $1.
if ( $FSNAME =~ /^\s*(\S+)-[^-\s]+/ ) {
    $FSNAME = $1;
} else {
    die "Cannot find the file system name in `lfs getname $MNT` output: $FSNAME\n";
}

# get OST count
ost_count ();

# Modify local cache
cache_mod ();

$dirpath = "$MNT/ost_survey_tmp";
eval { mkpath($dirpath) };
if ($@) {
    print "Couldn't create $dirpath: $@";
    # cache_mod has already changed max_cached_mb; put it back before leaving.
    cache_return ();
    exit 1;
}

# Write pass: one single-stripe file per active OST.
foreach my $ost (0 .. $OSTS - 1) {
    next unless $ACTIVEOST_INX[$ost];
    $filename = "$dirpath/file$ost";
    # set stripe for OST number $ost
    system ("lfs setstripe -S 0 -i $ost -c 1 $filename");
    if ( $? ) {
        warn "Warning: lfs setstripe failed for $filename (status $?); the file may not be on OST $ost\n";
    }
    # Perform write for OST number $ost
    &run_test($FSIZE, $ost, "write", $filename);
    $flag++;
}

# Read pass over the files written above.
foreach my $ost (0 .. $OSTS - 1) {
    next unless $ACTIVEOST_INX[$ost];
    $filename = "$dirpath/file$ost";
    # Perform read for OST number $ost
    &run_test($FSIZE, $ost, "read", $filename);
    $flag++;
}

# if read or write performed on any OST then display information.
if ( $flag ) {
    if ( $flag > 1 ) {
        &calculate("Read", @rMBs);
        &calculate("Write", @wMBs);
    }
    output_all_data ();
} else {
    print "There is no active OST's found\n";
}

# Return cache to original size
cache_return ();

eval { rmtree($dirpath) };
if ($@) {
    print "Warning: Couldn't remove $dirpath: $@";
}