# Viewing: ptlrpc.py
#!/usr/bin/env python
"""
Copyright 2015-2019 Cray Inc. All Rights Reserved
Dumps the Lustre RPC queues for all ptlrpcd_XX threads.
"""
from pykdump.API import *
import sys
import argparse
import os
import lustrelib as ll
from crashlib.input import toint
from traceback import print_exc
# One-line summary of what this command does.  It is not referenced anywhere
# else in this file (presumably read by an external command registry --
# TODO confirm).
description_short = "Displays the RPC queues of the Lustre ptlrpcd daemons"
def print_separator(count):
    """Print a horizontal rule made of `count` '=' characters.

    A count of zero (or less) prints an empty line.
    """
    # String repetition replaces the former character-by-character loop.
    print("=" * count)
def print_title(title):
    """Print a table heading for a list of RPC requests.

    With a non-empty title, prints a blank line, the title and the column
    header row.  A 148-character separator line is printed in every case,
    so calling this with an empty title just closes the table.
    """
    if title:
        columns = ("thread", "pid", "ptlrpc_request", "xid", "nid", "opc",
                   "phase:flags", "R:W", "sent/deadline", "ptlrpc_body")
        print("\n" + title)
        print("%-14s %-6s %-19s %-18s %-19s %-4s %-14s %-4s %-22s %-19s"
              % columns)
    print_separator(148)
def enum(**enums):
    """Return a new class named 'Enum' whose class attributes are `enums`.

    The result is a plain class (not enum.Enum), so vars() on it maps every
    given name to its value.
    """
    return type('Enum', (), enums)


# Identifies which link field of a ptlrpc_request chains a given list.
# Every member must have a distinct value: the values are the keys of
# RQ_LIST_LNKS, and two members sharing a value would silently collapse
# into a single table entry (the later one winning).
REQ_Q = enum(rq_list=1, replay_list=2, set_chain=3, ctx_chain=4,
             unreplied_list=5, timed_list=6, exp_list=7, hist_list=8)

# For each REQ_Q member: [struct that contains the link field,
#                         name of the link field inside that struct,
#                         member of struct ptlrpc_request holding that struct]
# Index the inner lists with STRUCT_IDX / MEMBER_IDX / UNION_IDX.
RQ_LIST_LNKS = {
    REQ_Q.rq_list: ['struct ptlrpc_request', 'rq_list', 'rq_type'],
    REQ_Q.replay_list: ['struct ptlrpc_request', 'rq_replay_list', 'rq_type'],
    REQ_Q.set_chain: ['struct ptlrpc_cli_req', 'cr_set_chain', 'rq_cli'],
    REQ_Q.ctx_chain: ['struct ptlrpc_cli_req', 'cr_ctx_chain', 'rq_cli'],
    REQ_Q.unreplied_list: ['struct ptlrpc_cli_req', 'cr_unreplied_list', 'rq_cli'],
    REQ_Q.timed_list: ['struct ptlrpc_srv_req', 'sr_timed_list', 'rq_srv'],
    REQ_Q.exp_list: ['struct ptlrpc_srv_req', 'sr_exp_list', 'rq_srv'],
    REQ_Q.hist_list: ['struct ptlrpc_srv_req', 'sr_hist_list', 'rq_srv']
}

# Positions within each RQ_LIST_LNKS entry.
STRUCT_IDX = 0
MEMBER_IDX = 1
UNION_IDX = 2
def size_round(val):
    """Round `val` up to the next multiple of 8.

    Values that are already a multiple of 8 are returned unchanged.
    """
    low_bits = 0x7
    bumped = val + low_bits
    return bumped & ~low_bits
# Expected value of lm_magic in a request message; messages with any other
# magic are not decoded.
LUSTRE_MSG_MAGIC_V2 = 0x0BD00BD3


def get_ptlrpc_body(req):
    """Return the 'struct ptlrpc_body_v2' of a request's message, or None.

    Only the request message (req.rq_reqmsg) is examined; the reply message
    (rq_repmsg) is not.

    Returns None when the message pointer is null/empty, when lm_magic is
    not LUSTRE_MSG_MAGIC_V2, when the message has no buffers, or when the
    first buffer is smaller than a ptlrpc_body_v2.
    """
    msg = req.rq_reqmsg
    # Truthiness already rejects None as well as a null pointer, so no
    # separate comparison against None is needed.
    if not msg:
        return None
    if msg.lm_magic != LUSTRE_MSG_MAGIC_V2:
        return None

    bufcount = msg.lm_bufcount
    if bufcount < 1:
        return None
    if msg.lm_buflens[0] < getSizeOf('struct ptlrpc_body_v2'):
        return None

    # The body is read from just past the lm_buflens array (bufcount
    # entries of 'unsigned int'), with that offset rounded up to 8 bytes.
    offset = member_offset('struct lustre_msg_v2', 'lm_buflens')
    offset += getSizeOf("unsigned int") * bufcount
    addr = Addr(msg) + size_round(offset)
    if addr == 0:
        print("addr")
        return None
    return readSU('struct ptlrpc_body_v2', addr)
# Values compared against a request's rq_phase field (see get_phase_flags).
# NOTE(review): presumably these mirror the kernel's rq_phase enum -- confirm
# against the Lustre headers for the version being debugged.
RQ_PHASE_NEW = 0xebc0de00
RQ_PHASE_RPC = 0xebc0de01
RQ_PHASE_BULK = 0xebc0de02
RQ_PHASE_INTERPRET = 0xebc0de03
RQ_PHASE_COMPLETE = 0xebc0de04
RQ_PHASE_UNREG_RPC = 0xebc0de05
RQ_PHASE_UNREG_BULK = 0xebc0de06
RQ_PHASE_UNDEFINED = 0xebc0de07

# Short display label for each phase value; used for the part before the
# colon in the "phase:flags" column.
PHASES = {
    RQ_PHASE_NEW: "NEW",
    RQ_PHASE_RPC: "RPC",
    RQ_PHASE_BULK: "BULK",
    RQ_PHASE_INTERPRET: "NtrPrt",
    RQ_PHASE_COMPLETE: "COMP",
    RQ_PHASE_UNREG_RPC: "UNREG",
    RQ_PHASE_UNREG_BULK: "UNBULK",
    RQ_PHASE_UNDEFINED: "UNDEF"
}
# Help epilog explaining the single-letter flags of the "phase:flags" column.
# Each entry names the ptlrpc_request field that get_phase_flags() tests for
# that letter ('C' is rq_wait_ctx).
FLAG_LEGEND = (
    "\nFlag Legend:\n\n"
    "I - rq_intr\tR - rq_replied\t\tE - rq_err\te - rq_net_err\tX - rq_timedout\tS - rq_resend\t\tT - rq_restart\n"
    "P - rq_replay\tN - rq_no_resend\tW - rq_waiting\tC - rq_wait_ctx\tH - rq_hp\tM - rq_committed\tq - rq_req_unlinked\tu - rq_reply_unlinked\n"
)
def get_phase_flags(req):
    """Return the "phase:flags" text for a request.

    The phase part is the PHASES label for the low 32 bits of req.rq_phase,
    or "?<decimal phase>" when the value is not in PHASES.  The flags part
    has one letter for every request field below that is truthy, in the
    order listed.
    """
    phase = req.rq_phase
    label = PHASES.get(phase & 0xffffffff, "?%d" % phase)

    # (letter, request field) pairs, in output order.
    flag_fields = (
        ("I", "rq_intr"),
        ("R", "rq_replied"),
        ("E", "rq_err"),
        ("e", "rq_net_err"),
        ("X", "rq_timedout"),
        ("S", "rq_resend"),
        ("T", "rq_restart"),
        ("P", "rq_replay"),
        ("N", "rq_no_resend"),
        ("W", "rq_waiting"),
        ("C", "rq_wait_ctx"),
        ("H", "rq_hp"),
        ("M", "rq_committed"),
        ("q", "rq_req_unlinked"),
        ("u", "rq_reply_unlinked"),
    )
    letters = [letter for letter, field in flag_fields if getattr(req, field)]
    return "%s:%s" % (label, "".join(letters))
# rq_import values equal to this pattern are treated as invalid pointers.
LP_POISON = 0x5a5a5a5a5a5a5a5a


def print_one_request(sthread, req):
    """Print one table row describing the ptlrpc_request `req`.

    sthread is printed verbatim in the first column.  The pb_status, pb_opc
    and address of the request's ptlrpc_body are shown as -1 when
    get_ptlrpc_body() yields nothing.  The name column shows
    "Invalid Import" unless req.rq_import is non-null and is neither
    all-ones nor LP_POISON.
    """
    body = get_ptlrpc_body(req)
    if body:
        status = body.pb_status
        opc = body.pb_opc
        pbaddr = Addr(body)
    else:
        status = opc = pbaddr = -1

    imp_invalid = 1
    nid = "LNET_NID_ANY"
    obd_name = "Invalid Import"
    imp = req.rq_import
    if imp and imp != 0xffffffffffffffff and imp != LP_POISON:
        imp_invalid = imp.imp_invalid
        obd_name = ll.obd2str(imp.imp_obd)
    # imp_invalid stays 1 unless the import passed the checks above, so the
    # import is only dereferenced here when it is usable.
    # NOTE(review): nid is computed but never printed; the column headed
    # "nid" actually shows obd_name.
    if not imp_invalid and req.rq_import.imp_connection:
        nid = ll.nid2str(req.rq_import.imp_connection.c_peer.nid)

    row = (
        sthread,
        status,
        Addr(req),
        req.rq_xid,
        obd_name,
        opc,
        get_phase_flags(req),
        "%1d:%1d" % (req.rq_bulk_read, req.rq_bulk_write),
        "%d/%d" % (req.rq_sent, req.rq_deadline),
        pbaddr,
    )
    print("%-14s %-6s 0x%-17x %-18d %-19s %-4d %-14s %-4s %-22s 0x%-17x" % row)
def print_request_list(sthread, lhdr, loffset):
    """Print every request on the list headed by `lhdr`.

    loffset is the byte offset of the link field inside
    struct ptlrpc_request; it is subtracted from each link address to get
    the enclosing request.  Iteration stops at the link whose `next` points
    back at the list head.

    Returns 0 on success.  On any exception the traceback is printed and 1
    is returned.
    """
    try:
        for link in readStructNext(lhdr, 'next'):
            following = link.next
            if following == Addr(lhdr):
                break
            request = readSU('struct ptlrpc_request', following - loffset)
            print_one_request(sthread, request)
    except Exception:
        print_exc()
        return 1
    return 0
def get_linkfld_offset(lfld):
    """Return the offset of a link field from the start of ptlrpc_request.

    lfld is a REQ_Q value.  The link may live in a struct embedded in the
    request, so the result is the offset of that embedded member within
    struct ptlrpc_request plus the offset of the link field within its
    containing struct.
    """
    link_info = RQ_LIST_LNKS[lfld]
    embedded_off = member_offset('struct ptlrpc_request', link_info[UNION_IDX])
    link_off = member_offset(link_info[STRUCT_IDX], link_info[MEMBER_IDX])
    return embedded_off + link_off
def foreach_ptlrpcd_ctl(callback, *args):
    """Call callback(pd, *args) for every ptlrpcd control structure.

    Visits each entry of pd_threads (pd_nthreads of them) for each of the
    first 'ptlrpcds_num' elements of the 'ptlrpcds' symbol, and finally the
    'ptlrpcd_rcv' symbol.
    """
    groups = readSymbol('ptlrpcds')
    group_count = readSymbol('ptlrpcds_num')
    for group_idx in range(group_count):
        group = groups[group_idx]
        for thread_idx in range(group.pd_nthreads):
            callback(group.pd_threads[thread_idx], *args)
    callback(readSymbol('ptlrpcd_rcv'), *args)
def get_daemon_listhdrs(pd, sent_rpcs, pend_rpcs):
    """Record the request-list heads of one ptlrpcd control structure.

    Appends [pc_name, set_requests] to sent_rpcs and
    [pc_name, set_new_requests] to pend_rpcs, both taken from pd.pc_set.
    """
    request_set = pd.pc_set
    sent_entry = [pd.pc_name, request_set.set_requests]
    sent_rpcs.append(sent_entry)
    pending_entry = [pd.pc_name, request_set.set_new_requests]
    pend_rpcs.append(pending_entry)
def dump_list_of_lists(rpc_list, loffset):
    """Print the requests of every (thread name, list head) pair in rpc_list.

    loffset is passed through to print_request_list for each list.
    """
    for sthread, lhdr in rpc_list:
        print_request_list(sthread, lhdr, loffset)
def dump_daemon_rpclists():
    """Print the sent and pending request lists of all ptlrpcd daemons.

    The list heads are gathered first, then each group is printed under its
    own title using the set_chain link offset.  A final untitled
    print_title call follows the last group.
    """
    sent_rpcs, pend_rpcs = [], []
    foreach_ptlrpcd_ctl(get_daemon_listhdrs, sent_rpcs, pend_rpcs)
    offset = get_linkfld_offset(REQ_Q.set_chain)

    sections = (
        ("Sent RPCS: ptlrpc_request_set.set_requests->", sent_rpcs),
        ("Pending RPCS: ptlrpc_request_set.set_new_requests->", pend_rpcs),
    )
    for title, rpc_list in sections:
        print_title(title)
        dump_list_of_lists(rpc_list, offset)
    print_title('')
def print_overview_entry(pd):
    """Print one overview line for a ptlrpcd control structure.

    Shows the thread name followed by a colon, the address of pd and the
    value of pd.pc_set, both in hex.
    """
    fields = ("%s:" % pd.pc_name, Addr(pd), pd.pc_set)
    print("%-14s ptlrpcd_ctl 0x%x ptlrpc_request_set 0x%x" % fields)
def dump_overview():
    """Print an overview line for every ptlrpcd control structure."""
    foreach_ptlrpcd_ctl(print_overview_entry)
def print_pcset_stats(pd):
    """Print the request-set counters of one ptlrpcd control structure.

    Nothing is printed when both set_new_count and set_remaining are zero.
    Otherwise the row shows the thread name, the address of pd.pc_set and
    the set_refcount, set_new_count and set_remaining counters.
    """
    pc_set = pd.pc_set
    new_count = pc_set.set_new_count.counter
    remaining = pc_set.set_remaining.counter
    if new_count == 0 and remaining == 0:
        return

    # Column widths match the header printed by dump_pcsets
    # ('%-14s %-19s ...'): a 14-wide name, then "0x" plus 17 hex columns.
    print("%-14s 0x%-17x %-4d %-4d %-6d" %
          ("%s:" % pd.pc_name,
           Addr(pc_set),
           pc_set.set_refcount.counter,
           new_count,
           remaining))
def dump_pcsets():
    """Print a table of request-set counters for the ptlrpcd daemons.

    Prints the header and a 52-character separator, then one row per
    control structure as produced by print_pcset_stats.
    """
    header = ("thread", "ptlrpc_request_set", "ref", "new", "remain")
    print('%-14s %-19s %-4s %-4s %-6s' % header)
    print_separator(52)
    foreach_ptlrpcd_ctl(print_pcset_stats)
def dump_one_rpc(addr):
    """Print the single ptlrpc_request located at address `addr`.

    The row is printed under a "Request" title with an empty thread column.
    """
    print_title("Request")
    print_one_request('', readSU('struct ptlrpc_request', addr))
def dump_one_rpclist(addr, link_fld):
    """Print the requests on the list whose list_head is at `addr`.

    link_fld is the name of a REQ_Q member and selects the link field used
    to chain the requests.  A name that is not a REQ_Q member raises
    KeyError.
    """
    list_head = readSU('struct list_head', addr)
    link_id = vars(REQ_Q)[link_fld]
    link_offset = get_linkfld_offset(link_id)
    print_title("Request list at %s" % list_head)
    print_request_list('', list_head, link_offset)
def dump_rpcs_cmd(args):
    """Run the action selected by the parsed command-line arguments.

    Precedence, first match wins:
      1. args.oflag    -> overview of the ptlrpcd control structures
      2. args.sflag    -> per request-set counters
      3. args.rpc_addr -> the list headed at that address when
                          args.link_fld is set, otherwise the single
                          request at that address
      4. otherwise     -> sent and pending lists of all ptlrpcd daemons
    """
    if args.oflag:
        dump_overview()
        return
    if args.sflag:
        dump_pcsets()
        return
    if args.rpc_addr:
        addr = args.rpc_addr
        # The parser declares rpc_addr with nargs="?", which yields a plain
        # value; indexing it would raise TypeError.  A one-element sequence
        # is still accepted for callers that build args that way.
        if isinstance(addr, (list, tuple)):
            addr = addr[0]
        if args.link_fld:
            dump_one_rpclist(addr, args.link_fld)
        else:
            dump_one_rpc(addr)
        return
    dump_daemon_rpclists()
if __name__ == "__main__":
    # Long description shown by --help; the line breaks are part of the
    # text and are kept by RawDescriptionHelpFormatter.
    description = (
        "Displays lists of Lustre RPC requests. If no arguments are \n"
        "specified, all rpcs in the sent and pending queues of the \n"
        "ptlrpcd daemons are printed. If an address is specified, it \n"
        "must be a pointer to either a ptlrpc_request or a list_head \n"
        "struct. If the addr is a list_head, then a link_field must \n"
        "also be provided. The link_field identifies the member of \n"
        "the ptlrpc_request struct used to link the list together."
    )
    link_field_names = ['rq_list', 'replay_list', 'set_chain', 'ctx_chain',
                        'unreplied_list', 'timed_list', 'exp_list',
                        'hist_list']

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog=FLAG_LEGEND)
    parser.add_argument(
        "-o", dest="oflag", action="store_true",
        help="print overview of ptlrpcd_XX threads with ptlrpcd_ctl "
             "structs and the associated pc_set field")
    parser.add_argument(
        "-s", dest="sflag", action="store_true",
        help="print rpc counts per ptlrpc_request_set")
    parser.add_argument(
        "-l", dest="link_fld", default="", choices=link_field_names,
        help="name of link field in ptlrpc_request for list headed by addr")
    # Optional positional: absent -> [], present -> a single converted value.
    parser.add_argument(
        "rpc_addr", nargs="?", default=[], type=toint,
        help="address of either single ptlrpc_request or list_head; list_head requires a -l argument")

    dump_rpcs_cmd(parser.parse_args())