# Viewing: ptlrpc.py

#!/usr/bin/env python

"""
Copyright 2015-2019 Cray Inc.  All Rights Reserved
Dumps the Lustre RPC queues for all ptlrpcd_XX threads.
"""

from pykdump.API import *
import sys
import argparse
import os

import lustrelib as ll
from crashlib.input import toint

from traceback import print_exc

# One-line summary of what this script does. Nothing else in this file reads
# it; presumably it is picked up by external tooling that lists the available
# scripts -- TODO confirm.
description_short = "Displays the RPC queues of the Lustre ptlrpcd daemons"

def print_separator(count):
    """Print a horizontal rule consisting of `count` '=' characters.

    A count of zero or less prints an empty line.
    """
    print("=" * count)

def print_title(title):
    """Print an optional caption and column headings, followed by a rule.

    When `title` is non-empty, a blank line, the title and the column
    heading row are printed. The 148-character separator is printed in
    every case, so print_title('') emits only the rule.
    """
    if title:
        headings = ("thread", "pid", "ptlrpc_request", "xid", "nid", "opc",
                    "phase:flags", "R:W", "sent/deadline", "ptlrpc_body")
        print("\n" + title)
        print("%-14s %-6s %-19s %-18s %-19s %-4s %-14s %-4s %-22s %-19s"
              % headings)
    print_separator(148)

def enum(**enums):
    """Return a new class whose attributes are the given name=value pairs."""
    return type('Enum', (), enums)

# Identifiers for the list linkages a ptlrpc_request can be chained on.
# The values are used as keys of RQ_LIST_LNKS, so every one MUST be unique:
# a duplicated value makes two names share one table entry (the later
# dictionary item silently replaces the earlier one).
REQ_Q = enum(rq_list=1, replay_list=2, set_chain=3, ctx_chain=4,
             unreplied_list=5, timed_list=6, exp_list=7, hist_list=8)

# For each linkage: [struct that declares the list_head member,
#                    name of the list_head member in that struct,
#                    member of struct ptlrpc_request whose offset is added].
# get_linkfld_offset() sums the offset of the third item within
# 'struct ptlrpc_request' and the offset of the second item within the first.
# NOTE(review): for links declared directly in ptlrpc_request the third item
# is 'rq_type', which presumably sits at offset 0 -- confirm against the
# structure layout of the dump being examined.
RQ_LIST_LNKS = {
    REQ_Q.rq_list:        ['struct ptlrpc_request', 'rq_list', 'rq_type'],
    REQ_Q.replay_list:    ['struct ptlrpc_request', 'rq_replay_list', 'rq_type'],
    REQ_Q.set_chain:      ['struct ptlrpc_cli_req', 'cr_set_chain', 'rq_cli'],
    REQ_Q.ctx_chain:      ['struct ptlrpc_cli_req', 'cr_ctx_chain', 'rq_cli'],
    REQ_Q.unreplied_list: ['struct ptlrpc_cli_req', 'cr_unreplied_list', 'rq_cli'],
    REQ_Q.timed_list:     ['struct ptlrpc_srv_req', 'sr_timed_list', 'rq_srv'],
    REQ_Q.exp_list:       ['struct ptlrpc_srv_req', 'sr_exp_list', 'rq_srv'],
    REQ_Q.hist_list:      ['struct ptlrpc_srv_req', 'sr_hist_list', 'rq_srv']
}

# Positions of the three items inside each RQ_LIST_LNKS entry.
STRUCT_IDX = 0
MEMBER_IDX = 1
UNION_IDX = 2

def size_round(val):
    """Round `val` up to the nearest multiple of 8."""
    align = 8
    return (val + (align - 1)) & ~(align - 1)

# Value lm_magic must hold for get_ptlrpc_body() to parse a message.
LUSTRE_MSG_MAGIC_V2 = 0x0BD00BD3

def get_ptlrpc_body(req):
    """Read the ptlrpc_body_v2 that follows the header of req.rq_reqmsg.

    Only the request message (rq_reqmsg) is examined.

    Returns the 'struct ptlrpc_body_v2' read from the dump, or None when
    the message pointer is unset, lm_magic is not LUSTRE_MSG_MAGIC_V2, the
    message has no buffers, or lm_buflens[0] is smaller than a
    ptlrpc_body_v2.
    """
    reqmsg = req.rq_reqmsg
    if not reqmsg or reqmsg == None:
        return None
    if reqmsg.lm_magic != LUSTRE_MSG_MAGIC_V2:
        return None

    nbufs = reqmsg.lm_bufcount
    if nbufs < 1:
        return None
    if reqmsg.lm_buflens[0] < getSizeOf('struct ptlrpc_body_v2'):
        return None

    # The body is taken to start after the fixed part of the header plus one
    # 'unsigned int' length slot per buffer, rounded up by size_round().
    hdr_len = member_offset('struct lustre_msg_v2', 'lm_buflens')
    hdr_len += getSizeOf("unsigned int") * nbufs
    body_addr = Addr(reqmsg) + size_round(hdr_len)
    if body_addr == 0:
        print("addr")
        return None
    return readSU('struct ptlrpc_body_v2', body_addr)

# Values of ptlrpc_request.rq_phase that this script knows how to label.
RQ_PHASE_NEW = 0xebc0de00
RQ_PHASE_RPC = 0xebc0de01
RQ_PHASE_BULK = 0xebc0de02
RQ_PHASE_INTERPRET = 0xebc0de03
RQ_PHASE_COMPLETE = 0xebc0de04
RQ_PHASE_UNREG_RPC =  0xebc0de05
RQ_PHASE_UNREG_BULK = 0xebc0de06
RQ_PHASE_UNDEFINED = 0xebc0de07

# Short label printed for each known phase value.
PHASES = {
       RQ_PHASE_NEW: "NEW",
       RQ_PHASE_RPC: "RPC",
       RQ_PHASE_BULK: "BULK",
       RQ_PHASE_INTERPRET: "NtrPrt",
       RQ_PHASE_COMPLETE: "COMP",
       RQ_PHASE_UNREG_RPC: "UNREG",
       RQ_PHASE_UNREG_BULK: "UNBULK",
       RQ_PHASE_UNDEFINED: "UNDEF"
   }

# Flag letter -> ptlrpc_request field, in the order the letters are printed.
# Keep FLAG_LEGEND below in step with this table.
_RQ_FLAG_LETTERS = (
    ("I", "rq_intr"),
    ("R", "rq_replied"),
    ("E", "rq_err"),
    ("e", "rq_net_err"),
    ("X", "rq_timedout"),
    ("S", "rq_resend"),
    ("T", "rq_restart"),
    ("P", "rq_replay"),
    ("N", "rq_no_resend"),
    ("W", "rq_waiting"),
    ("C", "rq_wait_ctx"),
    ("H", "rq_hp"),
    ("M", "rq_committed"),
    ("q", "rq_req_unlinked"),
    ("u", "rq_reply_unlinked"),
)

# Help epilog explaining the flag letters. The entry for 'C' names
# rq_wait_ctx, which is the field get_phase_flags() actually tests.
FLAG_LEGEND = "\nFlag Legend:\n\n" + \
         "I - rq_intr\tR - rq_replied\t\tE - rq_err\te - rq_net_err\tX - rq_timedout\tS - rq_resend\t\tT - rq_restart\n" + \
         "P - rq_replay\tN - rq_no_resend\tW - rq_waiting\tC - rq_wait_ctx\tH - rq_hp\tM - rq_committed\tq - rq_req_unlinked\tu - rq_reply_unlinked\n"

def get_phase_flags(req):
    """Return the "phase:flags" column text for a request.

    The phase part is the PHASES label for the low 32 bits of req.rq_phase,
    or "?<decimal value>" when the value is not in PHASES. The flags part
    holds one letter for every field in _RQ_FLAG_LETTERS that is truthy on
    `req`, in table order; it is empty when none is set.
    """
    phase = req.rq_phase
    # Mask to 32 bits so a value that was read as a negative (sign-extended)
    # integer still matches the table.
    phasestr = PHASES.get(phase & 0xffffffff, "?%d" % phase)
    letters = "".join(letter for letter, field in _RQ_FLAG_LETTERS
                      if getattr(req, field))
    return "%s:%s" % (phasestr, letters)

# Pointer pattern that print_one_request() refuses to follow as an import.
LP_POISON = 0x5a5a5a5a5a5a5a5a

def print_one_request(sthread, req):
    """Print a single table row describing the ptlrpc_request `req`.

    `sthread` is printed as-is in the first column. Status, opcode and
    body address come from get_ptlrpc_body() and stay at -1 when no body
    could be located. The device name is taken from req.rq_import unless
    that pointer is null, all-ones or LP_POISON, in which case
    "Invalid Import" is shown.
    """
    status = opc = pbaddr = -1
    body = get_ptlrpc_body(req)
    if body:
        status, opc, pbaddr = body.pb_status, body.pb_opc, Addr(body)

    imp_invalid = 1
    nid = "LNET_NID_ANY"
    obd_name = "Invalid Import"
    imp = req.rq_import
    if imp and imp != 0xffffffffffffffff and imp != LP_POISON:
        imp_invalid = imp.imp_invalid
        obd_name = ll.obd2str(imp.imp_obd)

    # NOTE(review): `nid` is resolved here but never printed. The column
    # headed "nid" by print_title() receives obd_name and the one headed
    # "pid" receives status -- confirm which side reflects the intent.
    if not imp_invalid and imp.imp_connection:
        nid = ll.nid2str(imp.imp_connection.c_peer.nid)

    bulk_rw = "%1d:%1d" % (req.rq_bulk_read, req.rq_bulk_write)
    sent_deadline = "%d/%d" % (req.rq_sent, req.rq_deadline)
    row = (sthread, status, Addr(req), req.rq_xid, obd_name, opc,
           get_phase_flags(req), bulk_rw, sent_deadline, pbaddr)
    print("%-14s %-6s 0x%-17x %-18d %-19s %-4d %-14s %-4s %-22s 0x%-17x"
          % row)

def print_request_list(sthread, lhdr, loffset):
    """Print every request chained on the list headed by `lhdr`.

    `loffset` is the byte offset of the linking list_head inside
    'struct ptlrpc_request'; it is subtracted from each link address to
    find the request. `sthread` is passed through to print_one_request().

    Returns 0 on success. Any exception is reported with a traceback and
    turned into a return value of 1.
    """
    try:
        for node in readStructNext(lhdr, 'next'):
            following = node.next
            # Each step prints the request AFTER the current node, so stop
            # once the chain points back at the list head.
            if following == Addr(lhdr):
                break
            print_one_request(
                sthread,
                readSU('struct ptlrpc_request', following - loffset))
    except Exception:
        print_exc()
        return 1
    else:
        return 0

def get_linkfld_offset(lfld):
    """Return the offset of a link field from the start of ptlrpc_request.

    `lfld` is a REQ_Q value used to look up RQ_LIST_LNKS. The result is the
    offset of the entry's UNION_IDX member within 'struct ptlrpc_request'
    plus the offset of the link member within its declaring struct, which
    accounts for links that live inside an embedded union member.
    """
    entry = RQ_LIST_LNKS[lfld]
    union_off = member_offset('struct ptlrpc_request', entry[UNION_IDX])
    link_off = member_offset(entry[STRUCT_IDX], entry[MEMBER_IDX])
    return union_off + link_off

def foreach_ptlrpcd_ctl(callback, *args):
    """Invoke callback(pd, *args) for every ptlrpcd control structure.

    Visits each of the pd_nthreads entries of pd_threads for the first
    `ptlrpcds_num` elements of the `ptlrpcds` symbol, and finally the
    `ptlrpcd_rcv` symbol.
    """
    groups = readSymbol('ptlrpcds')
    ngroups = readSymbol('ptlrpcds_num')

    for gidx in range(ngroups):
        group = groups[gidx]
        for tidx in range(group.pd_nthreads):
            callback(group.pd_threads[tidx], *args)

    # ptlrpcd_rcv is looked up only after all ptlrpcds entries were visited.
    callback(readSymbol('ptlrpcd_rcv'), *args)

def get_daemon_listhdrs(pd, sent_rpcs, pend_rpcs):
    """Record the list heads of one daemon's request set.

    Appends [pd.pc_name, pd.pc_set.set_requests] to `sent_rpcs` and
    [pd.pc_name, pd.pc_set.set_new_requests] to `pend_rpcs`. Both lists are
    modified in place; nothing is returned.
    """
    targets = ((sent_rpcs, 'set_requests'), (pend_rpcs, 'set_new_requests'))
    for collected, list_member in targets:
        collected.append([pd.pc_name, getattr(pd.pc_set, list_member)])

def dump_list_of_lists(rpc_list, loffset):
    """Print the requests of each [thread name, list head] pair in `rpc_list`.

    `loffset` is handed unchanged to print_request_list() for every pair.
    """
    for sthread, lhdr in rpc_list:
        print_request_list(sthread, lhdr, loffset)

def dump_daemon_rpclists():
    """Print the sent and pending request queues of all ptlrpcd daemons.

    List heads are gathered with get_daemon_listhdrs() and walked through
    the set_chain linkage. A closing separator is printed at the end.
    """
    sent_rpcs, pend_rpcs = [], []
    foreach_ptlrpcd_ctl(get_daemon_listhdrs, sent_rpcs, pend_rpcs)
    offset = get_linkfld_offset(REQ_Q.set_chain)

    sections = (
        ("Sent RPCS: ptlrpc_request_set.set_requests->", sent_rpcs),
        ("Pending RPCS: ptlrpc_request_set.set_new_requests->", pend_rpcs),
    )
    for caption, queues in sections:
        print_title(caption)
        dump_list_of_lists(queues, offset)

    # An empty title prints only the separator line.
    print_title('')

def print_overview_entry(pd):
    """Print the name and address of `pd` together with its pc_set value."""
    label = "%s:" % pd.pc_name
    fields = (label, Addr(pd), pd.pc_set)
    print("%-14s  ptlrpcd_ctl 0x%x   ptlrpc_request_set 0x%x" % fields)

def dump_overview():
    """Print one overview line per ptlrpcd control structure.

    Runs print_overview_entry() on every structure that
    foreach_ptlrpcd_ctl() visits.
    """
    foreach_ptlrpcd_ctl(print_overview_entry)

def print_pcset_stats(pd):
    """Print the counters of pd.pc_set unless the set is idle.

    Nothing is printed when both set_new_count and set_remaining are zero.
    Otherwise one row is printed with the thread name, the address of the
    set, and its refcount, new and remaining counters.
    """
    pcset = pd.pc_set
    if not (pcset.set_new_count.counter != 0 or
            pcset.set_remaining.counter != 0):
        return

    label = "%s:" % pd.pc_name
    row = (label,
           Addr(pcset),
           pcset.set_refcount.counter,
           pcset.set_new_count.counter,
           pcset.set_remaining.counter)
    print("%-13s 0x%-18x %-4d %-4d %-6d" % row)

def dump_pcsets():
    """Print a table of request-set counters for the ptlrpcd daemons.

    After the heading and a 52-character rule, print_pcset_stats() is run
    on every structure visited by foreach_ptlrpcd_ctl().
    """
    headings = ("thread", "ptlrpc_request_set", "ref", "new", "remain")
    print('%-14s %-19s %-4s %-4s %-6s' % headings)
    print_separator(52)
    foreach_ptlrpcd_ctl(print_pcset_stats)

def dump_one_rpc(addr):
    """Print the table heading and the row for the ptlrpc_request at `addr`."""
    print_title("Request")
    print_one_request('', readSU('struct ptlrpc_request', addr))

def dump_one_rpclist(addr, link_fld):
    """Print every request on the list whose list_head is at `addr`.

    `link_fld` is the name of a REQ_Q attribute; it selects which linkage
    of ptlrpc_request chains the list. An unknown name raises KeyError.
    """
    lhdr = readSU('struct list_head', addr)
    loffset = get_linkfld_offset(REQ_Q.__dict__[link_fld])

    print_title("Request list at %s" % lhdr)
    print_request_list('', lhdr, loffset)

def dump_rpcs_cmd(args):
    """Run the action selected by the parsed command-line arguments.

    Precedence, first match wins:
      * args.oflag    -> overview of the ptlrpcd control structures
      * args.sflag    -> per request-set counters
      * args.rpc_addr -> the request list headed at that address when
                         args.link_fld is set, otherwise the single request
                         at that address
      * otherwise     -> sent and pending queues of every ptlrpcd daemon

    args.rpc_addr is normally a plain integer, because the positional is
    declared with nargs="?" and argparse then stores one converted item
    rather than a list. Indexing it with [0] raised TypeError on the
    link_fld path; a list/tuple value is still accepted for compatibility.
    """
    if args.oflag:
        dump_overview()
        return
    if args.sflag:
        dump_pcsets()
        return

    addr = args.rpc_addr
    if isinstance(addr, (list, tuple)):
        # Empty list is the "no address given" default.
        addr = addr[0] if addr else None

    if addr:
        if args.link_fld:
            dump_one_rpclist(addr, args.link_fld)
        else:
            dump_one_rpc(addr)
        return
    dump_daemon_rpclists()

if __name__ == "__main__":
    # Long help text; line breaks are kept verbatim by the raw formatter.
    description = (
        "Displays lists of Lustre RPC requests. If no arguments are \n"
        "specified, all rpcs in the sent and pending queues of the \n"
        "ptlrpcd daemons are printed. If an address is specified, it \n"
        "must be a pointer to either a ptlrpc_request or a list_head \n"
        "struct. If the addr is a list_head, then a link_field must \n"
        "also be provided. The link_field identifies the member of \n"
        "the ptlrpc_request struct used to link the list together.")

    # Names accepted by -l; each one is an attribute of REQ_Q.
    link_fields = ['rq_list', 'replay_list', 'set_chain', 'ctx_chain',
                   'unreplied_list', 'timed_list', 'exp_list', 'hist_list']

    parser = argparse.ArgumentParser(
        description=description,
        epilog=FLAG_LEGEND,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        "-o", dest="oflag", action="store_true",
        help="print overview of ptlrpcd_XX threads with ptlrpcd_ctl "
             "structs and the associated pc_set field")
    parser.add_argument(
        "-s", dest="sflag", action="store_true",
        help="print rpc counts per ptlrpc_request_set")
    parser.add_argument(
        "-l", dest="link_fld", default="", choices=link_fields,
        help="name of link field in ptlrpc_request for list headed by addr")
    parser.add_argument(
        "rpc_addr", nargs="?", default=[], type=toint,
        help="address of either single ptlrpc_request or list_head; "
             "list_head requires a -l argument")
    args = parser.parse_args()

    dump_rpcs_cmd(args)