Viewing: lnet-types.h
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, 2017, Intel Corporation.
*/
/* This file is part of Lustre, http://www.lustre.org/ */
#ifndef __UAPI_LNET_TYPES_H__
#define __UAPI_LNET_TYPES_H__
#include <linux/types.h>
#include <linux/string.h>
#include <asm/byteorder.h>
#ifndef __KERNEL__
#include <stdbool.h>
#endif
/** \addtogroup lnet
* @{ */
#include <linux/lnet/lnet-idl.h>
/** \addtogroup lnet_addr
* @{ */
/** LNet version string. */
#define LNET_VERSION "0.7.0"
/** Portal reserved for LNet's own use.
* \see lustre/include/lustre/lustre_idl.h for Lustre portal assignments.
*/
#define LNET_RESERVED_PORTAL 0
/** wildcard NID that matches any end-point address (every bit of lnet_nid_t set) */
#define LNET_NID_ANY (~(lnet_nid_t) 0)
/** wildcard PID that matches any lnet_pid_t (every bit of lnet_pid_t set) */
#define LNET_PID_ANY (~(lnet_pid_t) 0)
/**
 * Test whether \a nid is the wildcard ("ANY") NID.
 *
 * \param nid	NID to inspect; may be NULL
 *
 * \return 1 if \a nid is NULL or its nid_type is 0xFF, 0 otherwise.
 */
static inline int LNET_NID_IS_ANY(const struct lnet_nid *nid)
{
	/* No NID at all is treated the same as the wildcard. */
	if (!nid)
		return 1;

	return nid->nid_type == 0xFF;
}
/**
* Wildcard value for struct lnet_nid: a compound literal in which every
* initializer is 0xFF or all-ones. LNET_NID_IS_ANY() recognizes it by a
* nid_type of 0xFF.
*/
#define LNET_ANY_NID ((struct lnet_nid) \
{0xFF, 0xFF, ~0, {~0, ~0, ~0, ~0} })
#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */
#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */
/* NOTE(review): presumably the PID used by Lustre itself - confirm */
#define LNET_PID_LUSTRE 12345
/* how an LNET NID encodes net:address */
/**
 * Extract the address part of an lnet_nid_t.
 *
 * \param nid	4-byte-address NID
 *
 * \return the low 32 bits of \a nid.
 */
static inline __u32 LNET_NIDADDR(lnet_nid_t nid)
{
	const lnet_nid_t addr_mask = 0xffffffff;

	return (__u32)(nid & addr_mask);
}
/**
 * Extract the network part of an lnet_nid_t.
 *
 * \param nid	4-byte-address NID
 *
 * \return bits 32..63 of \a nid, shifted down into a 32-bit value.
 */
static inline __u32 LNET_NIDNET(lnet_nid_t nid)
{
	lnet_nid_t upper = nid >> 32;

	return (__u32)(upper & 0xffffffff);
}
/**
 * Extract the network number from a 32-bit network identifier.
 *
 * \param net	network identifier as built by LNET_MKNET()
 *
 * \return the low 16 bits of \a net.
 */
static inline __u32 LNET_NETNUM(__u32 net)
{
	const __u32 num_mask = 0xffff;

	return net & num_mask;
}
/**
 * Extract the network type from a 32-bit network identifier.
 *
 * \param net	network identifier as built by LNET_MKNET()
 *
 * \return bits 16..23 of \a net, shifted down to the low byte.
 */
static inline __u32 LNET_NETTYP(__u32 net)
{
	__u32 upper = net >> 16;

	return upper & 0xff;
}
/**
 * Build a 32-bit network identifier from a network type and number.
 *
 * Neither argument is masked: \a type is shifted left by 16 bits and
 * OR-ed with \a num as given.
 *
 * \param type	network type
 * \param num	network number
 *
 * \return the combined network identifier.
 */
static inline __u32 LNET_MKNET(__u32 type, __u32 num)
{
	__u32 net = type << 16;

	net |= num;
	return net;
}
/**
 * Build an lnet_nid_t from a network identifier and an address.
 *
 * \param net	32-bit network identifier, placed in the upper 32 bits
 * \param addr	32-bit address, placed in the lower 32 bits
 *
 * \return the combined NID.
 */
static inline lnet_nid_t LNET_MKNID(__u32 net, __u32 addr)
{
	__u64 nid = net;

	nid <<= 32;
	nid |= addr;
	return nid;
}
/** The lolnd NID (i.e. myself): network type LOLND, network number 0, address 0 */
#define LNET_NID_LO_0 LNET_MKNID(LNET_MKNET(LOLND, 0), 0)
/** network part of the wildcard NID LNET_NID_ANY */
#define LNET_NET_ANY LNET_NIDNET(LNET_NID_ANY)
/** address part of the wildcard NID LNET_NID_ANY */
#define LNET_ADDR_ANY LNET_NIDADDR(LNET_NID_ANY)
/**
 * Test whether \a nid carries a 4-byte address.
 *
 * \param nid	NID to inspect
 *
 * \return true when NID_ADDR_BYTES(nid) equals 4, false otherwise.
 */
static inline bool nid_is_nid4(const struct lnet_nid *nid)
{
	if (NID_ADDR_BYTES(nid) != 4)
		return false;

	return true;
}
/**
 * nid_addr_is_set - check if address portion of NID is set
 * @nid: the NID to check
 *
 * This function attempts to distinguish between NIDs where:
 * 1. Only the network is specified (nid_type and nid_num set, address unset)
 * 2. A full NID is specified (network and address both set)
 *
 * LIMITATIONS AND KNOWN ISSUES:
 * This function returns false for any NID whose address bytes are all zero.
 * However, this creates ambiguity because NIDs like "0@kfi" or "0@gni" are
 * valid, fully-specified NIDs where the address portion is legitimately zero.
 *
 * The function cannot distinguish between:
 * - A network-only NID (e.g., "kfi0" with no specific address)
 * - A fully-qualified NID with address=0 (e.g., "0@kfi")
 *
 * This is a fundamental limitation because struct lnet_nid has no explicit
 * field to mark "address not specified" vs "address is zero". Both cases
 * result in nid_addr[] being all zeros.
 *
 * Callers should be aware that if a user explicitly specifies an address of 0,
 * this function will incorrectly return false.
 *
 * Only the first NID_ADDR_BYTES(nid) bytes of nid_addr[] are examined.
 *
 * Return: true if any byte in the address portion is non-zero, false otherwise
 */
static inline bool nid_addr_is_set(const struct lnet_nid *nid)
{
	/* View the address words as raw bytes without dropping const. */
	const __u8 *addr = (const __u8 *)(&nid->nid_addr[0]);
	int i;

	for (i = 0; i < NID_ADDR_BYTES(nid); i++)
		if (addr[i])
			return true;

	return false;
}
/*
* Test whether __nid (a pointer to struct lnet_nid) is the loopback NID:
* type LOLND, a 4-byte address, network number 0 and first address word 0.
*
* LOLND may not be defined yet, so we cannot use an inline.
* As a macro, __nid is expanded (and thus evaluated) several times; do not
* pass an expression with side effects.
*/
#define nid_is_lo0(__nid) \
((__nid)->nid_type == LOLND && \
nid_is_nid4(__nid) && \
(__nid)->nid_num == 0 && \
(__nid)->nid_addr[0] == 0)
/**
 * Return the 32-bit network identifier of a struct lnet_nid.
 *
 * \param nid	NID to inspect; NULL is treated as the wildcard
 *
 * \return LNET_NET_ANY for a wildcard NID, otherwise the value built by
 * LNET_MKNET() from nid_type and the CPU-endian nid_num.
 */
static inline __u32 LNET_NID_NET(const struct lnet_nid *nid)
{
	__u32 netnum;

	if (LNET_NID_IS_ANY(nid))
		return LNET_NET_ANY;

	/* nid_num is stored big-endian in the structure */
	netnum = __be16_to_cpu(nid->nid_num);
	return LNET_MKNET(nid->nid_type, netnum);
}
/**
 * Convert a 4-byte-address lnet_nid_t into a struct lnet_nid.
 *
 * LNET_NID_ANY is mapped to LNET_ANY_NID. Otherwise nid_size is set to 0,
 * the type and (big-endian) network number are taken from the network part
 * of \a nid4, the address goes big-endian into nid_addr[0] and the
 * remaining address words are zeroed.
 *
 * \param nid4	source NID
 * \param nid	destination, fully overwritten
 */
static inline void lnet_nid4_to_nid(lnet_nid_t nid4, struct lnet_nid *nid)
{
	__u32 net;
	int i;

	if (nid4 == LNET_NID_ANY) {
		*nid = LNET_ANY_NID;
		return;
	}

	net = LNET_NIDNET(nid4);

	nid->nid_size = 0;
	nid->nid_type = LNET_NETTYP(net);
	nid->nid_num = __cpu_to_be16(LNET_NETNUM(net));
	nid->nid_addr[0] = __cpu_to_be32(LNET_NIDADDR(nid4));
	for (i = 1; i < 4; i++)
		nid->nid_addr[i] = 0;
}
static inline lnet_nid_t lnet_nid_to_nid4(const struct lnet_nid *nid)
{
if (LNET_NID_IS_ANY(nid))
return LNET_NID_ANY;
return LNET_MKNID(LNET_NID_NET(nid), __be32_to_cpu(nid->nid_addr[0]));
}
/**
 * Compare two NIDs field by field.
 *
 * \param n1	first NID
 * \param n2	second NID
 *
 * \return 1 when nid_size, nid_type, nid_num and all four nid_addr words
 * are equal, 0 otherwise.
 */
static inline int nid_same(const struct lnet_nid *n1,
			   const struct lnet_nid *n2)
{
	int i;

	if (n1->nid_size != n2->nid_size ||
	    n1->nid_type != n2->nid_type ||
	    n1->nid_num != n2->nid_num)
		return 0;

	for (i = 0; i < 4; i++)
		if (n1->nid_addr[i] != n2->nid_addr[i])
			return 0;

	return 1;
}
/*
 * Hash helper for NIDs: XOR of LNET_NID_NET(nid) and the four nid_addr
 * words (used exactly as stored, without byte-order conversion).
 */
static inline unsigned long nidhash(const struct lnet_nid *nid)
{
	unsigned long result = LNET_NID_NET(nid);
	int idx = 0;

	while (idx < 4) {
		result ^= nid->nid_addr[idx];
		idx++;
	}

	return result;
}
/**
* Health-related LNet counters; every member is an unsigned 32-bit value.
*
* NOTE(review): the meaning of each counter is only implied by its name
* here (resends, local/remote/network error classes, ...) - confirm against
* the code that updates them.
*/
struct lnet_counters_health {
__u32 lch_rst_alloc;
__u32 lch_resend_count;
__u32 lch_response_timeout_count;
__u32 lch_local_interrupt_count;
__u32 lch_local_dropped_count;
__u32 lch_local_aborted_count;
__u32 lch_local_no_route_count;
__u32 lch_local_timeout_count;
__u32 lch_local_error_count;
__u32 lch_remote_dropped_count;
__u32 lch_remote_error_count;
__u32 lch_remote_timeout_count;
__u32 lch_network_timeout_count;
__u32 lch_failed_resends;
__u32 lch_successful_resends;
};
/**
* Complete set of LNet counters: the common counters (struct
* lnet_counters_common, declared outside this header) followed by the
* health counters.
*/
struct lnet_counters {
/** common counters */
struct lnet_counters_common lct_common;
/** health counters */
struct lnet_counters_health lct_health;
};
/*
* This is a hard-coded limit on the number of interfaces supported by
* the interface bonding implemented by the ksocknal LND. It must be
* defined here because it is used in LNet data structures that are
* common to all LNDs.
*/
#define LNET_INTERFACES_NUM 16
/*
* The minimum number of interfaces per node supported by LNet
* (currently the same value as LNET_INTERFACES_NUM).
*/
#define LNET_INTERFACES_MIN 16
/* The default - arbitrary - value of the lnet_max_interfaces tunable. */
#define LNET_INTERFACES_MAX_DEFAULT 200
/**
* Objects maintained by the LNet are accessed through handles. Handle types
* have names of the form lnet_handle_xx, where xx is one of the two letter
* object type codes ('md' for memory descriptor, and
* 'me' for match entry). Each type of object is given a unique handle type
* to enhance type checking.
*/
/** Cookie value (all 64 bits set) that marks a handle as invalid. */
#define LNET_WIRE_HANDLE_COOKIE_NONE (~0ULL)
/** Handle for a memory descriptor (MD). */
struct lnet_handle_md {
/** opaque 64-bit cookie; LNET_WIRE_HANDLE_COOKIE_NONE means "invalid" */
__u64 cookie;
};
/**
* Invalidate md handle \a h.
*/
static inline void LNetInvalidateMDHandle(struct lnet_handle_md *h)
{
h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE;
}
/**
 * Check whether MD handle \a h is invalid, i.e. whether its cookie is
 * LNET_WIRE_HANDLE_COOKIE_NONE.
 *
 * \param h	handle to check (passed by value)
 *
 * \return 1 if handle is invalid, 0 if valid.
 */
static inline int LNetMDHandleIsInvalid(struct lnet_handle_md h)
{
	if (h.cookie != LNET_WIRE_HANDLE_COOKIE_NONE)
		return 0;

	return 1;
}
/**
* Global process ID, using the 4-byte-address NID type lnet_nid_t.
* \see struct lnet_processid for the large-address variant.
*/
struct lnet_process_id {
/** node id */
lnet_nid_t nid;
/** process id */
lnet_pid_t pid;
};
/**
* Global process ID - with large addresses (the node is identified by a
* struct lnet_nid rather than an lnet_nid_t).
*/
struct lnet_processid {
/** node id */
struct lnet_nid nid;
/** process id */
lnet_pid_t pid;
};
/**
 * Convert a struct lnet_process_id into a large-address
 * struct lnet_processid.
 *
 * \param pid4	source process ID (passed by value)
 * \param pid	destination; both its nid and pid members are overwritten
 */
static inline void
lnet_pid4_to_pid(struct lnet_process_id pid4, struct lnet_processid *pid)
{
	/* Expand the NID first, then carry the PID across unchanged. */
	lnet_nid4_to_nid(pid4.nid, &pid->nid);
	pid->pid = pid4.pid;
}
static inline struct lnet_process_id
lnet_pid_to_pid4(struct lnet_processid *pid)
{
struct lnet_process_id ret;
ret.pid = pid->pid;
ret.nid = lnet_nid_to_nid4(&pid->nid);
return ret;
}
/** @} lnet_addr */
/** \addtogroup lnet_me
* @{ */
/**
* Specifies whether the match entry or memory descriptor should be unlinked
* automatically (LNET_UNLINK) or not (LNET_RETAIN).
*/
enum lnet_unlink {
/** do not unlink automatically */
LNET_RETAIN = 0,
/** unlink automatically */
LNET_UNLINK = 1,
};
/**
* Values of the type enum lnet_ins_pos are used to control where a new match
* entry is inserted. The value LNET_INS_BEFORE is used to insert the new
* entry before the current entry or before the head of the list. The value
* LNET_INS_AFTER is used to insert the new entry after the current entry
* or after the last item in the list. The value LNET_INS_LOCAL attaches the
* new entry at the tail of the local CPU partition ME list.
*/
enum lnet_ins_pos {
/** insert ME before current position or head of the list */
LNET_INS_BEFORE = 0,
/** insert ME after current position or tail of the list */
LNET_INS_AFTER = 1,
/** attach ME at tail of local CPU partition ME list */
LNET_INS_LOCAL = 2,
};
/** @} lnet_me */
/** \addtogroup lnet_md
* @{ */
/**
* Opaque, packed byte container that is exactly sizeof(struct lnet_hdr)
* bytes long.
*
* NOTE(review): the name suggests it holds an LNet header carrying 16-byte
* NIDs - confirm against the users of this type.
*/
struct lnet_hdr_nid16 {
char _bytes[sizeof(struct lnet_hdr)];
} __attribute__((packed));
/**
* Event queue handler function type.
*
* The EQ handler runs for each event that is deposited into the EQ. The
* handler is supplied with a pointer to the event that triggered the
* handler invocation.
*
* The handler must not block, must be reentrant, and must not call any LNet
* API functions. It should return as quickly as possible.
*/
/* forward declaration: the handler type only needs a pointer to the event */
struct lnet_event;
typedef void (*lnet_handler_t)(struct lnet_event *event);
/**
* Defines the visible parts of a memory descriptor. Values of this type
* are used to initialize memory descriptors.
*/
struct lnet_md {
/**
* Specify the memory region associated with the memory descriptor.
* If the umd_options field has:
* - LNET_MD_KIOV bit set: The umd_start field points to the starting
* address of an array of struct bio_vec and the umd_length field
* specifies the number of entries in the array. The length can't be
* bigger than LNET_MAX_IOV. The struct bio_vec is used to describe
* page-based fragments that are not necessarily mapped in virtual
* memory.
* - Otherwise: The memory region is contiguous. The umd_start field
* specifies the starting address for the memory region and the
* umd_length field specifies its length.
*
* When the memory region is fragmented, all fragments but the first
* one must start on page boundary, and all but the last must end on
* page boundary.
*/
void *umd_start;
unsigned int umd_length;
/**
* Specifies the maximum number of operations that can be performed
* on the memory descriptor. An operation is any action that could
* possibly generate an event. In the usual case, the threshold value
* is decremented for each operation on the MD. When the threshold
* drops to zero, the MD becomes inactive and does not respond to
* operations. A threshold value of LNET_MD_THRESH_INF indicates that
* there is no bound on the number of operations that may be applied
* to a MD.
*/
int umd_threshold;
/**
* Specifies the largest incoming request that the memory descriptor
* should respond to. When the unused portion of a MD (length -
* local offset) falls below this value, the MD becomes inactive and
* does not respond to further operations. This value is only used
* if the LNET_MD_MAX_SIZE option is set.
*/
int umd_max_size;
/**
* Specifies the behavior of the memory descriptor. A bitwise OR
* of the following values can be used:
* - LNET_MD_OP_PUT: The LNet PUT operation is allowed on this MD.
* - LNET_MD_OP_GET: The LNet GET operation is allowed on this MD.
* - LNET_MD_MANAGE_REMOTE: The offset used in accessing the memory
* region is provided by the incoming request. By default, the
* offset is maintained locally. When maintained locally, the
* offset is incremented by the length of the request so that
* the next operation (PUT or GET) will access the next part of
* the memory region. Note that only one offset variable exists
* per memory descriptor. If both PUT and GET operations are
* performed on a memory descriptor, the offset is updated each time.
* - LNET_MD_TRUNCATE: The length provided in the incoming request can
* be reduced to match the memory available in the region (determined
* by subtracting the offset from the length of the memory region).
* By default, if the length in the incoming operation is greater
* than the amount of memory available, the operation is rejected.
* - LNET_MD_ACK_DISABLE: An acknowledgment should not be sent for
* incoming PUT operations, even if requested. By default,
* acknowledgments are sent for PUT operations that request an
* acknowledgment. Acknowledgments are never sent for GET operations.
* The data sent in the REPLY serves as an implicit acknowledgment.
* - LNET_MD_KIOV: The umd_start and umd_length fields specify an array
* of struct bio_vec.
* - LNET_MD_MAX_SIZE: The umd_max_size field is valid.
* - LNET_MD_BULK_HANDLE: The umd_bulk_handle field is valid.
* - LNET_MD_TRACK_RESPONSE: Enable response tracking on this MD
* regardless of the value of the lnet_response_tracking param.
* - LNET_MD_NO_TRACK_RESPONSE: Disable response tracking on this MD
* regardless of the value of the lnet_response_tracking param.
* - LNET_MD_GNILND: Disable warning about exceeding LNET_MAX_IOV.
*
* Note:
* - LNET_MD_KIOV allows for a scatter/gather capability for memory
* descriptors.
* - When LNET_MD_MAX_SIZE is set, the total length of the memory
* region (i.e. sum of all fragment lengths) must not be less than
* \a umd_max_size.
*/
unsigned int umd_options;
/**
* A user-specified value that is associated with the memory
* descriptor. The value does not need to be a pointer, but must fit
* in the space used by a pointer. This value is recorded in events
* associated with operations on this MD.
*/
void *umd_user_ptr;
/**
* The event handler used to log the operations performed on
* the memory region. If this argument is NULL operations
* performed on this memory descriptor are not logged.
*/
lnet_handler_t umd_handler;
/**
* The bulk MD handle which was registered to describe the buffers
* either to be used to transfer data to the peer or receive data
* from the peer. This allows LNet to properly determine the NUMA
* node on which the memory was allocated and use that to select the
* nearest local network interface. This value is only used
* if the LNET_MD_BULK_HANDLE option is set.
*/
struct lnet_handle_md umd_bulk_handle;
};
/* Max Transfer Unit (minimum supported everywhere).
* CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks)
* these limits are system wide and not interface-local.
*
* LNET_MTU is 1 << LNET_MTU_BITS (1048576). LNET_MTU_IOV_LIMIT is LNET_MTU
* divided by (1 << PAGE_SHIFT); PAGE_SHIFT must be provided by whoever
* uses that macro, it is not defined in this header. */
#define LNET_MTU_BITS 20
#define LNET_MTU (1u << LNET_MTU_BITS)
#define LNET_MTU_IOV_LIMIT (1u << (LNET_MTU_BITS - PAGE_SHIFT))
/**
* Options for the MD structure; each value is a distinct bit so that they
* can be OR-ed together. See struct lnet_md::umd_options.
*/
enum lnet_md_options {
LNET_MD_OP_PUT = 0x0001,
LNET_MD_OP_GET = 0x0002,
LNET_MD_MANAGE_REMOTE = 0x0004,
/* unused = 0x0008, */
LNET_MD_TRUNCATE = 0x0010,
LNET_MD_ACK_DISABLE = 0x0020,
/* LNET_MD_IOVEC = 0x0040 */
LNET_MD_MAX_SIZE = 0x0080,
LNET_MD_KIOV = 0x0100,
LNET_MD_BULK_HANDLE = 0x0200,
LNET_MD_TRACK_RESPONSE = 0x0400,
LNET_MD_NO_TRACK_RESPONSE = 0x0800,
LNET_MD_GNILND = 0x1000,
/* NOTE(review): not described in the umd_options comment; presumably
* marks an MD whose memory is a GPU address - confirm */
LNET_MD_GPU_ADDR = 0x2000,
};
/** Infinite threshold on MD operations. See struct lnet_md::umd_threshold */
#define LNET_MD_THRESH_INF (-1)
/** @} lnet_md */
/** \addtogroup lnet_eq
* @{ */
/**
* Six types of events can be logged in an event queue. The values start
* at 1; 0 is not a valid event kind in this enum.
*/
enum lnet_event_kind {
/** An incoming GET operation has completed on the MD. */
LNET_EVENT_GET = 1,
/**
* An incoming PUT operation has completed on the MD. The
* underlying layers will not alter the memory (on behalf of this
* operation) once this event has been logged.
*/
LNET_EVENT_PUT = 2,
/**
* A REPLY operation has completed. This event is logged after the
* data (if any) from the REPLY has been written into the MD.
*/
LNET_EVENT_REPLY = 3,
/** An acknowledgment has been received. */
LNET_EVENT_ACK = 4,
/**
* An outgoing send (PUT or GET) operation has completed. This event
* is logged after the entire buffer has been sent and it is safe for
* the caller to reuse the buffer.
*
* Note:
* - The LNET_EVENT_SEND doesn't guarantee message delivery. It can
* happen even when the message has not yet been put out on wire.
* - It's unsafe to assume that in an outgoing GET operation
* the LNET_EVENT_SEND event would happen before the
* LNET_EVENT_REPLY event. The same holds for LNET_EVENT_SEND and
* LNET_EVENT_ACK events in an outgoing PUT operation.
*/
LNET_EVENT_SEND = 5,
/**
* A MD has been unlinked. Note that LNetMDUnlink() does not
* necessarily trigger an LNET_EVENT_UNLINK event.
* \see LNetMDUnlink
*/
LNET_EVENT_UNLINK = 6,
};
/**
* True when (a) - (b), converted to signed long, is greater than zero.
* Each argument is expanded exactly once.
*
* NOTE(review): presumably a wraparound-tolerant "a is later than b" test
* for event sequence numbers - confirm against the callers.
*/
#define LNET_SEQ_GT(a, b) (((signed long)((a) - (b))) > 0)
/**
* Information about an event on a MD. A pointer to this structure is what
* an lnet_handler_t callback receives.
*/
struct lnet_event {
/** The identifier (nid, pid) of the target. */
struct lnet_processid target;
/** The identifier (nid, pid) of the initiator. */
struct lnet_processid initiator;
/** The source (nid, pid) on the initiator. */
struct lnet_processid source;
/**
* The NID of the immediate sender. If the request has been forwarded
* by routers, this is the NID of the last hop; otherwise it's the
* same as the source.
*/
struct lnet_nid sender;
/** Indicates the type of the event. */
enum lnet_event_kind type;
/** The portal table index specified in the request */
unsigned int pt_index;
/** A copy of the match bits specified in the request. */
__u64 match_bits;
/** The length (in bytes) specified in the request. */
unsigned int rlength;
/**
* The length (in bytes) of the data that was manipulated by the
* operation. For truncated operations, the manipulated length will be
* the number of bytes specified by the MD (possibly with an offset,
* see struct lnet_md). For all other operations, the manipulated length
* will be the length of the requested operation, i.e. rlength.
*/
unsigned int mlength;
/**
* The handle to the MD associated with the event. The handle may be
* invalid if the MD has been unlinked.
*/
struct lnet_handle_md md_handle;
/**
* A snapshot of relevant state of the MD immediately after the event
* has been processed (applies to md_start, md_user_ptr and
* md_options).
*/
void *md_start;
void *md_user_ptr;
unsigned int md_options;
/**
* 64 bits of out-of-band user data. Only valid for LNET_EVENT_PUT.
* \see LNetPut
*/
__u64 hdr_data;
/**
* The message type, to ensure a handler for LNET_EVENT_SEND can
* distinguish between LNET_MSG_GET and LNET_MSG_PUT.
*/
__u32 msg_type;
/**
* Indicates the completion status of the operation. It's 0 for
* successful operations, otherwise it's an error code.
*/
int status;
/**
* Indicates whether the MD has been unlinked. Note that:
* - An event with unlinked set is the last event on the MD.
* - This field is also set for an explicit LNET_EVENT_UNLINK event.
* \see LNetMDUnlink
*/
int unlinked;
/**
* The displacement (in bytes) into the memory region that the
* operation used. The offset can be determined by the operation for
* a remote managed MD or by the local MD.
* \see struct lnet_md::umd_options
*/
unsigned int offset;
/**
* The sequence number for this event. Sequence numbers are unique
* to each event.
*/
volatile unsigned long sequence;
};
/** @} lnet_eq */
/** \addtogroup lnet_data
* @{ */
/**
* Specify whether an acknowledgment should be sent by target when the PUT
* operation completes (i.e., when the data has been written to a MD of the
* target process).
*
* \see struct lnet_md::umd_options for the discussion on LNET_MD_ACK_DISABLE
* by which acknowledgments can be disabled for a MD.
*/
enum lnet_ack_req {
/** Request an acknowledgment (implicit value 0) */
LNET_ACK_REQ,
/** Request that no acknowledgment should be generated (implicit value 1). */
LNET_NOACK_REQ
};
/**
* UDSP action types. There are two available actions:
* 1. PRIORITY - set priority of matching LNet constructs
* 2. PREFERRED LIST - set preferred list of matching LNet constructs
*
* EN_LNET_UDSP_ACTION_NONE (0) stands for "no action".
*/
enum lnet_udsp_action_type {
/** no action */
EN_LNET_UDSP_ACTION_NONE = 0,
/** assign a priority to matching constructs */
EN_LNET_UDSP_ACTION_PRIORITY = 1,
/** assign a preferred list of NIDs to matching constructs */
EN_LNET_UDSP_ACTION_PREFERRED_LIST = 2,
};
/** @} lnet_data */
/** @} lnet */
#endif