Viewing: lnet-idl.h
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, 2017, Intel Corporation.
*/
/* This file is part of Lustre, http://www.lustre.org/ */
#ifndef __UAPI_LNET_IDL_H__
#define __UAPI_LNET_IDL_H__
#include <linux/types.h>
/************************************************************************
* Core LNet wire message format.
* These are sent in sender's byte order (i.e. receiver flips).
*/
/** Address of an end-point in an LNet network.
*
* A node can have multiple end-points and hence multiple addresses.
* An LNet network can be a simple network (e.g. tcp0) or a network of
* LNet networks connected by LNet routers. Therefore an end-point address
* has two parts: network ID, and address within a network.
* The most-significant-byte in this format is always 0. A larger value
* would imply a larger nid with a larger address.
*
* \see LNET_NIDNET, LNET_NIDADDR, and LNET_MKNID.
*/
typedef __u64 lnet_nid_t;
/*
* Address of LNet end-point in extended form
*
* To support addresses larger than 32bits we have
* an extended nid which supports up to 128 bits
* of address and is extensible.
* If nid_size is 0, then the nid can be stored in an lnet_nid_t,
* and the first 8 bytes of the 'struct lnet_nid' are identical to
* the lnet_nid_t in big-endian format.
* If nid_type == 0xff, then all other fields should be ignored
* and this is an ANY wildcard address. In particular, the nid_size
* can be 0xff without making the address too big to fit.
*/
struct lnet_nid {
__u8 nid_size; /* total bytes - 8 */
__u8 nid_type;
__be16 nid_num;
__be32 nid_addr[4];
} __attribute__((packed));
#define NID_BYTES(nid) ((nid)->nid_size + 8)
#define NID_ADDR_BYTES(nid) ((nid)->nid_size + 4)
/**
* ID of a process in a node. Shortened as PID to distinguish from
* lnet_process_id, the global process ID.
*/
typedef __u32 lnet_pid_t;
/* Packed version of struct lnet_process_id to transfer via network */
struct lnet_process_id_packed {
lnet_nid_t nid;
lnet_pid_t pid; /* node id / process id */
} __attribute__((packed));
/* The wire handle's interface cookie only matches one network interface in
* one epoch (i.e. new cookie when the interface restarts or the node
* reboots). The object cookie only matches one object on that interface
* during that object's lifetime (i.e. no cookie re-use).
*/
struct lnet_handle_wire {
__u64 wh_interface_cookie;
__u64 wh_object_cookie;
} __attribute__((packed));
enum lnet_msg_type {
LNET_MSG_ACK = 0,
LNET_MSG_PUT,
LNET_MSG_GET,
LNET_MSG_REPLY,
LNET_MSG_HELLO,
};
/* The variant fields of the portals message header are aligned on an 8
* byte boundary in the message header. Note that all types used in these
* wire structs MUST be fixed size and the smaller types are placed at the
* end.
*/
struct lnet_ack {
struct lnet_handle_wire dst_wmd;
__u64 match_bits;
__u32 mlength;
} __attribute__((packed));
struct lnet_put {
struct lnet_handle_wire ack_wmd;
__u64 match_bits;
__u64 hdr_data;
__u32 ptl_index;
__u32 offset;
} __attribute__((packed));
struct lnet_get {
struct lnet_handle_wire return_wmd;
__u64 match_bits;
__u32 ptl_index;
__u32 src_offset;
__u32 sink_length;
} __attribute__((packed));
struct lnet_reply {
struct lnet_handle_wire dst_wmd;
} __attribute__((packed));
struct lnet_hello {
__u64 incarnation;
__u32 type;
} __attribute__((packed));
union lnet_cmd_hdr {
struct lnet_ack ack;
struct lnet_put put;
struct lnet_get get;
struct lnet_reply reply;
struct lnet_hello hello;
} __attribute__((packed));
/* This is used for message headers that lnet code is manipulating.
* All fields before the union are in host-byte-order.
*/
struct lnet_hdr {
struct lnet_nid dest_nid;
struct lnet_nid src_nid;
lnet_pid_t dest_pid;
lnet_pid_t src_pid;
__u32 type; /* enum lnet_msg_type */
__u32 payload_length; /* payload data to follow */
/*<------__u64 aligned------->*/
union lnet_cmd_hdr msg;
} __attribute__((packed));
/* This is used to support conversion between an lnet_hdr and
* the content of a network message.
*/
struct _lnet_hdr_nid4 {
lnet_nid_t dest_nid;
lnet_nid_t src_nid;
lnet_pid_t dest_pid;
lnet_pid_t src_pid;
__u32 type; /* enum lnet_msg_type */
__u32 payload_length; /* payload data to follow */
/*<------__u64 aligned------->*/
union lnet_cmd_hdr msg;
} __attribute__((packed));
/* This is stored in a network message buffer. Content cannot be accessed
* without converting to an lnet_hdr.
*/
struct lnet_hdr_nid4 {
char _bytes[sizeof(struct _lnet_hdr_nid4)];
} __attribute__((packed));
/* A HELLO message contains a magic number and protocol version
* code in the header's dest_nid, the peer's NID in the src_nid, and
* LNET_MSG_HELLO in the type field. All other common fields are zero
* (including payload_size; i.e. no payload).
* This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is
* running the same protocol and to find out its NID. These LNDs should
* exchange HELLO messages when a connection is first established. Individual
* LNDs can put whatever else they fancy in lnet_hdr::msg.
*/
struct lnet_magicversion {
__u32 magic; /* LNET_PROTO_TCP_MAGIC */
__u16 version_major; /* increment on incompatible change */
__u16 version_minor; /* increment on compatible change */
} __attribute__((packed));
/* PROTO MAGIC for LNDs */
#define LNET_PROTO_IB_MAGIC 0x0be91b91
#define LNET_PROTO_GNI_MAGIC 0xb00fbabe /* ask Kim */
#define LNET_PROTO_TCP_MAGIC 0xeebc0ded
#define LNET_PROTO_KFI_MAGIC 0xdeadbeef
#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100
#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */
#define LNET_PROTO_EFA_MAGIC 0x2be092be /* 2B or 9 2B */
/* Placeholder for a future "unified" protocol across all LNDs */
/* Current LNDs that receive a request with this magic will respond
* with a "stub" reply using their current protocol */
#define LNET_PROTO_MAGIC 0x45726963 /* ! */
#define LNET_PROTO_TCP_VERSION_MAJOR 1
#define LNET_PROTO_TCP_VERSION_MINOR 0
/* Acceptor connection request */
struct lnet_acceptor_connreq {
__u32 acr_magic; /* LNET_PROTO_ACCEPTOR_MAGIC */
__u32 acr_version; /* protocol version */
__u64 acr_nid; /* target NID */
} __attribute__((packed));
#define LNET_PROTO_ACCEPTOR_VERSION 1
struct lnet_acceptor_connreq_v2 {
__u32 acr_magic; /* LNET_PROTO_ACCEPTOR_MAGIC */
__u32 acr_version; /* protocol version - 2 */
struct lnet_nid acr_nid; /* target NID */
} __attribute__((packed));
/* For use with 16-byte addresses */
#define LNET_PROTO_ACCEPTOR_VERSION_16 2
struct lnet_counters_common {
__u32 lcc_msgs_alloc;
__u32 lcc_msgs_max;
__u32 lcc_errors;
__u32 lcc_send_count;
__u32 lcc_recv_count;
__u32 lcc_route_count;
__u32 lcc_drop_count;
__u64 lcc_send_length;
__u64 lcc_recv_length;
__u64 lcc_route_length;
__u64 lcc_drop_length;
} __attribute__((packed));
#define LNET_NI_STATUS_UP 0x15aac0de
#define LNET_NI_STATUS_DOWN 0xdeadface
#define LNET_NI_STATUS_INVALID 0x00000000
struct lnet_ni_status {
lnet_nid_t ns_nid;
__u32 ns_status;
__u32 ns_msg_size; /* represents ping buffer size if message
* contains large NID addresses.
*/
} __attribute__((packed));
/* When this appears in lnet_ping_info, it will be large
* enough to hold whatever nid is present, rounded up
* to a multiple of 4 bytes.
* NOTE: all users MUST check ns_nid.nid_size is usable.
*/
struct lnet_ni_large_status {
__u32 ns_status;
struct lnet_nid ns_nid;
} __attribute__((packed));
#define LNET_MD_BUFFER_SZ 32
struct lnet_nid_md_entry {
lnet_nid_t nid;
__u8 buffer[LNET_MD_BUFFER_SZ];
} __attribute__((packed));
struct lnet_nid_metadata {
__u32 num_nid_mappings;
struct lnet_nid_md_entry nid_mappings[];
} __attribute__((packed));
/* NB: value of these features equal to LNET_PROTO_PING_VERSION_x
* of old LNet, so there shouldn't be any compatibility issue
*/
#define LNET_PING_FEAT_INVAL (0) /* no feature */
#define LNET_PING_FEAT_BASE (1 << 0) /* just a ping */
#define LNET_PING_FEAT_NI_STATUS (1 << 1) /* return NI status */
#define LNET_PING_FEAT_RTE_DISABLED (1 << 2) /* Routing enabled */
#define LNET_PING_FEAT_MULTI_RAIL (1 << 3) /* Multi-Rail aware */
#define LNET_PING_FEAT_DISCOVERY (1 << 4) /* Supports Discovery */
#define LNET_PING_FEAT_LARGE_ADDR (1 << 5) /* Large addr nids present */
#define LNET_PING_FEAT_PRIMARY_LARGE (1 << 6) /* Primary is first Large addr */
#define LNET_PING_FEAT_METADATA (1 << 7) /* LND defined NID metadata */
/*
* All ping feature bits fit to hit the wire.
* In lnet_assert_wire_constants() this is compared against its open-coded
* value, and in lnet_ping_target_update() it is used to verify that no
* unknown bits have been set.
* New feature bits can be added, just be aware that this does change the
* over-the-wire protocol.
*/
#define LNET_PING_FEAT_BITS (LNET_PING_FEAT_BASE | \
LNET_PING_FEAT_NI_STATUS | \
LNET_PING_FEAT_RTE_DISABLED | \
LNET_PING_FEAT_MULTI_RAIL | \
LNET_PING_FEAT_DISCOVERY | \
LNET_PING_FEAT_LARGE_ADDR | \
LNET_PING_FEAT_PRIMARY_LARGE | \
LNET_PING_FEAT_METADATA)
/* NOTE:
* The first address in pi_ni *must* be the loop-back nid: LNET_NID_LO_0
* The second address must be the primary nid for the host unless
* LNET_PING_FEAT_PRIMARY_LARGE is set, then the first large address
* is the preferred primary. However nodes that do not recognise that
* flag will quietly ignore it.
*/
struct lnet_ping_info {
__u32 pi_magic;
__u32 pi_features;
lnet_pid_t pi_pid;
__u32 pi_nnis; /* number of nid4 entries */
struct lnet_ni_status pi_ni[];
} __attribute__((packed));
#define LNET_PING_INFO_HDR_SIZE \
offsetof(struct lnet_ping_info, pi_ni[0])
#define LNET_PING_INFO_MIN_SIZE \
offsetof(struct lnet_ping_info, pi_ni[LNET_INTERFACES_MIN])
#define LNET_PING_INFO_LONI(PINFO) ((PINFO)->pi_ni[0].ns_nid)
#define LNET_PING_INFO_SEQNO(PINFO) ((PINFO)->pi_ni[0].ns_status)
/* If LNET_PING_FEAT_LARGE_ADDR set, pi_nnis is the number of nid4 entries
* and pi_ni[0].ns_msg_size is the total number of bytes, including header and
* lnet_ni_large_status entries which follow the lnet_ni_status entries.
* This must be a multiple of 4.
*/
#define lnet_ping_info_size(pinfo) \
(((pinfo)->pi_features & LNET_PING_FEAT_LARGE_ADDR) \
? ((pinfo)->pi_ni[0].ns_msg_size & ~3) \
: offsetof(struct lnet_ping_info, pi_ni[(pinfo)->pi_nnis]))
#endif