Viewing: lustre_disk.h
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, 2016, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
*
* Lustre disk format definitions.
*
* Author: Nathan Rutman <nathan.rutman@seagate.com>
*/
#ifndef _UAPI_LUSTRE_DISK_H
#define _UAPI_LUSTRE_DISK_H
/** \defgroup disk disk
*
* @{
*/
#include <linux/types.h>
#include <linux/uuid.h>
#include <linux/lnet/lnet-types.h> /* for lnet_nid_t */
#include <linux/lustre/lustre_param.h> /* for LDD_PARAM_LEN */
/****************** on-disk files ********************/
#define MDT_LOGS_DIR "LOGS" /* COMPAT_146 */
#define MOUNT_CONFIGS_DIR "CONFIGS"
#define CONFIGS_FILE "mountdata"
/** Persistent mount data are stored on the disk in this file. */
#define MOUNT_DATA_FILE MOUNT_CONFIGS_DIR"/"CONFIGS_FILE
#define LAST_RCVD "last_rcvd"
#define REPLY_DATA "reply_data"
#define LOV_OBJID "lov_objid"
#define LOV_OBJSEQ "lov_objseq"
#define HEALTH_CHECK "health_check"
#define CAPA_KEYS "capa_keys"
#define CHANGELOG_USERS "changelog_users"
#define MGS_NIDTBL_DIR "NIDTBL_VERSIONS"
#define QMT_DIR "quota_master"
#define QSD_DIR "quota_slave"
#define QSD_DIR_DT "quota_slave_dt"
#define QSD_DIR_MD "quota_slave_md"
#define HSM_ACTIONS "hsm_actions"
#define LFSCK_DIR "LFSCK"
#define LFSCK_BOOKMARK "lfsck_bookmark"
#define LFSCK_LAYOUT "lfsck_layout"
#define LFSCK_NAMESPACE "lfsck_namespace"
#define REMOTE_PARENT_DIR "REMOTE_PARENT_DIR"
#define INDEX_BACKUP_DIR "index_backup"
#define MDT_ORPHAN_DIR "PENDING"
/*
 * On-disk configuration file. In host-endian order.
 *
 * The numbers quoted in the trailing comments below ('200', '4096', '8192',
 * and the bare "1024") are the byte offsets of those fields from the start
 * of the structure: the eight __u32 fields take 32 bytes, the two 64-byte
 * names bring the offset to 160, and the 40-byte UUID to 200. The array
 * sizes "1024 - 200" and "4096 - 1024" are written that way so that
 * ldd_padding starts at offset 1024 and ldd_mount_opts at offset 4096.
 * Do not reorder or resize fields: the offsets are the on-disk format.
 */
struct lustre_disk_data {
__u32 ldd_magic;
__u32 ldd_feature_compat; /* compatible feature flags */
__u32 ldd_feature_rocompat; /* read-only compatible feature flags */
__u32 ldd_feature_incompat; /* incompatible feature flags */
__u32 ldd_config_ver; /* config rewrite count - not used */
__u32 ldd_flags; /* LDD_SV_TYPE */
__u32 ldd_svindex; /* server index (0001), must match
* svname
*/
__u32 ldd_mount_type; /* target fs type LDD_MT_* */
char ldd_fsname[64]; /* filesystem this server is part of,
* MTI_NAME_MAXLEN
*/
char ldd_svname[64]; /* this server's name (lustre-mdt0001)*/
__u8 ldd_uuid[40]; /* server UUID (COMPAT_146) */
char ldd_userdata[1024 - 200]; /* arbitrary user string '200' */
__u8 ldd_padding[4096 - 1024]; /* 1024 */
char ldd_mount_opts[4096]; /* target fs mount opts '4096' */
/* LDD_PARAM_LEN comes from lustre_param.h (see the #include above) */
char ldd_params[LDD_PARAM_LEN];/* key=value pairs '8192' */
};
/****************** persistent mount data *********************/
/*
 * LDD_F_* flag bits. Each flag occupies a single bit; bit 9 (0x0200) is
 * retired and bit 11 (0x0800) has no flag defined in this header.
 */

/* server type bits */
#define LDD_F_SV_TYPE_MDT	(1 << 0)
#define LDD_F_SV_TYPE_OST	(1 << 1)
#define LDD_F_SV_TYPE_MGS	(1 << 2)
/* all three server type bits together */
#define LDD_F_SV_TYPE_MASK \
	(LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MGS)
#define LDD_F_SV_ALL		(1 << 3)

/** need an index assignment */
#define LDD_F_NEED_INDEX	(1 << 4)
/** never registered */
#define LDD_F_VIRGIN		(1 << 5)
/** update the config logs for this server */
#define LDD_F_UPDATE		(1 << 6)
/** rewrite the LDD */
#define LDD_F_REWRITE_LDD	(1 << 7)
/** regenerate config logs for this fs or server */
#define LDD_F_WRITECONF		(1 << 8)
/** COMPAT_14: bit 9 (0x0200) was LDD_F_UPGRADE14, deprecated since 1.8 */
/** process as lctl conf_param */
#define LDD_F_PARAM		(1 << 10)
/** all nodes are specified as service nodes */
#define LDD_F_NO_PRIMNODE	(1 << 12)
/** IR enable flag */
#define LDD_F_IR_CAPABLE	(1 << 13)
/** the MGS refused to register the target. */
#define LDD_F_ERROR		(1 << 14)
/** process as lctl set_param */
#define LDD_F_PARAM2		(1 << 15)
/** the target shouldn't use local logs */
#define LDD_F_NO_LOCAL_LOGS	(1 << 16)

#define LDD_MAGIC		0x1dd00001

#define XATTR_TARGET_RENAME	"trusted.rename_tgt"
/*
 * Backing filesystem type of a target; struct lustre_disk_data documents
 * its ldd_mount_type field as "target fs type LDD_MT_*".
 *
 * The numbering is explicit and has a gap: no enumerator has the value 2.
 * LDD_MT_LAST has no initializer, so it is one more than the preceding
 * enumerator (currently 7) and moves whenever a new type is appended.
 */
enum ldd_mount_type {
LDD_MT_EXT3 = 0,
LDD_MT_LDISKFS = 1,
LDD_MT_REISERFS = 3,
LDD_MT_LDISKFS2 = 4,
LDD_MT_ZFS = 5,
LDD_MT_WBCFS = 6,
LDD_MT_LAST
};
/****************** last_rcvd file *********************/
/*
 * Size constants for the last_rcvd file. LR_SERVER_SIZE and LR_CLIENT_SIZE
 * size the padding arrays of struct lr_server_data and struct
 * lsd_client_data; LR_EXPIRE_INTERVALS sizes lsd_trans_table.
 */
#define LR_EXPIRE_INTERVALS 16 /**< number of intervals to track transno */
#define LR_SERVER_SIZE 512
#define LR_CLIENT_START 8192
#define LR_CLIENT_SIZE 128
/* Fail the build if the client area would start inside the server area. */
#if LR_CLIENT_START < LR_SERVER_SIZE
#error "Can't have LR_CLIENT_START < LR_SERVER_SIZE"
#endif
/*
 * Data stored per server at the head of the last_rcvd file. In le32 order.
 *
 * The fields from lsd_uuid through lsd_expire_intervals add up to 288 bytes
 * (every field falls on its natural alignment, so there are no holes); that
 * is the literal 288 in the lsd_padding size, which pads the structure out
 * to LR_SERVER_SIZE bytes. Adding a field means taking the same number of
 * bytes away from lsd_padding by raising the 288.
 */
struct lr_server_data {
__u8 lsd_uuid[40]; /* server UUID */
__u64 lsd_last_transno; /* last completed transaction ID */
__u64 lsd_compat14; /* reserved - compat with old last_rcvd */
__u64 lsd_mount_count; /* incarnation number */
__u32 lsd_feature_compat; /* compatible feature flags */
__u32 lsd_feature_rocompat;/* read-only compatible feature flags */
__u32 lsd_feature_incompat;/* incompatible feature flags */
__u32 lsd_server_size; /* size of server data area */
__u32 lsd_client_start; /* start of per-client data area */
__u16 lsd_client_size; /* size of per-client data area */
__u16 lsd_subdir_count; /* number of subdirectories for objects */
__u64 lsd_catalog_oid; /* recovery catalog object id */
__u32 lsd_catalog_ogen; /* recovery catalog inode generation */
__u8 lsd_peeruuid[40]; /* UUID of MDS associated with this OST */
__u32 lsd_osd_index; /* index number of OST in LOV */
__u32 lsd_max_clients; /* max number of clients ever connected */
__u32 lsd_start_epoch; /* VBR: start epoch from last boot */
/** transaction values since lsd_trans_table_time */
__u64 lsd_trans_table[LR_EXPIRE_INTERVALS];
/** start point of transno table below */
__u32 lsd_trans_table_time; /* time of first slot in table above */
__u32 lsd_expire_intervals; /* LR_EXPIRE_INTERVALS */
/* 288 == bytes used by the fields above; pads to LR_SERVER_SIZE */
__u8 lsd_padding[LR_SERVER_SIZE - 288];
};
/*
 * Data stored per client in the last_rcvd file. In le32 order.
 *
 * The fields from lcd_uuid through lcd_generation add up to 128 bytes, which
 * is exactly LR_CLIENT_SIZE, so lcd_padding currently has zero elements.
 */
struct lsd_client_data {
__u8 lcd_uuid[40]; /* client UUID */
__u64 lcd_last_transno; /* last completed transaction ID */
__u64 lcd_last_xid; /* xid for the last transaction */
__u32 lcd_last_result; /* result from last RPC */
__u32 lcd_last_data; /* per-op data (disposition for
* open &c.)
*/
/* for MDS_CLOSE requests */
__u64 lcd_last_close_transno; /* last completed transaction ID */
__u64 lcd_last_close_xid; /* xid for the last transaction */
__u32 lcd_last_close_result; /* result from last RPC */
__u32 lcd_last_close_data; /* per-op data */
/* VBR: last versions */
__u64 lcd_pre_versions[4];
__u32 lcd_last_epoch;
/* generation counter of client slot in last_rcvd */
__u32 lcd_generation;
/*
 * 128 == bytes used by the fields above. With LR_CLIENT_SIZE at 128 this is
 * a zero-length array, which ISO C does not allow (it is a compiler
 * extension); it only marks where padding would go if the slot grew.
 */
__u8 lcd_padding[LR_CLIENT_SIZE - 128];
};
/* Data stored in each slot of the reply_data file.
*
* The lrd_client_gen field is assigned with lcd_generation value
* to allow identify which client the reply data belongs to.
*
* Version 1 of the slot layout: three __u64 plus two __u32, 32 bytes.
* struct lsd_reply_header sizes its padding from sizeof() of this structure.
*/
struct lsd_reply_data_v1 {
__u64 lrd_transno; /* transaction number */
__u64 lrd_xid; /* transmission id */
__u64 lrd_data; /* per-operation data */
__u32 lrd_result; /* request result */
__u32 lrd_client_gen; /* client generation */
};
/*
 * Version 2 of the reply_data slot layout. The first five fields are the
 * same as in struct lsd_reply_data_v1; lrd_batch_idx and seven spare __u32
 * follow, for 64 bytes in total.
 */
struct lsd_reply_data_v2 {
__u64 lrd_transno; /* transaction number */
__u64 lrd_xid; /* transmission id */
__u64 lrd_data; /* per-operation data */
__u32 lrd_result; /* request result */
__u32 lrd_client_gen; /* client generation */
__u32 lrd_batch_idx; /* sub request index in the batched RPC */
__u32 lrd_padding[7]; /* unused fields, total size is 8X __u64 */
};
/* Unversioned name: "struct lsd_reply_data" means the v2 layout. */
#define lsd_reply_data lsd_reply_data_v2
/* Header of the reply_data file */
/*
 * Magic numbers for the reply_data header, one per slot layout version
 * (the _V1/_V2 suffixes pair with struct lsd_reply_data_v1/_v2).
 */
#define LRH_MAGIC_V1 0xbdabda01
#define LRH_MAGIC_V2 0xbdabda02
/*
 * Default magic. It has to name the same version as the unversioned
 * lsd_reply_data alias, which selects the v2 slot layout; pairing the v2
 * layout with the V1 magic would describe 64-byte slots as 32-byte ones.
 */
#define LRH_MAGIC LRH_MAGIC_V2
/*
 * Don't change the header size for compatibility.
 *
 * The three __u32 fields take 12 bytes and lrh_pad fills the rest, so the
 * header is always exactly sizeof(struct lsd_reply_data_v1) bytes. The pad
 * is deliberately tied to the v1 structure, not to the lsd_reply_data
 * alias, so the header does not grow when the slot layout does.
 */
struct lsd_reply_header {
__u32 lrh_magic;
__u32 lrh_header_size;
__u32 lrh_reply_size;
__u8 lrh_pad[sizeof(struct lsd_reply_data_v1) - 12];
};
/****************** nodemap *********************/
/*
 * Type of a nodemap index record. Values 7 to 14 are not assigned in this
 * header. NOTE(review): the nodemap_key comment says the type occupies 4
 * bits of the nodemap id, which would make 15 the largest possible value --
 * confirm before adding types.
 */
enum nodemap_idx_type {
NODEMAP_EMPTY_IDX = 0, /* index created with blank record */
NODEMAP_CLUSTER_IDX = 1, /* a nodemap cluster of nodes */
NODEMAP_RANGE_IDX = 2, /* nid range assigned to a nm cluster */
NODEMAP_UIDMAP_IDX = 3, /* uid map assigned to a nm cluster */
NODEMAP_GIDMAP_IDX = 4, /* gid map assigned to a nm cluster */
NODEMAP_PROJIDMAP_IDX = 5, /* projid map assigned to nm cluster */
NODEMAP_NID_MASK_IDX = 6, /* large NID setup for a nm cluster */
NODEMAP_GLOBAL_IDX = 15, /* stores nodemap activation status */
};
/* This is needed for struct nodemap_cluster_rec. Please don't move
* to lustre_idl.h which will break user land builds.
*
* nodemap_cluster_rec declares ncr_name with LUSTRE_NODEMAP_NAME_LENGTH + 1
* bytes, so changing this value changes the on-disk record layout.
*/
#define LUSTRE_NODEMAP_NAME_LENGTH 16
#define LUSTRE_NODEMAP_GUESS "?"
/*
 * lu_nodemap flags, one bit per enumerator.
 *
 * struct nodemap_cluster_rec keeps these in the 8-bit field ncr_flags and
 * all eight bits are taken, so there is no room for another flag here.
 */
enum nm_flag_bits {
	NM_FL_ALLOW_ROOT_ACCESS	= 1 << 0,
	NM_FL_TRUST_CLIENT_IDS	= 1 << 1,
	NM_FL_DENY_UNKNOWN	= 1 << 2,
	NM_FL_MAP_UID		= 1 << 3,
	NM_FL_MAP_GID		= 1 << 4,
	NM_FL_ENABLE_AUDIT	= 1 << 5,
	NM_FL_FORBID_ENCRYPT	= 1 << 6,
	NM_FL_MAP_PROJID	= 1 << 7,
};
/*
 * Second set of nodemap flags, one bit per enumerator. struct
 * nodemap_cluster_rec keeps these in the 8-bit field ncr_flags2; bits 4 to
 * 7 are not assigned.
 */
enum nm_flag2_bits {
	NM_FL2_READONLY_MOUNT	= 1 << 0,
	NM_FL2_DENY_MOUNT	= 1 << 1,
	NM_FL2_FILESET_USE_IAM	= 1 << 2,
	NM_FL2_GSS_IDENTIFY	= 1 << 3,
};
/* Nodemap records, uses 32 byte record length.
* New nodemap config records can be added into NODEMAP_CLUSTER_IDX
* with a new nk_cluster_subid value, as long as the records are
* kept at 32 bytes in size. New global config records can be added
* into NODEMAP_GLOBAL_IDX with a new nk_global_subid. This avoids
* breaking compatibility. Do not change the record size. If a
* new ID type or range is needed, a new IDX type should be used.
*
* Layout of this record: 17 bytes of name, two 8-bit flag fields and one
* padding byte (20 bytes so far), then three __u32 for 32 bytes in total.
* The two flag fields are bit-fields of enum type, so the 32-byte total
* relies on the compiler packing them into the two bytes directly after
* ncr_name.
*/
struct nodemap_cluster_rec {
char ncr_name[LUSTRE_NODEMAP_NAME_LENGTH + 1];
enum nm_flag_bits ncr_flags:8;
enum nm_flag2_bits ncr_flags2:8;
__u8 ncr_padding1; /* zeroed since 2.16 */
__u32 ncr_squash_projid;
__u32 ncr_squash_uid;
__u32 ncr_squash_gid;
};
/*
 * Nodemap range type. NOTE(review): the comment inside struct nodemap_key
 * says the first 4 bits of nk_range_id are the range type; presumably these
 * are the values stored there -- confirm against the code building the key.
 */
enum nm_range_type_bits {
NM_RANGE_FL_REG = 0x0,
NM_RANGE_FL_BAN = 0x1,
};
/*
 * NID range record: a start and an end NID followed by padding.
 * lnet_nid_t is 8 bytes, so with the two __u64 padding fields the record
 * is 32 bytes long.
 */
struct nodemap_range_rec {
lnet_nid_t nrr_start_nid;
lnet_nid_t nrr_end_nid;
__u64 nrr_padding1; /* zeroed since 2.16 */
__u64 nrr_padding2; /* zeroed since 2.16 */
};
/*
 * NID range record expressed as a NID prefix plus a netmask.
 *
 * The fields after nrr_nid_prefix take 12 bytes (4 + 4 + 2 + 1 + 1).
 * NOTE(review): struct lnet_nid is declared in lnet-types.h, not here; the
 * record is 32 bytes only if that structure is 20 bytes -- confirm.
 */
struct nodemap_range2_rec {
struct lnet_nid nrr_nid_prefix;
__u32 nrr_padding1; /* padding may be used for nid_prefix */
__u32 nrr_padding2; /* if nrr_nid_prefix.nid_size > 12 */
__u16 nrr_padding3; /* zeroed since 2.16 */
__u8 nrr_padding4; /* zeroed since 2.16 */
__u8 nrr_netmask;
};
/*
 * ID mapping record: one 32-bit id (nir_id_fs) and 28 bytes of padding,
 * 32 bytes in total.
 */
struct nodemap_id_rec {
__u32 nir_id_fs;
__u32 nir_padding1; /* zeroed since 2.16 */
__u64 nir_padding2; /* zeroed since 2.16 */
__u64 nir_padding3; /* zeroed since 2.16 */
__u64 nir_padding4; /* zeroed since 2.16 */
};
/*
 * Global nodemap record: a single byte (ngr_is_active) and 31 bytes of
 * padding, 32 bytes in total. enum nodemap_idx_type describes
 * NODEMAP_GLOBAL_IDX as storing the nodemap activation status.
 */
struct nodemap_global_rec {
__u8 ngr_is_active;
__u8 ngr_padding1; /* zeroed since 2.16 */
__u16 ngr_padding2; /* zeroed since 2.16 */
__u32 ngr_padding3; /* zeroed since 2.16 */
__u64 ngr_padding4; /* zeroed since 2.16 */
__u64 ngr_padding5; /* zeroed since 2.16 */
__u64 ngr_padding6; /* zeroed since 2.16 */
};
/*
 * Roles and privileges record: four __u64, 32 bytes in total. The enums
 * named in the field comments are not declared in this header.
 */
struct nodemap_cluster_roles_rec {
__u64 ncrr_roles; /* enum nodemap_rbac_roles */
__u64 ncrr_privs; /* enum nodemap_raise_privs */
__u64 ncrr_roles_raise; /* enum nodemap_rbac_roles */
__u64 ncrr_unused1; /* zeroed since 2.16 (always) */
};
/*
 * ID offset record: a start/limit pair of __u32 for each of uid, gid and
 * projid, plus two __u32 of padding; 32 bytes in total.
 */
struct nodemap_offset_rec {
__u32 nor_start_uid;
__u32 nor_limit_uid;
__u32 nor_start_gid;
__u32 nor_limit_gid;
__u32 nor_start_projid;
__u32 nor_limit_projid;
__u32 nor_padding1;
__u32 nor_padding2;
};
/* fileset fragment length for each nodemap record: 28 bytes for fragments */
/*
 * Computed as the size of a nodemap record minus the two __u16 fields that
 * follow the fragment in struct nodemap_fileset_rec, so it is 28 only while
 * struct nodemap_cluster_rec stays 32 bytes.
 */
#define LUSTRE_NODEMAP_FILESET_FRAGMENT_SIZE \
(sizeof(struct nodemap_cluster_rec) - (2 * sizeof(__u16)))
/* fileset subid range to support a PATH_MAX characters fileset and header */
#define LUSTRE_NODEMAP_FILESET_SUBID_RANGE 256
/* max number of filesets per nodemap */
#define LUSTRE_NODEMAP_FILESET_NUM_MAX 256
/*
 * Fileset flags. struct nodemap_fileset_header_rec keeps them in the 8-bit
 * field nfhr_flags.
 */
enum nm_fileset_flag_bits {
NM_FS_FL_READONLY = 0x1,
};
/*
 * Header record of a fileset: 8 bits of flags, the rest is padding; 32
 * bytes in total. nfhr_flags is a bit-field of enum type, so the layout
 * relies on the compiler placing it in the first byte. Note that the
 * padding fields use the prefix "nfr_" while the flags field uses "nfhr_".
 */
struct nodemap_fileset_header_rec {
enum nm_fileset_flag_bits nfhr_flags:8;
__u8 nfr_padding1; /* zeroed since 2.16 (always) */
__u16 nfr_padding2; /* zeroed since 2.16 (always) */
__u32 nfr_padding3; /* zeroed since 2.16 (always) */
__u64 nfr_padding4; /* zeroed since 2.16 (always) */
__u64 nfr_padding5; /* zeroed since 2.16 (always) */
__u64 nfr_padding6; /* zeroed since 2.16 (always) */
};
/*
 * One fragment of a fileset path: the fragment bytes, its id, and two bytes
 * of padding. The fragment array is sized so that the record has the same
 * size as struct nodemap_cluster_rec.
 */
struct nodemap_fileset_rec {
/* 28 bytes for fileset path fragment */
char nfr_path_fragment[LUSTRE_NODEMAP_FILESET_FRAGMENT_SIZE];
__u16 nfr_fragment_id; /* fileset fragment id */
__u16 nfr_padding1; /* zeroed since 2.16 (always) */
};
/*
 * User capabilities record: a 64-bit capability value, a one-byte type and
 * 23 bytes of padding; 32 bytes in total. enum nodemap_cap_type is not
 * declared in this header.
 */
struct nodemap_user_capabilities_rec {
__u64 nucr_caps;
__u8 nucr_type; /* enum nodemap_cap_type */
__u8 nucr_padding1; /* zeroed since 2.16.51 (always) */
__u16 nucr_padding2; /* zeroed since 2.16.51 (always) */
__u32 nucr_padding3; /* zeroed since 2.16.51 (always) */
__u64 nucr_padding4; /* zeroed since 2.16.51 (always) */
__u64 nucr_padding5; /* zeroed since 2.16.51 (always) */
};
/*
 * Any one nodemap record. Each member is one of the fixed-size record
 * layouts declared above; the union is as large as its largest member, so
 * a member that outgrew the others would enlarge every record.
 */
union nodemap_rec {
struct nodemap_cluster_rec ncr;
struct nodemap_range_rec nrr;
struct nodemap_range2_rec nrr2;
struct nodemap_id_rec nir;
struct nodemap_global_rec ngr;
struct nodemap_cluster_roles_rec ncrr;
struct nodemap_offset_rec nor;
struct nodemap_fileset_header_rec nfhr;
struct nodemap_fileset_rec nfr;
struct nodemap_user_capabilities_rec nucr;
};
/* sub-keys for records of type NODEMAP_CLUSTER_IDX */
enum nodemap_cluster_rec_subid {
NODEMAP_CLUSTER_REC = 0, /* nodemap_cluster_rec */
NODEMAP_CLUSTER_ROLES = 1, /* nodemap_cluster_roles_rec */
NODEMAP_CLUSTER_OFFSET = 2, /* nodemap_offset_rec: UID/GID/PROJID offset */
NODEMAP_CLUSTER_CAPS = 3, /* User caps, nodemap_user_capabilities_rec */
/*
* A fileset may consist of up to 256 consecutive subids
* (LUSTRE_NODEMAP_FILESET_SUBID_RANGE). Each consists
* of a header (nodemap_fileset_header_rec) and up to 255 fragments
* (nodemap_fileset_rec).
*/
NODEMAP_FILESET = 512,
/*
* Depending on its length, its fragments may use several subids
* in the range of 512 to 66,047 (assuming max 256 filesets, i.e.
* 512 + 256 * 256 - 1). The first
* subid of each fileset range is its header.
*/
};
/*
 * Key of a nodemap index record: a 32-bit nodemap id followed by a 32-bit
 * value whose meaning depends on the record type (the anonymous union gives
 * that one value several names).
 *
 * first 4 bits of the nodemap_id is the index type
 */
struct nodemap_key {
__u32 nk_nodemap_id;
union {
__u32 nk_cluster_subid;
/* first 4 bits of nk_range_id are range type */
__u32 nk_range_id;
__u32 nk_id_client;
__u32 nk_unused;
};
};
/*
 * NM_TYPE_MASK has the low 28 bits set and NM_TYPE_SHIFT is 28, which
 * splits a 32-bit value into a 4-bit upper part and a 28-bit lower part.
 * NOTE(review): presumably the upper part is the "first 4 bits" type field
 * mentioned above; the code that packs the key is not in this header.
 */
#define NM_TYPE_MASK 0x0FFFFFFF
#define NM_TYPE_SHIFT 28
/* file structure used for saving OI scrub bookmark state for restart */
/*
 * OSD_OI_FID_NR_MAX is 1 << 10 = 1024 and has type unsigned long.
 * SCRUB_OI_BITMAP_SIZE divides that by 8, giving 128: the number of bytes
 * needed for a bitmap with one bit per possible OI file (it sizes
 * sf_oi_bitmap in struct scrub_file).
 */
#define OSD_OI_FID_OID_BITS_MAX 10
#define OSD_OI_FID_NR_MAX (1UL << OSD_OI_FID_OID_BITS_MAX)
#define SCRUB_OI_BITMAP_SIZE (OSD_OI_FID_NR_MAX >> 3)
/* values for sf_magic in struct scrub_file */
#define SCRUB_MAGIC_V1 0x4C5FD252
#define SCRUB_MAGIC_V2 0x4C5FE253
/*
 * Scrub state flags, one bit per enumerator. struct scrub_file refers to
 * this enum for its 64-bit sf_flags field.
 */
enum scrub_flags {
	/* OI files have been recreated, OI mappings should be re-inserted. */
	SF_RECREATED	= 1ULL << 0,

	/* OI files are invalid, should be rebuild ASAP */
	SF_INCONSISTENT	= 1ULL << 1,

	/* OI scrub is triggered automatically. */
	SF_AUTO		= 1ULL << 2,

	/* The device is upgraded from 1.8 format. */
	SF_UPGRADE	= 1ULL << 3,
};
/*
 * State of the OI scrub. struct scrub_file refers to this enum for its
 * 16-bit sf_status field, so the numeric values are part of the file
 * format and must not be renumbered.
 */
enum scrub_status {
/* The scrub file is new created, for new MDT, upgrading from old disk,
* or re-creating the scrub file manually.
*/
SS_INIT = 0,
/* The scrub is checking/repairing the OI files. */
SS_SCANNING = 1,
/* The scrub checked/repaired the OI files successfully. */
SS_COMPLETED = 2,
/* The scrub failed to check/repair the OI files. */
SS_FAILED = 3,
/* The scrub is stopped manually, the OI files may be inconsistent. */
SS_STOPPED = 4,
/* The scrub is paused automatically when umount. */
SS_PAUSED = 5,
/* The scrub crashed during the scanning, should be restarted. */
SS_CRASHED = 6,
};
/*
 * Scrub parameters, one bit per enumerator. struct scrub_file refers to
 * this enum for its 16-bit sf_param field.
 */
enum scrub_param {
	/* Exit when fail. */
	SP_FAILOUT	= 1 << 0,

	/* Check only without repairing. */
	SP_DRYRUN	= 1 << 1,
};
/*
 * Type of sf_uuid in struct scrub_file: guid_t when built with __KERNEL__
 * defined, uuid_le otherwise. NOTE(review): both are expected to come from
 * <linux/uuid.h> and to be 16 bytes (sf_uuid is documented as a 128-bit
 * uuid) -- confirm for the userspace headers in use.
 */
#ifdef __KERNEL__
#define sfid_t guid_t
#else
#define sfid_t uuid_le
#endif
/*
 * Contents of the OI scrub bookmark file: identity and state of the scrub,
 * timestamps, positions, counters, reserved space, and a bitmap of
 * recreated OI files.
 */
struct scrub_file {
sfid_t sf_uuid; /* 128-bit uuid for volume */
__u64 sf_flags; /* see 'enum scrub_flags' */
__u32 sf_magic; /* SCRUB_MAGIC_V1/V2 */
__u16 sf_status; /* see 'enum scrub_status' */
__u16 sf_param; /* see 'enum scrub_param' */
__s64 sf_time_last_complete; /* wallclock of last scrub finish */
__s64 sf_time_latest_start; /* wallclock of last scrub run */
__s64 sf_time_last_checkpoint; /* wallclock of last checkpoint */
__u64 sf_pos_latest_start; /* OID of last scrub start */
__u64 sf_pos_last_checkpoint; /* OID of last scrub checkpoint */
__u64 sf_pos_first_inconsistent; /* OID first object to update */
__u64 sf_items_checked; /* number objects checked */
__u64 sf_items_updated; /* number objects updated */
__u64 sf_items_failed; /* number objects unrepairable */
__u64 sf_items_updated_prior; /* num objects fixed before scan */
__u64 sf_items_noscrub; /* number of objects skipped due to
* LDISKFS_STATE_LUSTRE_NOSCRUB
*/
__u64 sf_items_igif; /* number of IGIF(no FID) objects */
__u32 sf_run_time; /* scrub runtime in seconds */
__u32 sf_success_count; /* number of completed runs */
__u16 sf_oi_count; /* number of OI files */
/* 'enum scrub_internal_flags' is not declared in this header */
__u16 sf_internal_flags; /* flags to keep after reset, see
* 'enum scrub_internal_flags'
*/
__u32 sf_reserved_1;
__u64 sf_reserved_2[16];
/* SCRUB_OI_BITMAP_SIZE bytes: one bit per OI file, OSD_OI_FID_NR_MAX bits */
__u8 sf_oi_bitmap[SCRUB_OI_BITMAP_SIZE]; /* OI files recreated */
};
/** @} disk */
#endif /* _UAPI_LUSTRE_DISK_H */