Viewing: lustre_user.h
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2010, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
*
* Lustre public user-space interface definitions.
*/
#ifndef _LUSTRE_USER_H
#define _LUSTRE_USER_H
/* lustreuser */
#ifndef __KERNEL__
# define __USE_ISOC99 1
# include <stdbool.h>
# include <stdio.h> /* snprintf() */
# include <stdlib.h> /* abs() */
# include <errno.h>
# include <sys/stat.h>
# define __USE_GNU 1
# define __USE_XOPEN2K8 1
# define FILEID_LUSTRE 0x97 /* for name_to_handle_at() (and llapi_fd2fid()) */
# define U32_MAX UINT32_MAX
#endif /* !__KERNEL__ */
#include <linux/fs.h>
#include <linux/limits.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/quota.h>
#include <linux/types.h>
#include <linux/unistd.h>
#include <linux/lustre/lustre_fiemap.h>
#include <linux/lustre/lustre_ver.h>
#if defined(__cplusplus)
extern "C" {
#endif
/* When the compiler is in strict ANSI mode, map 'typeof' onto the
 * '__typeof__' spelling.
 */
#ifdef __STRICT_ANSI__
#define typeof __typeof__
#endif
/*
* This is a temporary solution of adding quota type.
* Should be removed as soon as system header is updated.
*/
/* Force this header's own quota-type count and name table, replacing any
 * earlier definition of either macro.
 */
#undef LL_MAXQUOTAS
#define LL_MAXQUOTAS 3
#undef INITQFNAMES
/* Quota type names in USRQUOTA/GRPQUOTA/PRJQUOTA order, followed by one
 * trailing "undefined" entry.
 */
#define INITQFNAMES { \
"user", /* USRQUOTA */ \
"group", /* GRPQUOTA */ \
"project", /* PRJQUOTA */ \
"undefined", \
}
/* Quota type indices, supplied only when not already defined. */
#ifndef USRQUOTA
#define USRQUOTA 0
#endif
#ifndef GRPQUOTA
#define GRPQUOTA 1
#endif
#ifndef PRJQUOTA
#define PRJQUOTA 2
#endif
/*
* We need to always use 64bit version because the structure
* is shared across entire cluster where 32bit and 64bit machines
* are co-existing.
*/
/* Pick the stat structure and the matching lstat/fstat/fstatat names.
 * The explicit *64 variants are used unless long is 64 bits and
 * __ARCH_WANT_STAT64 is not defined.
 */
#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
typedef struct stat64 lstat_t;
#define lstat_f lstat64
#define fstat_f fstat64
#define fstatat_f fstatat64
#else
typedef struct stat lstat_t;
#define lstat_f lstat
#define fstat_f fstat
#define fstatat_f fstatat
#endif
/* Fallback definition, used only when DECLARE_FLEX_ARRAY() is not already
 * provided by an earlier header.
 */
#ifndef DECLARE_FLEX_ARRAY
#ifdef __cplusplus
/* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */
/* C++ variant: a plain zero-length array of T named member. */
#define DECLARE_FLEX_ARRAY(T, member) T member[0]
#else
/**
* DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
*
* @TYPE: The type of each flexible array element
* @NAME: The name of the flexible array member
*
* In order to have a flexible array member in a union or alone in a
* struct, it needs to be wrapped in an anonymous struct with at least 1
* named member, but that member can be empty.
*/
#define DECLARE_FLEX_ARRAY(TYPE, NAME) \
struct { \
struct { } __empty_ ## NAME; \
TYPE NAME[]; \
}
#endif
#endif /* DECLARE_FLEX_ARRAY */
#ifndef STATX_BASIC_STATS
/*
* Timestamp structure for the timestamps in struct statx.
*
* tv_sec holds the number of seconds before (negative) or after (positive)
* 00:00:00 1st January 1970 UTC.
*
* tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time.
*
* __reserved is held in case we need a yet finer resolution.
*/
/* Local definition, compiled only when STATX_BASIC_STATS is not defined. */
struct statx_timestamp {
__s64 tv_sec; /* seconds before (negative) or after the epoch */
__u32 tv_nsec; /* nanoseconds (0..999,999,999) after tv_sec */
__s32 __reserved; /* held in case a finer resolution is needed */
};
/*
* Structures for the extended file attribute retrieval system call
* (statx()).
*
* The caller passes a mask of what they're specifically interested in as a
* parameter to statx(). What statx() actually got will be indicated in
* st_mask upon return.
*
* For each bit in the mask argument:
*
* - if the datum is not supported:
*
* - the bit will be cleared, and
*
* - the datum will be set to an appropriate fabricated value if one is
* available (eg. CIFS can take a default uid and gid), otherwise
*
* - the field will be cleared;
*
* - otherwise, if explicitly requested:
*
* - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is
* set or if the datum is considered out of date, and
*
* - the field will be filled in and the bit will be set;
*
* - otherwise, if not requested, but available in approximate form without any
* effort, it will be filled in anyway, and the bit will be set upon return
* (it might not be up to date, however, and no attempt will be made to
* synchronise the internal state first);
*
* - otherwise the field and the bit will be cleared before returning.
*
* Items in STATX_BASIC_STATS may be marked unavailable on return, but they
* will have values installed for compatibility purposes so that stat() and
* co. can be emulated in userspace.
*/
/* Local definition, compiled only when STATX_BASIC_STATS is not defined.
 * The hexadecimal markers are byte offsets; the structure ends at 0x100.
 */
struct statx {
/* 0x00 */
__u32 stx_mask; /* What results were written [uncond] */
__u32 stx_blksize; /* Preferred general I/O size [uncond] */
__u64 stx_attributes; /* Flags information about the file [uncond] */
/* 0x10 */
__u32 stx_nlink; /* Number of hard links */
__u32 stx_uid; /* User ID of owner */
__u32 stx_gid; /* Group ID of owner */
__u16 stx_mode; /* File mode */
__u16 __spare0[1]; /* fills the rest of this 16-byte group */
/* 0x20 */
__u64 stx_ino; /* Inode number */
__u64 stx_size; /* File size */
__u64 stx_blocks; /* Number of 512-byte blocks allocated */
__u64 stx_attributes_mask; /* Mask for what's supported in
* stx_attributes
*/
/* 0x40 */
struct statx_timestamp stx_atime; /* Last access time */
struct statx_timestamp stx_btime; /* File creation time */
struct statx_timestamp stx_ctime; /* Last attribute change time */
struct statx_timestamp stx_mtime; /* Last data modification time */
/* 0x80 */
__u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
__u32 stx_rdev_minor; /* minor part paired with stx_rdev_major */
__u32 stx_dev_major; /* ID of device containing file [uncond] */
__u32 stx_dev_minor; /* minor part paired with stx_dev_major */
/* 0x90 */
__u64 __spare2[14]; /* Spare space for future expansion */
/* 0x100 */
};
/*
* Flags to be stx_mask
*
* Query request/result mask for statx() and struct statx::stx_mask.
*
* These bits should be set in the mask argument of statx() to request
* particular items when calling statx().
*/
/* Individual request/result bits, one per struct statx field group. */
#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */
#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */
#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */
#define STATX_UID 0x00000008U /* Want/got stx_uid */
#define STATX_GID 0x00000010U /* Want/got stx_gid */
#define STATX_ATIME 0x00000020U /* Want/got stx_atime */
#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */
#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */
#define STATX_INO 0x00000100U /* Want/got stx_ino */
#define STATX_SIZE 0x00000200U /* Want/got stx_size */
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
/* Union of the eleven bits above (STATX_TYPE through STATX_BLOCKS). */
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
/* STATX_BASIC_STATS plus STATX_BTIME. */
#define STATX_ALL 0x00000fffU /* All currently supported flags */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
/*
* Attributes to be found in stx_attributes and masked in stx_attributes_mask.
*
* These give information about the features or the state of a file that might
* be of use to ordinary userspace programs such as GUIs or ls rather than
* specialised tools.
*
* Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
* semantically. Where possible, the numerical value is picked to correspond
* also.
*/
/* File attribute bits reported in stx_attributes. */
#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */
#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */
#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */
#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
/* Update attrs_array in lustreapi.h if new attributes are added. */
/* Synchronisation selectors; AT_STATX_SYNC_TYPE is the two-bit field
 * covering AT_STATX_FORCE_SYNC | AT_STATX_DONT_SYNC.
 */
#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */
#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
#endif /* STATX_BASIC_STATS */
/* Alias for struct statx, parallel to lstat_t for struct stat. */
typedef struct statx lstatx_t;
/* All-ones 64-bit value. */
#define LUSTRE_EOF 0xffffffffffffffffULL
/* for statfs() */
#define LL_SUPER_MAGIC 0x0BD00BD0
/* ioctl on type 'f', number 3, reading a long. */
#define LL_IOC_GETVERSION _IOR('f', 3, long)
#define FSFILT_IOC_GETVERSION LL_IOC_GETVERSION /* backward compat */
/* ioctl on type 'f', number 16, writing a __u64. */
#define LL_IOC_RESIZE_FS _IOW('f', 16, __u64)
/* FIEMAP flags supported by Lustre */
#define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER)
/**
 * State bits carried in obd_statfs::os_state.
 *
 * OS_STATFS_DOWNGRADE and OS_STATFS_UPGRADE are masks built from the
 * individual bits, not states of their own.
 */
enum obd_statfs_state {
	OS_STATFS_DEGRADED	= 0x00000001, /**< RAID degraded/rebuilding */
	OS_STATFS_READONLY	= 0x00000002, /**< filesystem is read-only */
	OS_STATFS_NOCREATE	= 0x00000004, /**< no object creation */
	OS_STATFS_UNUSED1	= 0x00000008, /**< obsolete 1.6, was EROFS=30 */
	OS_STATFS_UNUSED2	= 0x00000010, /**< obsolete 1.6, was EROFS=30 */
	OS_STATFS_ENOSPC	= 0x00000020, /**< not enough free space */
	OS_STATFS_ENOINO	= 0x00000040, /**< not enough inodes */
	OS_STATFS_SUM		= 0x00000100, /**< aggregated for all targets */
	OS_STATFS_NONROT	= 0x00000200, /**< non-rotational device */
	OS_STATFS_DOWNGRADE	= OS_STATFS_DEGRADED | OS_STATFS_READONLY |
				  OS_STATFS_NOCREATE | OS_STATFS_ENOSPC |
				  OS_STATFS_ENOINO,
	OS_STATFS_UPGRADE	= OS_STATFS_NONROT,
};

/** One row of the state lookup table used by obd_statfs_state_name_find(). */
struct obd_statfs_state_name {
	enum obd_statfs_state	osn_state;	/* the single state bit */
	const char		osn_name;	/* one-character tag */
	bool			osn_err;	/* true for error states */
};

/*
 * Return the obd_statfs state info that matches the first set bit in @state.
 *
 * This is to identify various states returned by the OST_STATFS RPC.
 *
 * The table is scanned in ascending bit order, so when several known bits
 * are set the entry for the lowest one is returned.  Bits with no table
 * entry are ignored.
 *
 * If .osn_err = true, then this is an error state indicating the target
 * is degraded, read-only, full, or should otherwise not be used.
 * If .osn_err = false, then this is an informational state and uses a
 * lower-case name to distinguish it from error conditions.
 *
 * The UNUSED[12] bits were part of os_state=EROFS=30=0x1e until Lustre 1.6.
 *
 * Return: pointer to a read-only table entry, or NULL if no known bit is
 * set in @state.
 */
static inline const
struct obd_statfs_state_name *obd_statfs_state_name_find(__u32 state)
{
	/* read-only table, terminated by an entry with osn_state == 0 */
	static const struct obd_statfs_state_name oss_names[] = {
	{ .osn_state = OS_STATFS_DEGRADED, .osn_name = 'D', .osn_err = true },
	{ .osn_state = OS_STATFS_READONLY, .osn_name = 'R', .osn_err = true },
	{ .osn_state = OS_STATFS_NOCREATE, .osn_name = 'N', .osn_err = true },
	{ .osn_state = OS_STATFS_UNUSED1,  .osn_name = '?', .osn_err = true },
	{ .osn_state = OS_STATFS_UNUSED2,  .osn_name = '?', .osn_err = true },
	{ .osn_state = OS_STATFS_ENOSPC,   .osn_name = 'S', .osn_err = true },
	{ .osn_state = OS_STATFS_ENOINO,   .osn_name = 'I', .osn_err = true },
	{ .osn_state = OS_STATFS_SUM,	   .osn_name = 'a', /* aggregate */ },
	{ .osn_state = OS_STATFS_NONROT,   .osn_name = 'f', /* flash */ },
	{ .osn_state = 0, }
	};
	int i;

	for (i = 0; oss_names[i].osn_state; i++) {
		if (state & oss_names[i].osn_state)
			return &oss_names[i];
	}

	return NULL;
}
/* Alias for OS_STATFS_NOCREATE under the name OS_STATFS_NOPRECREATE, defined
 * only while LUSTRE_VERSION_CODE is below 2.20.53.0.
 */
#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0)
#define OS_STATFS_NOPRECREATE OS_STATFS_NOCREATE
#endif
/** filesystem statistics/attributes for target device */
/* Fixed-width fields only; os_spare3..os_spare9 are unused padding that
 * lustre_swab_obd_statfs() must be kept in step with (see os_spare3/4).
 */
struct obd_statfs {
__u64 os_type; /* EXT4_SUPER_MAGIC, UBERBLOCK_MAGIC */
__u64 os_blocks; /* total size in #os_bsize blocks */
__u64 os_bfree; /* number of unused blocks */
__u64 os_bavail; /* blocks available for allocation */
__u64 os_files; /* total number of objects */
__u64 os_ffree; /* # objects that could be created */
__u8 os_fsid[40]; /* identifier for filesystem */
__u32 os_bsize; /* block size in bytes for os_blocks */
__u32 os_namelen; /* maximum length of filename in bytes*/
__u64 os_maxbytes; /* maximum object size in bytes */
__u32 os_state; /**< obd_statfs_state OS_STATFS_* */
__u32 os_fprecreated; /* objs available now to the caller */
/* used in QoS code to find preferred
* OSTs
*/
__u32 os_granted; /* space granted for MDS */
__u32 os_spare3; /* Unused padding fields. Remember */
__u32 os_spare4; /* to fix lustre_swab_obd_statfs() */
__u32 os_spare5;
__u32 os_spare6;
__u32 os_spare7;
__u32 os_spare8;
__u32 os_spare9;
};
/**
* File IDentifier.
*
* FID is a cluster-wide unique identifier of a file or an object (stripe).
* FIDs are never reused.
**/
/* Packed: 8-byte sequence, 4-byte object ID, 4-byte version. */
struct lu_fid {
/**
* FID sequence. Sequence is a unit of migration: all files (objects)
* with FIDs from a given sequence are stored on the same server.
* Lustre should support 2^64 objects, so even if each sequence
* has only a single object we can still enumerate 2^64 objects.
**/
__u64 f_seq;
/* FID number within sequence. */
__u32 f_oid;
/**
* FID version, used to distinguish different versions (in the sense
* of snapshots, etc.) of the same file system object. Not currently
* used.
**/
__u32 f_ver;
} __attribute__((packed));
/**
 * Check whether @fid has a zero sequence and a zero object ID.
 *
 * The f_ver field is not examined.
 *
 * Return: true when both f_seq and f_oid are 0, false otherwise.
 */
static inline bool fid_is_zero(const struct lu_fid *fid)
{
	if (fid->f_seq != 0)
		return false;

	return fid->f_oid == 0;
}
/* The data name_to_handle_at() places in a struct file_handle (at f_handle) */
struct lustre_file_handle {
struct lu_fid lfh_child; /* first FID in the handle payload */
struct lu_fid lfh_parent; /* second FID in the handle payload */
};
/* Currently, the filter_fid::ff_parent::f_ver is not the real parent
* MDT-object's FID::f_ver, instead it is the OST-object index in its
* parent MDT-object's layout EA.
*/
#define f_stripe_idx f_ver
/* Packed layout description embedded in struct filter_fid_210 and
 * struct filter_fid as ff_layout.
 */
struct ost_layout {
__u32 ol_stripe_size;
__u32 ol_stripe_count;
__u64 ol_comp_start;
__u64 ol_comp_end;
__u32 ol_comp_id;
} __attribute__((packed));
/* The filter_fid structure has changed several times over its lifetime.
* For a long time "trusted.fid" held the MDT inode parent FID/IGIF and
* stripe_index and the "self FID" (objid/seq) to be able to recover the
* OST objects in case of corruption. With the move to 2.4 and OSD-API for
* the OST, the "trusted.lma" xattr was added to the OST objects to store
* the "self FID" to be consistent with the MDT on-disk format, and the
* filter_fid only stored the MDT inode parent FID and stripe index.
*
* In 2.10, the addition of PFL composite layouts required more information
* to be stored into the filter_fid in order to be able to identify which
* component the OST object belonged. As well, the stripe size may vary
* between components, so it was no longer safe to assume the stripe size
* or stripe_count of a file. This is also more robust for plain layouts.
*
* For ldiskfs OSTs that were formatted with 256-byte inodes, there is not
* enough space to store both the filter_fid and LMA in the inode, so they
* are packed into struct lustre_ost_attrs on disk in trusted.lma to avoid
* an extra seek for every OST object access.
*
* In 2.11, FLR mirror layouts also need to store the layout version and
* range so that writes to old versions of the layout are not allowed.
* That ensures that mirrored objects are not modified by evicted clients,
* and ensures that the components are correctly marked stale on the MDT.
*/
/* Oldest variant: parent FID plus the object's own objid/seq.
 * NOTE(review): the _18_23 suffix presumably means Lustre 1.8-2.3 - confirm.
 */
struct filter_fid_18_23 {
struct lu_fid ff_parent; /* stripe_idx in f_ver */
__u64 ff_objid;
__u64 ff_seq;
};
/* Variant holding only the parent FID. */
struct filter_fid_24_29 {
struct lu_fid ff_parent; /* stripe_idx in f_ver */
};
/* Variant adding the layout description after the parent FID. */
struct filter_fid_210 {
struct lu_fid ff_parent; /* stripe_idx in f_ver */
struct ost_layout ff_layout;
};
/* Current packed variant: parent FID, layout, layout version and range. */
struct filter_fid {
struct lu_fid ff_parent; /* stripe_idx in f_ver */
struct ost_layout ff_layout;
__u32 ff_layout_version;
__u32 ff_range; /* range of layout version that
* write are allowed
*/
} __attribute__((packed));
/* Userspace should treat lu_fid as opaque, and only use the following methods
* to print or parse them. Other functions (e.g. compare, swab) could be moved
* here from lustre_idl.h if needed.
*/
struct lu_fid;
/* Bit flags for lustre_mdt_attrs::lma_compat. Value 0x00000002 is retired
 * (formerly LMAC_SOM) and is not reused below.
 */
enum lma_compat {
LMAC_HSM = 0x00000001,
/* LMAC_SOM = 0x00000002, obsolete since 2.8.0 */
LMAC_NOT_IN_OI = 0x00000004, /* the object does NOT need OI mapping */
LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is
* under /O/<seq>/d<x>.
*/
LMAC_STRIPE_INFO = 0x00000010, /* stripe info in the LMA EA. */
LMAC_COMP_INFO = 0x00000020, /* Component info in the LMA EA. */
LMAC_IDX_BACKUP = 0x00000040, /* Has index backup. */
};
/**
* Masks for all features that should be supported by a Lustre version to
* access a specific file.
* This information is stored in lustre_mdt_attrs::lma_incompat.
*/
/* LMA_INCOMPAT_SUPP is the mask of supported flags; it contains every flag
 * below except LMAI_RELEASED.
 */
enum lma_incompat {
LMAI_RELEASED = 0x00000001, /* file is released */
LMAI_AGENT = 0x00000002, /* agent inode */
LMAI_REMOTE_PARENT = 0x00000004, /* the parent of the object
* is on the remote MDT
*/
LMAI_STRIPED = 0x00000008, /* striped directory inode */
LMAI_ORPHAN = 0x00000010, /* inode is orphan */
LMAI_ENCRYPT = 0x00000020, /* inode is encrypted */
LMA_INCOMPAT_SUPP = (LMAI_AGENT | LMAI_REMOTE_PARENT |
LMAI_STRIPED | LMAI_ORPHAN | LMAI_ENCRYPT)
};
/**
* Following struct for object attributes, that will be kept inode's EA.
* Introduced in 2.0 release (please see b15993, for details)
* Added to all objects since Lustre 2.4 as contains self FID
*/
/* NOTE(review): the first field comment mentions lma_flags, but this
 * structure declares no member of that name.
 */
struct lustre_mdt_attrs {
/**
* Bitfield for supported data in this structure. From enum lma_compat.
* lma_self_fid and lma_flags are always available.
*/
__u32 lma_compat;
/**
* Per-file incompat feature list. Lustre version should support all
* flags set in this field. The supported feature mask is available in
* LMA_INCOMPAT_SUPP.
*/
__u32 lma_incompat;
/** FID of this inode */
struct lu_fid lma_self_fid;
};
/* Width in bits of the stripe index, and the matching low-bit mask
 * (evaluates to 0xffff).
 */
#define PFID_STRIPE_IDX_BITS 16
#define PFID_STRIPE_COUNT_MASK ((1 << PFID_STRIPE_IDX_BITS) - 1)
/* LMA header followed by the parent FID and layout fields of an OST object. */
struct lustre_ost_attrs {
/* Use lustre_mdt_attrs directly for now, need a common header
* structure if want to change lustre_mdt_attrs in future.
*/
struct lustre_mdt_attrs loa_lma;
/* Below five elements are for OST-object's PFID EA, the
* lma_parent_fid::f_ver is composed of the stripe_count (high 16 bits)
* and the stripe_index (low 16 bits), the size should not exceed
* 5 * sizeof(__u64)) to be accessible by old Lustre. If the flag
* LMAC_STRIPE_INFO is set, then loa_parent_fid and loa_stripe_size
* are valid; if the flag LMAC_COMP_INFO is set, then the next three
* loa_comp_* elements are valid.
*
* As declared, the five fields occupy 16 + 4 + 4 + 8 + 8 = 40 bytes,
* i.e. exactly 5 * sizeof(__u64).
*/
struct lu_fid loa_parent_fid;
__u32 loa_stripe_size;
__u32 loa_comp_id;
__u64 loa_comp_start;
__u64 loa_comp_end;
};
/**
* Prior to 2.4, the LMA structure also included SOM attributes which has since
* been moved to a dedicated xattr
* lma_flags was also removed because of lma_compat/incompat fields.
*/
/* Size of struct lustre_mdt_attrs plus five further 64-bit words. */
#define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64))
/* Validity states for Size-on-MDT (SoM) data. */
enum lustre_som_flags {
/* Unknown or no SoM data, must get size from OSTs. */
SOM_FL_UNKNOWN = 0x0000,
/* Known strictly correct, FLR or DoM file (SoM guaranteed). */
SOM_FL_STRICT = 0x0001,
/* Known stale - was right at some point in the past, but it is
* known (or likely) to be incorrect now (e.g. opened for write).
*/
SOM_FL_STALE = 0x0002,
/* Approximate, may never have been strictly correct,
* need to sync SOM data to achieve eventual consistency.
*/
SOM_FL_LAZY = 0x0004,
};
/* SoM record: validity word, reserved space, then size and block count. */
struct lustre_som_attrs {
__u16 lsa_valid; /* presumably enum lustre_som_flags - TODO confirm */
__u16 lsa_reserved[3];
__u64 lsa_size;
__u64 lsa_blocks;
};
/**
* OST object IDentifier.
*/
/* Packed anonymous union: the same storage is viewed either as an
 * id/sequence pair (oi) or as a struct lu_fid (oi_fid).
 */
struct ost_id {
union {
struct {
__u64 oi_id;
__u64 oi_seq;
} oi;
struct lu_fid oi_fid;
};
} __attribute__((packed));
/* printf-style format and matching argument list for an ost_id: the sequence
 * prints as hex, the id as decimal. POSTID() expands to two comma-separated
 * arguments and relies on ostid_seq()/ostid_id(), which are not defined in
 * this part of the file.
 */
#define DOSTID "%#llx:%llu"
#define POSTID(oi) ((unsigned long long)ostid_seq(oi)), \
((unsigned long long)ostid_id(oi))
/* Argument of LL_IOC_FUTIMES_3: seconds and nanoseconds for atime, mtime
 * and ctime, each stored as a __u64.
 */
struct ll_futimes_3 {
__u64 lfu_atime_sec;
__u64 lfu_atime_nsec;
__u64 lfu_mtime_sec;
__u64 lfu_mtime_nsec;
__u64 lfu_ctime_sec;
__u64 lfu_ctime_nsec;
};
/*
* Default number of mirrors for layout creation.
* Maximum number of mirrors currently supported. This limit is
* somewhat arbitrary and is not imposed by any on-disk format
* restriction. In practice, the number of layout components in a
* single file is limited to about 512 single-stripe components
* due to the current maximum trusted.lov xattr size of 65536 bytes
* (VFS XATTR_SIZE_MAX), so 256 mirrors is a realistic upper
* limit today if the stripe count or component count is more
* than 1.
*/
/* Default and maximum mirror counts. */
#define LUSTRE_MIRROR_COUNT_DEF 16
#define LUSTRE_MIRROR_COUNT_MAX 256
/* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl. */
/* One bit per lease mode. */
enum ll_lease_mode {
LL_LEASE_RDLCK = 0x01,
LL_LEASE_WRLCK = 0x02,
LL_LEASE_UNLCK = 0x04,
};
/* One bit per flag.
 * NOTE(review): presumably carried in the lil_flags field - confirm.
 */
enum ll_lease_flags {
LL_LEASE_RESYNC = 0x1,
LL_LEASE_RESYNC_DONE = 0x2,
LL_LEASE_LAYOUT_MERGE = 0x4,
LL_LEASE_LAYOUT_SPLIT = 0x8,
LL_LEASE_PCC_ATTACH = 0x10,
};
/* NOTE(review): presumably the upper bound for lil_count - confirm. */
#define IOC_IDS_MAX 4096
/* Lease request header followed by a flexible array of 32-bit IDs. */
struct ll_ioc_lease {
__u32 lil_mode;
__u32 lil_flags;
__u32 lil_count;
__u32 lil_ids[];
};
/* Same leading fields as struct ll_ioc_lease, then a mirror ID and padding
 * before the flexible ID array.
 */
struct ll_ioc_lease_id {
__u32 lil_mode;
__u32 lil_flags;
__u32 lil_count;
__u16 lil_mirror_id;
__u16 lil_padding1;
__u64 lil_padding2;
__u32 lil_ids[];
};
/*
* The ioctl naming rules:
* LL_* - works on the currently opened filehandle instead of parent dir
* *_OBD_* - gets data for both OSC or MDC (LOV, LMV indirectly)
* *_MDC_* - gets/sets data related to MDC
* *_LOV_* - gets/sets data related to OSC/LOV
* *FILE* - called on parent dir and passes in a filename
* *STRIPE* - set/get lov_user_md
* *INFO - set/get lov_user_mds_data
*/
/* lustre_ioctl.h 101-150 */
/* ioctl codes 128-143 are reserved for fsverity */
/* ioctl commands on type 'f', numbers 151-177. Numbers 154 and 155 are each
 * defined twice; the two definitions differ in direction macro and argument
 * type.
 */
#define LL_IOC_GETFLAGS _IOR('f', 151, long)
#define LL_IOC_SETFLAGS _IOW('f', 152, long)
#define LL_IOC_CLRFLAGS _IOW('f', 153, long)
#define LL_IOC_LOV_SETSTRIPE _IOW('f', 154, long)
#define LL_IOC_LOV_SETSTRIPE_NEW _IOWR('f', 154, struct lov_user_md)
#define LL_IOC_LOV_GETSTRIPE _IOW('f', 155, long)
#define LL_IOC_LOV_GETSTRIPE_NEW _IOR('f', 155, struct lov_user_md)
#define LL_IOC_LOV_SETEA _IOW('f', 156, long)
#define LL_IOC_GROUP_LOCK _IOW('f', 158, long)
#define LL_IOC_GROUP_UNLOCK _IOW('f', 159, long)
#define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *)
#define LL_IOC_FLUSHCTX _IOW('f', 166, long)
#define LL_IOC_GETOBDCOUNT _IOR('f', 168, long)
#define LL_IOC_LLOOP_ATTACH _IOWR('f', 169, long)
#define LL_IOC_LLOOP_DETACH _IOWR('f', 170, long)
#define LL_IOC_LLOOP_INFO _IOWR('f', 171, struct lu_fid)
#define LL_IOC_LLOOP_DETACH_BYDEV _IOWR('f', 172, long)
#define LL_IOC_PATH2FID _IOR('f', 173, long)
#define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *)
#define LL_IOC_GET_MDTIDX _IOR('f', 175, int)
#define LL_IOC_FUTIMES_3 _IOWR('f', 176, struct ll_futimes_3)
#define LL_IOC_FLR_SET_MIRROR _IOW('f', 177, long)
/* lustre_ioctl.h 177-210 */
/* ioctl commands on type 'f', numbers 211-254. Numbers 242, 243, 250, 251
 * and 252 are each defined more than once; the definitions sharing a number
 * differ in direction macro and/or argument type.
 */
#define LL_IOC_HSM_STATE_GET _IOR('f', 211, struct hsm_user_state)
#define LL_IOC_HSM_STATE_SET _IOW('f', 212, struct hsm_state_set)
#define LL_IOC_HSM_CT_START _IOW('f', 213, struct lustre_kernelcomm)
#define LL_IOC_HSM_COPY_START _IOW('f', 214, struct hsm_copy *)
#define LL_IOC_HSM_COPY_END _IOW('f', 215, struct hsm_copy *)
#define LL_IOC_HSM_PROGRESS _IOW('f', 216, struct hsm_user_request)
#define LL_IOC_HSM_REQUEST _IOW('f', 217, struct hsm_user_request)
#define LL_IOC_DATA_VERSION _IOR('f', 218, struct ioc_data_version)
#define LL_IOC_LOV_SWAP_LAYOUTS _IOW('f', 219, \
struct lustre_swap_layouts)
#define LL_IOC_HSM_ACTION _IOR('f', 220, \
struct hsm_current_action)
/* lustre_ioctl.h 221-233 */
#define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md)
#define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md)
#define LL_IOC_REMOVE_ENTRY _IOWR('f', 242, __u64)
#define LL_IOC_RMFID _IOR('f', 242, struct fid_array)
#define LL_IOC_UNLOCK_FOREIGN _IO('f', 242)
#define LL_IOC_SET_LEASE _IOWR('f', 243, struct ll_ioc_lease)
#define LL_IOC_SET_LEASE_OLD _IOWR('f', 243, long)
#define LL_IOC_GET_LEASE _IO('f', 244)
#define LL_IOC_HSM_IMPORT _IOWR('f', 245, struct hsm_user_import)
#define LL_IOC_LMV_SET_DEFAULT_STRIPE _IOWR('f', 246, struct lmv_user_md)
#define LL_IOC_MIGRATE _IOR('f', 247, int)
#define LL_IOC_FID2MDTIDX _IOWR('f', 248, struct lu_fid)
#define LL_IOC_GETPARENT _IOWR('f', 249, struct getparent)
#define LL_IOC_LADVISE _IOR('f', 250, struct llapi_lu_ladvise)
#define LL_IOC_LADVISE2 _IOW('f', 250, struct llapi_lu_ladvise2)
#define LL_IOC_HEAT_GET _IOWR('f', 251, struct lu_heat)
#define LL_IOC_HEAT_SET _IOW('f', 251, __u64)
#define LL_IOC_PCC_ATTACH _IOW('f', 252, struct lu_pcc_attach)
#define LL_IOC_PCC_DETACH _IOWR('f', 252, struct lu_pcc_detach)
#define LL_IOC_PCC_DETACH_BY_FID _IOWR('f', 252, \
struct lu_pcc_detach_fid)
#define LL_IOC_PCC_STATE _IOR('f', 252, struct lu_pcc_state)
#define LL_IOC_PROJECT _IOW('f', 253, struct lu_project)
#define LL_IOC_HSM_DATA_VERSION _IOW('f', 254, struct ioc_data_version)
/* Fallback definitions, compiled only when FS_IOC_FSGETXATTR is not already
 * defined.
 */
#ifndef FS_IOC_FSGETXATTR
/*
* Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
*/
struct fsxattr {
__u32 fsx_xflags; /* xflags field value (get/set) */
__u32 fsx_extsize; /* extsize field value (get/set)*/
__u32 fsx_nextents; /* nextents field value (get) */
__u32 fsx_projid; /* project identifier (get/set) */
unsigned char fsx_pad[12]; /* pads the structure to 28 bytes */
};
#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
#endif
#ifndef FS_XFLAG_PROJINHERIT
#define FS_XFLAG_PROJINHERIT 0x00000200
#endif
#define MDT_INVALID_UID U32_MAX
#define MDT_INVALID_GID U32_MAX
#define MDT_INVALID_PROJID U32_MAX
#define LL_STATFS_LMV 1
#define LL_STATFS_LOV 2
#define LL_STATFS_NODELAY 4
#define IOC_MDC_TYPE 'i'
#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
#define IOC_MDC_GETFILESTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *)
#define IOC_MDC_GETFILEINFO_V1 _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data_v1 *)
#define IOC_MDC_GETFILEINFO_V2 _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data)
#define LL_IOC_MDC_GETINFO_V1 _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data_v1 *)
#define LL_IOC_MDC_GETINFO_V2 _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data)
#define IOC_MDC_GETFILEINFO IOC_MDC_GETFILEINFO_V1
#define LL_IOC_MDC_GETINFO LL_IOC_MDC_GETINFO_V1
#define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */
/* Define O_LOV_DELAY_CREATE to be a mask that is not useful for regular
* files, but are unlikely to be used in practice and are not harmful if
* used incorrectly. O_NOCTTY and FASYNC are only meaningful for character
* devices and are safe for use on new files. See LU-4209.
*/
/* To be compatible with old statically linked binary we keep the check for
* the older 0100000000 flag. This is already removed upstream. LU-812.
*/
/* Supplied only when not already defined. */
#ifndef FASYNC
#define FASYNC 00020000 /* fcntl, for BSD compatibility */
#endif
/* This is a Lustre-specific flag that defines O_LOV_DELAY_CREATE. There is no
* clash anywhere with these values and they can be used safely
*/
#define O_LOV_DELAY_CREATE (O_NOCTTY | FASYNC)
/* O_CIPHERTEXT principle is similar to O_LOV_DELAY_CREATE above,
* for access to encrypted files without the encryption key.
*/
#define O_CIPHERTEXT (O_NOCTTY | O_NDELAY | O_DSYNC)
/* One bit per flag; value 0x00000010 is not assigned here.
 * NOTE(review): LL_FILE_READAHEA looks like a truncated "READAHEAD"; the
 * identifier is left as-is because renaming it would change the interface.
 */
enum ll_file_flags {
LL_FILE_IGNORE_LOCK = 0x00000001,
LL_FILE_GROUP_LOCKED = 0x00000002,
LL_FILE_READAHEA = 0x00000004,
LL_FILE_LOCKED_DIRECTIO = 0x00000008, /* client-side locks with dio */
LL_FILE_FLOCK_WARNING = 0x00000020, /* warned about disabled flock */
};
/* LOV layout magic numbers; LOV_USER_MAGIC aliases the V1 value. */
#define LOV_USER_MAGIC_V1 0x0BD10BD0
#define LOV_USER_MAGIC LOV_USER_MAGIC_V1
#define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0
#define LOV_USER_MAGIC_V3 0x0BD30BD0
/* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */
#define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0 /* for specific OSTs */
#define LOV_USER_MAGIC_COMP_V1 0x0BD60BD0
#define LOV_USER_MAGIC_FOREIGN 0x0BD70BD0
#define LOV_USER_MAGIC_SEL 0x0BD80BD0
/* LMV layout magic numbers. */
#define LMV_USER_MAGIC 0x0CD30CD0 /* default lmv magic */
#define LMV_USER_MAGIC_V0 0x0CD20CD0 /* old default lmv magic*/
#define LMV_USER_MAGIC_SPECIFIC 0x0CD40CD0
/**
 * Layout pattern values.
 *
 * The low 16 bits hold the pattern itself; the high 16 bits
 * (LOV_PATTERN_F_MASK) hold flag bits such as LOV_PATTERN_F_HOLE and
 * LOV_PATTERN_F_RELEASED.
 */
enum lov_pattern {
	LOV_PATTERN_NONE		= 0x000,
	LOV_PATTERN_RAID0		= 0x001,
	LOV_PATTERN_RAID1		= 0x002,
	LOV_PATTERN_PARITY		= 0x004,
	LOV_PATTERN_MDT			= 0x100,
	LOV_PATTERN_OVERSTRIPING	= 0x200,
	LOV_PATTERN_FOREIGN		= 0x400,
	LOV_PATTERN_COMPRESS		= 0x800,
	/* combine exclusive patterns as a bad pattern */
	LOV_PATTERN_BAD			= (LOV_PATTERN_RAID1 | LOV_PATTERN_MDT |
					   LOV_PATTERN_FOREIGN),
	LOV_PATTERN_F_MASK		= 0xffff0000,
	LOV_PATTERN_F_HOLE		= 0x40000000, /* hole in LOV EA objects */
	LOV_PATTERN_F_RELEASED		= 0x80000000, /* HSM released file */
	LOV_PATTERN_DEFAULT		= 0xffffffff
};

/* All-ones "use the default" markers for 16-bit and 32-bit offsets. */
#define LOV_OFFSET_DEFAULT ((__u16)-1)
#define LMV_OFFSET_DEFAULT ((__u32)-1)

/**
 * Tell whether the client I/O path understands @pattern.
 *
 * The flag bits (LOV_PATTERN_F_RELEASED and everything in
 * LOV_PATTERN_F_MASK) are stripped before the comparison.
 *
 * Return: true for RAID0, RAID0 with overstriping, RAID0 with parity,
 * or MDT; false for anything else.
 */
static inline bool lov_pattern_supported(enum lov_pattern pattern)
{
	const unsigned int flag_bits = LOV_PATTERN_F_RELEASED |
				       LOV_PATTERN_F_MASK;

	switch ((unsigned int)pattern & ~flag_bits) {
	case LOV_PATTERN_RAID0:
	case LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING:
	case LOV_PATTERN_RAID0 | LOV_PATTERN_PARITY:
	case LOV_PATTERN_MDT:
		return true;
	default:
		return false;
	}
}

/**
 * Tell whether @pattern may be set and is accepted by the server.
 *
 * The flag bits are stripped in the same way as in
 * lov_pattern_supported(), and the same four base patterns are accepted.
 *
 * Return: true for RAID0, RAID0 with overstriping, RAID0 with parity,
 * or MDT; false for anything else.
 */
static inline bool lov_pattern_available(enum lov_pattern pattern)
{
	const unsigned int flag_bits = LOV_PATTERN_F_RELEASED |
				       LOV_PATTERN_F_MASK;

	switch ((unsigned int)pattern & ~flag_bits) {
	case LOV_PATTERN_RAID0:
	case LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING:
	case LOV_PATTERN_RAID0 | LOV_PATTERN_PARITY:
	case LOV_PATTERN_MDT:
		return true;
	default:
		return false;
	}
}

/**
 * Stricter check for ordinary components.
 *
 * RELEASED and MDT patterns are not valid in many places, so instead of
 * adding extra checks around lov_pattern_supported() this helper accepts
 * only non-released, non-DOM patterns.  No flag bits are masked off: the
 * value must match exactly.
 *
 * Return: true only for RAID0 or RAID0 with overstriping.
 */
static inline bool lov_pattern_supported_normal_comp(enum lov_pattern pattern)
{
	if (pattern == LOV_PATTERN_RAID0)
		return true;

	return pattern == (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING);
}
/* Maximum pool name length (without the terminating NUL) and the matching
 * printf-style format.
 */
#define LOV_MAXPOOLNAME 15
#define LOV_POOLNAMEF "%.15s"
/* The poolname "ignore" is used to force a component creation without pool */
#define LOV_POOL_IGNORE "ignore"
/* The poolname "inherit" is used to force a component to inherit the pool from
 * parent or root directory
 */
#define LOV_POOL_INHERIT "inherit"
/* The poolname "none" is deprecated in 2.15 (same behavior as "inherit") */
#define LOV_POOL_NONE "none"

/**
 * Check whether @pool is the reserved name LOV_POOL_IGNORE.
 *
 * Return: false for a NULL @pool; otherwise true when the first
 * LOV_MAXPOOLNAME characters compare equal to LOV_POOL_IGNORE.
 */
static inline bool lov_pool_is_ignored(const char *pool)
{
	if (!pool)
		return false;

	return !strncmp(pool, LOV_POOL_IGNORE, LOV_MAXPOOLNAME);
}

/**
 * Check whether @pool is LOV_POOL_INHERIT or its deprecated synonym
 * LOV_POOL_NONE.
 *
 * Return: false for a NULL @pool; otherwise true when either name matches.
 */
static inline bool lov_pool_is_inherited(const char *pool)
{
	if (!pool)
		return false;

	if (!strncmp(pool, LOV_POOL_INHERIT, LOV_MAXPOOLNAME))
		return true;

	return !strncmp(pool, LOV_POOL_NONE, LOV_MAXPOOLNAME);
}

/**
 * Check whether @pool is any of the reserved pool names.
 *
 * Return: true when @pool is "ignore", "inherit" or "none".
 */
static inline bool lov_pool_is_reserved(const char *pool)
{
	if (lov_pool_is_ignored(pool))
		return true;

	return lov_pool_is_inherited(pool);
}
/* Minimum stripe size as a power of two: 1 << 16 = 65536 bytes. */
#define LOV_MIN_STRIPE_BITS 16 /* maximum PAGE_SIZE (ia64), power of 2 */
#define LOV_MIN_STRIPE_SIZE (1 << LOV_MIN_STRIPE_BITS)
#define LOV_MAX_STRIPE_COUNT_OLD 160
/* This calculation is crafted so that input of 4096 will result in 160
* which in turn is equal to old maximal stripe count.
* XXX: In fact this is too simplified for now, what it also need is to get
* ea_type argument to clearly know how much space each stripe consumes.
*
* The limit of 12 pages is somewhat arbitrary, but is a reasonably large
* allocation that is sufficient for the current generation of systems.
*
* (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1)
*/
#define LOV_MAX_STRIPE_COUNT 2000 /* ~((12 * 4096 - 256) / 24) */
/* max and min values are used to check range of overstripe count */
#define LOV_ALL_STRIPES 0xffff /* only valid for directories */
#define LOV_ALL_STRIPES_WIDE 0xffe0 /* LLAPI_OVERSTRIPE_COUNT_MAX */
/* Evaluates to 0xffdf, one below LOV_ALL_STRIPES_WIDE. */
#define LOV_V1_INSANE_STRIPE_INDEX (LOV_ALL_STRIPES_WIDE - 1) /* max index */
#define LOV_V1_INSANE_STRIPE_COUNT LOV_V1_INSANE_STRIPE_INDEX /* deprecated */
/* EC (Erasure Coding) stripe count limits */
#define LOV_EC_MAX_DATA_STRIPES 255 /* max data stripes for EC */
#define LOV_EC_MAX_CODING_STRIPES 15 /* max coding/parity stripes for EC */
#define XATTR_LUSTRE_PREFIX "lustre."
#define XATTR_LUSTRE_PIN XATTR_LUSTRE_PREFIX"pin"
#define XATTR_LUSTRE_LOV XATTR_LUSTRE_PREFIX"lov"
/* Please update if XATTR_LUSTRE_LOV".set" groks more flags in the future */
#define allowed_lustre_lov(att) (strcmp((att), XATTR_LUSTRE_LOV".add") == 0 || \
strcmp((att), XATTR_LUSTRE_LOV".set") == 0 || \
strcmp((att), XATTR_LUSTRE_LOV".set.flags") == 0 || \
strcmp((att), XATTR_LUSTRE_LOV".del") == 0)
/* Unversioned name maps to the v1 structure. */
#define lov_user_ost_data lov_user_ost_data_v1
/* Packed; l_ost_type and l_ost_gen share the same 32-bit slot. */
struct lov_user_ost_data_v1 { /* per-stripe data structure */
struct ost_id l_ost_oi; /* OST object ID */
union {
__u32 l_ost_type; /* type of data stored in OST object */
__u32 l_ost_gen; /* generation of this OST index */
};
__u32 l_ost_idx; /* OST index in LOV */
} __attribute__((packed));
/* Unversioned name maps to the v1 structure. */
#define lov_user_md lov_user_md_v1
/* Packed header followed by a flexible array of per-stripe entries.
 * lmm_stripe_offset and lmm_layout_gen share one 16-bit slot.
 */
struct lov_user_md_v1 { /* LOV EA user data (host-endian) */
__u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V1 */
__u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
struct ost_id lmm_oi; /* MDT parent inode id/seq (id/0 for 1.x) */
__u32 lmm_stripe_size; /* size of stripe in bytes */
__u16 lmm_stripe_count; /* num stripes in use for this object */
union {
__u16 lmm_stripe_offset; /* starting stripe offset in
* lmm_objects, use when writing
*/
__u16 lmm_layout_gen; /* layout generation number
* used when reading
*/
};
struct lov_user_ost_data_v1 lmm_objects[]; /* per-stripe data */
} __attribute__((packed, __may_alias__));
/* Same leading fields as struct lov_user_md_v1, with lmm_pool_name inserted
 * ahead of the per-stripe array.
 */
struct lov_user_md_v3 { /* LOV EA user data (host-endian) */
__u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V3 */
__u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
struct ost_id lmm_oi; /* MDT parent inode id/seq (id/0 for 1.x) */
__u32 lmm_stripe_size; /* size of stripe in bytes */
__u16 lmm_stripe_count; /* num stripes in use for this object */
/* same 16 bits, interpreted differently when writing vs. reading */
union {
__u16 lmm_stripe_offset; /* starting stripe offset in
* lmm_objects, use when writing
*/
__u16 lmm_layout_gen; /* layout generation number
* used when reading
*/
};
char lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* pool name */
struct lov_user_ost_data_v1 lmm_objects[]; /* per-stripe data */
} __attribute__((packed, __may_alias__));
/* Header of a foreign layout EA; lfm_length bytes of payload follow it. */
struct lov_foreign_md {
	__u32 lfm_magic;	/* magic number = LOV_MAGIC_FOREIGN */
	__u32 lfm_length;	/* length of lfm_value */
	__u32 lfm_type;		/* type, see LU_FOREIGN_TYPE_ */
	__u32 lfm_flags;	/* flags, type specific */
	char lfm_value[];
} __attribute__((packed));

/* Total size of a foreign layout: fixed header plus lfm_length payload bytes.
 * The argument is parenthesized before the cast so that a pointer expression
 * such as (buf + off) is cast as a whole rather than scaled by the struct
 * size.  lov_foreign_size() reads lfm_length as-is; the _le variant converts
 * it with le32_to_cpu() first.
 */
#define lov_foreign_size(lfm) (((struct lov_foreign_md *)(lfm))->lfm_length + \
			       offsetof(struct lov_foreign_md, lfm_value))
#define lov_foreign_size_le(lfm) \
	(le32_to_cpu(((struct lov_foreign_md *)(lfm))->lfm_length) + \
	 offsetof(struct lov_foreign_md, lfm_value))
/**
* The stripe size fields are shared for the extension size storage, however
* the extension size is stored in KB, not bytes.
* SEL_UNIT_SIZE is that unit expressed in bytes (1024, unsigned long long).
*/
#define SEL_UNIT_SIZE 1024llu
/* A pair of 64-bit offsets; DEXT prints it as a half-open range [start, end). */
struct lu_extent {
	__u64 e_start;
	__u64 e_end;
} __attribute__((packed));

/* printf format and matching argument list for a struct lu_extent pointer */
#define DEXT "[%#llx, %#llx)"
#define PEXT(ext) (unsigned long long)(ext)->e_start, (unsigned long long)(ext)->e_end

/**
 * Check whether two extents share at least one offset.
 *
 * Both extents are treated as half-open, so extents that merely touch
 * (e1->e_end == e2->e_start) do not overlap.  The arguments are only read,
 * hence const-qualified so callers may pass const extents.
 *
 * \retval true if the extents intersect, false otherwise
 */
static inline bool lu_extent_is_overlapped(const struct lu_extent *e1,
					   const struct lu_extent *e2)
{
	return e1->e_start < e2->e_end && e2->e_start < e1->e_end;
}

/**
 * Check whether two extents have identical start and end offsets.
 *
 * \retval true if both bounds match, false otherwise
 */
static inline bool lu_extent_is_equal(const struct lu_extent *e1,
				      const struct lu_extent *e2)
{
	return e1->e_start == e2->e_start && e1->e_end == e2->e_end;
}
static inline bool lu_extent_is_whole(struct lu_extent *e)
{
return e->e_start == 0 && e->e_end == LUSTRE_EOF;
}
/* LCME_FL_* bit flags for a layout component entry.  The two highest bits
 * are documented below as never being stored on disk.
 */
enum lov_comp_md_entry_flags {
LCME_FL_STALE = 0x00000001, /* FLR: stale data */
LCME_FL_PREF_RD = 0x00000002, /* FLR: preferred for reading */
LCME_FL_PREF_WR = 0x00000004, /* FLR: preferred for writing */
LCME_FL_PREF_RW = LCME_FL_PREF_RD | LCME_FL_PREF_WR,
LCME_FL_OFFLINE = 0x00000008, /* Not used */
LCME_FL_INIT = 0x00000010, /* instantiated */
LCME_FL_NOSYNC = 0x00000020, /* FLR: no sync for the mirror */
LCME_FL_EXTENSION = 0x00000040, /* extension comp, never init */
LCME_FL_PARITY = 0x00000080, /* EC: a parity code component */
LCME_FL_COMPRESS = 0x00000100, /* the component should be compressed */
LCME_FL_PARTIAL = 0x00000200, /* some chunks in the component are
* uncompressed
*/
LCME_FL_NOCOMPR = 0x00000400, /* the component should not be
* compressed
*/
LCME_FL_IS_LINK_ID = 0x40000000, /* EC: llc_protected_ref is link ID
* (transient, not stored on disk)
*/
LCME_FL_NEG = 0x80000000 /* used to indicate a negative flag,
* won't be stored on disk
*/
};
/* Groupings of LCME_FL_* bits.
 * NOTE(review): LCME_KNOWN_FLAGS does not include LCME_FL_OFFLINE,
 * LCME_FL_COMPRESS, LCME_FL_PARTIAL or LCME_FL_NOCOMPR, although
 * LCME_USER_MIRROR_FLAGS does include LCME_FL_NOCOMPR -- confirm this is
 * intended.
 */
#define LCME_KNOWN_FLAGS (LCME_FL_NEG | LCME_FL_INIT | LCME_FL_STALE | \
LCME_FL_PREF_RW | LCME_FL_NOSYNC | \
LCME_FL_EXTENSION | LCME_FL_PARITY | \
LCME_FL_IS_LINK_ID)
/* The component flags can be set by users at creation/modification time. */
#define LCME_USER_COMP_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \
LCME_FL_EXTENSION | LCME_FL_PARITY)
/* The mirror flags can be set by users at creation time. */
#define LCME_USER_MIRROR_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOCOMPR)
/* The allowed flags obtained from the client at component creation time. */
#define LCME_CL_COMP_FLAGS (LCME_USER_MIRROR_FLAGS | LCME_FL_EXTENSION | \
LCME_FL_PARITY | LCME_FL_IS_LINK_ID)
/* The mirror flags sent by client */
#define LCME_MIRROR_FLAGS (LCME_FL_NOSYNC)
/* These flags have meaning when set in a default layout and will be inherited
* from the default/template layout set on a directory.
*/
#define LCME_TEMPLATE_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \
LCME_FL_EXTENSION | LCME_FL_PARITY)
/* lcme_id can be specified as certain flags, and the first
* bit of lcme_id is used to indicate that the ID is representing
* certain LCME_FL_* but not a real ID. Which implies we can have
* at most 31 flags (see LCME_FL_XXX).
*/
/* Special lcme_id values. */
enum lcme_id {
LCME_ID_INVAL = 0x0,
LCME_ID_MAX = 0x7FFFFFFF,
LCME_ID_ALL = 0xFFFFFFFF,
/* same bit as LCME_FL_NEG (0x80000000) */
LCME_ID_NOT_ID = LCME_FL_NEG
};
/* layout version equals to lcme_id, except some bits have special meanings */
/* Special bits carried in the two highest positions of a layout version. */
enum layout_version_flags {
/* layout version reaches the high water mark to be increased to
* circularly reuse the smallest value
*/
LU_LAYOUT_HIGEN = 0x40000000,
/* the highest bit is used to mark if the file is being resynced */
LU_LAYOUT_RESYNC = 0x80000000,
};
/* Mask selecting the low 31 bits (same value as LCME_ID_MAX). */
#define LCME_ID_MASK LCME_ID_MAX
/* Helpers for a 64-bit value holding a time in the low 48 bits and an id in
 * the high 16 bits.
 */
#define LCME_TIMESTAMP_ID_SHIFT 48
/* low 48 bits */
#define LCME_TIMESTAMP_TIME_MASK ((1ULL << LCME_TIMESTAMP_ID_SHIFT) - 1)
/* 16-bit mask, applied to the id before/after shifting */
#define LCME_TIMESTAMP_ID_MASK ((1ULL << (64 - LCME_TIMESTAMP_ID_SHIFT)) - 1)
/* Combine time and id; bits outside each field's mask are dropped. */
#define lcme_timestamp_and_id_pack(time, id) \
((__u64)((((id) & LCME_TIMESTAMP_ID_MASK) << LCME_TIMESTAMP_ID_SHIFT) | \
((time) & LCME_TIMESTAMP_TIME_MASK)))
/* Extract the low 48-bit time field. */
#define lcme_timestamp_time_unpack(time_id) \
((time_id) & LCME_TIMESTAMP_TIME_MASK)
/* Extract the high 16-bit id field. */
#define lcme_timestamp_id_unpack(time_id) \
(((time_id) >> LCME_TIMESTAMP_ID_SHIFT) & LCME_TIMESTAMP_ID_MASK)
/* One component entry of a composite layout (packed). */
struct lov_comp_md_entry_v1 {
__u32 lcme_id; /* unique id of component */
__u32 lcme_flags; /* LCME_FL_XXX */
/* file extent for component. If it's an EC code component, its flags
* contains LCME_FL_PARITY, and its extent covers the same extent of
* its corresponding data component.
*/
struct lu_extent lcme_extent;
__u32 lcme_offset; /* offset of component blob,
* start from v_comp_md_v1
*/
__u32 lcme_size; /* size of component blob */
__u32 lcme_layout_gen;
/* 64 bits viewed either as one value or as a 48-bit timestamp
 * followed by a 16-bit mirror link id
 */
union {
__u64 lcme_time_and_id;
struct {
__u64 lcme_timestamp:48;
/* mirror link id for data and parity components */
__u16 lcme_mirror_link_id;
};
};
__u8 lcme_dstripe_count; /* data stripe count,
* k value in EC
*/
__u8 lcme_cstripe_count; /* code stripe count,
* p value in EC
*/
__u8 lcme_compr_type; /* compress type */
__u8 lcme_compr_lvl:4; /* compress level */
__u8 lcme_compr_chunk_log_bits:4;
/* chunk_size = 2^(16+chunk_log_bits)
* i.e. power-of-two multiple of 64KiB
*/
} __attribute__((packed));
/* The low 16 bits of lcme_id hold the sequence id */
#define SEQ_ID_MAX		0x0000FFFF
#define SEQ_ID_MASK		SEQ_ID_MAX
/* bit 30:16 of lcme_id is used to store mirror id */
#define MIRROR_ID_MASK		0x7FFF0000
#define MIRROR_ID_NEG		0x8000
#define MIRROR_ID_SHIFT		16

/**
 * Build a component id from a mirror id and a sequence id.
 *
 * \param[in] mirror_id	mirror id; only its low 15 bits are kept
 * \param[in] seqid	sequence id, stored in the low 16 bits
 *
 * \retval (mirror_id << 16, masked to bits 30:16) | seqid
 */
static inline __u32 pflr_id(__u16 mirror_id, __u16 seqid)
{
	/* Widen before shifting: a __u16 promotes to int, and shifting a
	 * value >= 0x8000 left by 16 would overflow a 32-bit int.
	 */
	__u32 mirror_bits = (__u32)mirror_id << MIRROR_ID_SHIFT;

	return (mirror_bits & MIRROR_ID_MASK) | seqid;
}

/**
 * Extract the mirror id (bits 30:16) from a component id.
 */
static inline __u16 mirror_id_of(__u32 id)
{
	return (id & MIRROR_ID_MASK) >> MIRROR_ID_SHIFT;
}
/**
* on-disk data for lcm_flags. Valid if lcm_magic is LOV_MAGIC_COMP_V1.
*/
enum lov_comp_md_flags {
/* the least 4 bits are used by FLR to record file state */
LCM_FL_NONE = 0x0,
LCM_FL_RDONLY = 0x1,
LCM_FL_WRITE_PENDING = 0x2,
LCM_FL_SYNC_PENDING = 0x3,
LCM_FL_PCC_RDONLY = 0x8,
/* 0xB == 0x3 | 0x8: covers every value defined above */
LCM_FL_FLR_MASK = 0xB,
};
/* Packed header of a composite layout, followed by a flexible array of
 * component entries.
 */
struct lov_comp_md_v1 {
__u32 lcm_magic; /* LOV_USER_MAGIC_COMP_V1 */
__u32 lcm_size; /* overall size including this struct */
__u32 lcm_layout_gen;
__u16 lcm_flags;
__u16 lcm_entry_count;
/* lcm_mirror_count stores the number of actual mirrors minus 1,
* so that non-flr files will have value 0 meaning 1 mirror.
*/
__u16 lcm_mirror_count;
/* code components count, non-EC file contains 0 ec_count */
__u8 lcm_ec_count;
__u8 lcm_padding3[1];
__u16 lcm_padding1[2];
__u64 lcm_padding2;
struct lov_comp_md_entry_v1 lcm_entries[];
} __attribute__((packed));
/**
 * Size in bytes of a LOV user layout with the given stripe count.
 *
 * \param[in] stripes	  number of per-stripe entries; values in the range
 *			  [LOV_ALL_STRIPES_WIDE, LOV_ALL_STRIPES] are
 *			  treated as zero entries
 * \param[in] lmm_magic  LOV_USER_MAGIC_V1 selects the v1 header, any other
 *			  value the v3 header
 *
 * \retval header size plus room for the per-stripe entries
 */
static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
{
	__u32 nr_objects = stripes;
	__u32 hdr_size;

	/* special "all stripes" counts carry no per-stripe entries */
	if (stripes >= LOV_ALL_STRIPES_WIDE && stripes <= LOV_ALL_STRIPES)
		nr_objects = 0;

	hdr_size = lmm_magic == LOV_USER_MAGIC_V1 ?
		   sizeof(struct lov_user_md_v1) :
		   sizeof(struct lov_user_md_v3);

	return hdr_size + nr_objects * sizeof(struct lov_user_ost_data_v1);
}
/**
 * Size in bytes of a foreign layout whose payload is \a length bytes:
 * the fixed header (everything before lfm_value) plus the payload.
 */
static inline __u32 lov_foreign_md_size(__u32 length)
{
	const __u32 hdr_len = offsetof(struct lov_foreign_md, lfm_value);

	return hdr_len + length;
}
/* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
* use this. It is unsafe to #define those values in this header as it
* is possible the application has already #included <sys/stat.h>.
*/
/* Unversioned name currently resolves to the v2 structure. */
#define lov_user_mds_data lov_user_mds_data_v2
/* v1: stat data followed by a v1 LOV layout. */
struct lov_user_mds_data_v1 {
lstat_t lmd_st; /* MDS stat struct */
struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */
} __attribute__((packed));
/* v2: FID, statx data, flags and layout size ahead of the LOV layout. */
struct lov_user_mds_data_v2 {
struct lu_fid lmd_fid; /* Lustre FID */
lstatx_t lmd_stx; /* MDS statx struct */
__u64 lmd_flags; /* MDS stat flags */
__u32 lmd_lmmsize; /* LOV EA size */
__u32 lmd_padding; /* unused */
struct lov_user_md_v1 lmd_lmm; /* LOV EA user data */
} __attribute__((packed));
/* Per-stripe entry used in struct lmv_user_md_v1::lum_objects[];
 * 24 bytes if struct lu_fid is 16 bytes -- TODO confirm against its
 * definition.
 */
struct lmv_user_mds_data {
struct lu_fid lum_fid;
__u32 lum_padding;
__u32 lum_mds;
} __attribute__((packed, __may_alias__));
/* Directory hash function identifiers. */
enum lmv_hash_type {
LMV_HASH_TYPE_UNKNOWN = 0, /* 0 is reserved for testing purpose */
LMV_HASH_TYPE_ALL_CHARS = 1, /* simple sum of characters */
LMV_HASH_TYPE_FNV_1A_64 = 2, /* reasonable non-cryptographic hash */
LMV_HASH_TYPE_CRUSH = 3, /* double-hash to optimize migration */
LMV_HASH_TYPE_CRUSH2 = 4, /* CRUSH with small fixes, LU-15692 */
LMV_HASH_TYPE_MAX, /* one past the last valid type (5) */
LMV_HASH_TYPE_DEFAULT = LMV_HASH_TYPE_FNV_1A_64
};
/* Names listed in enum lmv_hash_type order: one entry for each value below
 * LMV_HASH_TYPE_MAX, with "none" in slot 0.  Being static in a header, every
 * includer gets its own copy; the unused attribute silences the warning
 * where it is not referenced.
 */
static __attribute__((unused)) const char *mdt_hash_name[] = {
"none",
"all_char",
"fnv_1a_64",
"crush",
"crush2",
};
/* Right now only the lower part(0-16bits) of lmv_hash_type is being used,
* and the higher part will be the flag to indicate the status of object,
* for example the object is being migrated. And the hash function
* might be interpreted differently with different flags.
*/
/* low 16 bits of the hash type word select the hash function */
#define LMV_HASH_TYPE_MASK 0x0000ffff

/**
 * Check whether the hash function encoded in \a type is a defined one.
 *
 * Only the bits under LMV_HASH_TYPE_MASK are examined; the result is true
 * when they lie strictly between LMV_HASH_TYPE_UNKNOWN and
 * LMV_HASH_TYPE_MAX.
 */
static inline bool lmv_is_known_hash_type(__u32 type)
{
	__u32 hash_fn = type & LMV_HASH_TYPE_MASK;

	if (hash_fn <= LMV_HASH_TYPE_UNKNOWN)
		return false;

	return hash_fn < LMV_HASH_TYPE_MAX;
}
/* Flag bits carried in the top byte of the hash type word. */
/* This flag indicates that overstriping (>1 stripe per MDT) is desired */
#define LMV_HASH_FLAG_OVERSTRIPED 0x01000000
/* fixed layout, such directories won't split automatically */
/* NB, update LMV_HASH_FLAG_KNOWN when adding new flag */
#define LMV_HASH_FLAG_FIXED 0x02000000
#define LMV_HASH_FLAG_MERGE 0x04000000
#define LMV_HASH_FLAG_SPLIT 0x08000000
/* The striped directory has ever lost its master LMV EA, then LFSCK
* re-generated it. This flag is used to indicate such case. It is an
* on-disk flag.
*/
#define LMV_HASH_FLAG_LOST_LMV 0x10000000
#define LMV_HASH_FLAG_BAD_TYPE 0x20000000
#define LMV_HASH_FLAG_MIGRATION 0x80000000
/* The three flags that describe an in-progress layout change. */
#define LMV_HASH_FLAG_LAYOUT_CHANGE \
(LMV_HASH_FLAG_MIGRATION | LMV_HASH_FLAG_SPLIT | LMV_HASH_FLAG_MERGE)
/* OR of the seven flags defined above (0x40000000 is not assigned). */
#define LMV_HASH_FLAG_KNOWN 0xbf000000
/* A failed migration may leave the hash type as exactly
 * LMV_HASH_TYPE_UNKNOWN|LMV_HASH_FLAG_BAD_TYPE.  That value is accepted as
 * sane so that such a directory stays accessible (to resume the migration or
 * to unlink it).
 */
static inline bool lmv_is_sane_hash_type(__u32 type)
{
	if (lmv_is_known_hash_type(type))
		return true;

	return type == (LMV_HASH_TYPE_UNKNOWN | LMV_HASH_FLAG_BAD_TYPE);
}
/* A directory split is marked by SPLIT and MIGRATION set with MERGE clear */
static inline bool lmv_hash_is_splitting(__u32 hash)
{
	const __u32 split_bits = LMV_HASH_FLAG_SPLIT | LMV_HASH_FLAG_MIGRATION;

	return (hash & LMV_HASH_FLAG_LAYOUT_CHANGE) == split_bits;
}
/* A directory merge is marked by MERGE and MIGRATION set with SPLIT clear */
static inline bool lmv_hash_is_merging(__u32 hash)
{
	const __u32 merge_bits = LMV_HASH_FLAG_MERGE | LMV_HASH_FLAG_MIGRATION;

	return (hash & LMV_HASH_FLAG_LAYOUT_CHANGE) == merge_bits;
}
/* A plain migration is marked by MIGRATION alone (SPLIT and MERGE clear) */
static inline bool lmv_hash_is_migrating(__u32 hash)
{
	__u32 change_bits = hash & LMV_HASH_FLAG_LAYOUT_CHANGE;

	return change_bits == LMV_HASH_FLAG_MIGRATION;
}
/* True while the directory is being either split or merged */
static inline bool lmv_hash_is_restriping(__u32 hash)
{
	if (lmv_hash_is_splitting(hash))
		return true;

	return lmv_hash_is_merging(hash);
}
/* True while the directory is being split, merged or migrated */
static inline bool lmv_hash_is_layout_changing(__u32 hash)
{
	if (lmv_hash_is_splitting(hash))
		return true;
	if (lmv_hash_is_merging(hash))
		return true;

	return lmv_hash_is_migrating(hash);
}
/* Pairs a numeric foreign type with a name string. */
struct lustre_foreign_type {
__u32 lft_type;
const char *lft_name;
};
/**
* LOV/LMV foreign types
**/
enum lustre_foreign_types {
LU_FOREIGN_TYPE_NONE = 0,
/* HSM copytool lhsm_posix */
LU_FOREIGN_TYPE_POSIX = 1,
/* Used for PCC-RW. PCCRW components are local to a single archive. */
LU_FOREIGN_TYPE_PCCRW = 2,
/* Used for PCC-RO. PCCRO components may be shared between archives. */
LU_FOREIGN_TYPE_PCCRO = 3,
/* Used for S3 */
LU_FOREIGN_TYPE_S3 = 4,
/* Used for DAOS */
LU_FOREIGN_TYPE_SYMLINK = 0xda05,
/* must be the max/last one */
LU_FOREIGN_TYPE_UNKNOWN = 0xffffffff,
};
/* Table of struct lustre_foreign_type entries; declared here, defined in
 * another translation unit.
 */
extern struct lustre_foreign_type lu_foreign_types[];
/**
* When specified or returned as the value for stripe count, all
* available MDTs will be used.
* The two bounds below are the signed 16-bit values -1 and -5.
*/
#define LMV_OVERSTRIPE_COUNT_MIN ((__s16)0xffff) /* -1 */
#define LMV_OVERSTRIPE_COUNT_MAX ((__s16)0xfffb) /* -5 */
/* Got this according to how get LOV_MAX_STRIPE_COUNT, see above,
* (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data)
*/
#define LMV_MAX_STRIPE_COUNT 2000 /* ~((12 * 4096 - 256) / 24) */
#define LMV_MAX_STRIPES_PER_MDT 5 /* (RS_MAX_LOCKS - 4) / 2 */
/* Unversioned name currently resolves to the v1 structure. */
#define lmv_user_md lmv_user_md_v1
/* Packed header followed by a flexible array of per-stripe entries. */
struct lmv_user_md_v1 {
__u32 lum_magic; /* must be the first field */
__u32 lum_stripe_count; /* dirstripe count */
__u32 lum_stripe_offset; /* MDT idx for default dirstripe */
__u32 lum_hash_type; /* Dir stripe policy */
__u32 lum_type; /* LMV type: default */
__u8 lum_max_inherit; /* inherit depth of default LMV */
__u8 lum_max_inherit_rr; /* inherit depth of default LMV to
* round-robin mkdir
*/
__u16 lum_padding1;
__u32 lum_padding2;
__u32 lum_padding3;
char lum_pool_name[LOV_MAXPOOLNAME + 1];
struct lmv_user_mds_data lum_objects[];
} __attribute__((packed));
/**
 * Number of struct lmv_user_mds_data entries needed to cover the part of
 * \a size that exceeds the struct lmv_user_md header, rounded up.
 *
 * \retval 0 if \a size does not exceed the header size
 */
static inline __u32 lmv_foreign_to_md_stripes(__u32 size)
{
	__u32 payload;

	if (size <= sizeof(struct lmv_user_md))
		return 0;

	payload = size - sizeof(struct lmv_user_md);

	/* ceiling division by the per-stripe entry size */
	return (payload + sizeof(struct lmv_user_mds_data) - 1) /
	       sizeof(struct lmv_user_mds_data);
}
/*
* NB, historically default layout didn't set type, but use XATTR name to differ
* from normal layout, for backward compatibility, define LMV_TYPE_DEFAULT 0x0,
* and still use the same method.
*/
/* LMV type values (see struct lmv_user_md_v1::lum_type). */
enum lmv_type {
LMV_TYPE_DEFAULT = 0x0000,
/* fetch raw default LMV set on directory inode */
LMV_TYPE_RAW = 0x0001,
};
/* lum_max_inherit will be decreased by 1 after each inheritance if it's not
* LMV_INHERIT_UNLIMITED or > LMV_INHERIT_MAX.
*/
/* Values for the 8-bit lum_max_inherit field. */
enum {
/* for historical reason, 0 means unlimited inheritance */
LMV_INHERIT_UNLIMITED = 0,
/* unlimited lum_max_inherit by default for plain stripe (0 or 1) */
LMV_INHERIT_DEFAULT_PLAIN = LMV_INHERIT_UNLIMITED,
/* not inherit any more */
LMV_INHERIT_END = 1,
/* for overstriped dirs, the default limit is 1 level of inheritance */
LMV_INHERIT_DEFAULT_OVERSTRIPED = 2,
/* for multiple stripes, the default limit is 2 levels of inheritance*/
LMV_INHERIT_DEFAULT_STRIPED = 3,
/* max inherit depth */
LMV_INHERIT_MAX = 250,
/* [251, 254] are reserved */
/* not set, or when inherit depth goes beyond end, */
LMV_INHERIT_NONE = 255,
};
/* Values for the 8-bit lum_max_inherit_rr field.  Note that, unlike
 * lum_max_inherit, 0 means "none" and 255 means "unlimited" here.
 */
enum {
/* not set, or when inherit_rr depth goes beyond end, */
LMV_INHERIT_RR_NONE = 0,
/* disable lum_max_inherit_rr by default */
LMV_INHERIT_RR_DEFAULT = LMV_INHERIT_RR_NONE,
/* not inherit any more */
LMV_INHERIT_RR_END = 1,
/* default inherit_rr of ROOT */
LMV_INHERIT_RR_ROOT = 3,
/* max inherit depth */
LMV_INHERIT_RR_MAX = 250,
/* [251, 254] are reserved */
/* unlimited inheritance */
LMV_INHERIT_RR_UNLIMITED = 255,
};
/**
 * Size in bytes of an LMV user layout.
 *
 * Per-stripe entries are only counted when \a lmm_magic is
 * LMV_USER_MAGIC_SPECIFIC; for any other magic the result is the bare
 * struct lmv_user_md header size.
 */
static inline unsigned int lmv_user_md_size(unsigned int stripes,
					    unsigned int lmm_magic)
{
	if (lmm_magic != LMV_USER_MAGIC_SPECIFIC)
		return sizeof(struct lmv_user_md);

	return sizeof(struct lmv_user_md) +
	       stripes * sizeof(struct lmv_user_mds_data);
}
/* A 64-bit id paired with a 32-bit OST index.
 * NOTE(review): not packed, so sizeof likely includes tail padding -- confirm
 * before relying on the size.
 */
struct ll_recreate_obj {
__u64 lrc_id;
__u32 lrc_ost_idx;
};
/* Object id, generation and type triple. */
struct ll_fid {
__u64 id; /* holds object id */
__u32 generation; /* holds object generation */
__u32 f_type; /* holds object type or stripe idx when passing it to
* OST for saving into EA.
*/
};
#define UUID_MAX	40

/* Fixed-size UUID buffer.  The contents may fill all UUID_MAX bytes without
 * a terminating NUL; use obd_uuid2str() when a C string is required.
 */
struct obd_uuid {
	char uuid[UUID_MAX];
};

/**
 * Compare two UUIDs over at most UUID_MAX bytes.
 *
 * \retval true if both buffers hold the same string
 */
static inline bool obd_uuid_equals(const struct obd_uuid *u1,
				   const struct obd_uuid *u2)
{
	return strncmp(u1->uuid, u2->uuid, sizeof(u1->uuid)) == 0;
}

/**
 * Check whether the UUID is the empty string.
 *
 * The UUID is only read, so it is taken through a const pointer.
 *
 * \retval non-zero if the first byte is NUL, 0 otherwise
 */
static inline int obd_uuid_empty(const struct obd_uuid *uuid)
{
	return uuid->uuid[0] == '\0';
}

/**
 * Copy \a tmp into \a uuid, truncating to UUID_MAX - 1 characters and
 * always NUL-terminating the result.
 */
static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp)
{
	strncpy(uuid->uuid, tmp, sizeof(uuid->uuid));
	uuid->uuid[sizeof(uuid->uuid) - 1] = '\0';
}

/**
 * Return a NUL-terminated string for \a uuid, for printf use only.
 *
 * \retval NULL		if \a uuid is NULL
 * \retval uuid->uuid	if the buffer already contains a NUL
 * \retval static copy	truncated to UUID_MAX - 1 characters otherwise
 */
static inline const char *obd_uuid2str(const struct obd_uuid *uuid)
{
	if (uuid == NULL)
		return NULL;

	/* memchr() is ISO C, unlike strnlen(); no NUL within the buffer
	 * means the UUID is unterminated.
	 */
	if (memchr(uuid->uuid, '\0', sizeof(uuid->uuid)) == NULL) {
		/* Obviously not safe (the buffer is shared), but for
		 * printfs, no real harm done... we're always
		 * null-terminated, even in a race.
		 */
		static char temp[sizeof(uuid->uuid)];

		strncpy(temp, uuid->uuid, sizeof(temp));
		temp[sizeof(temp) - 1] = '\0';
		return temp;
	}

	return uuid->uuid;
}
/* NOTE(review): presumably length limits for filesystem and instance names;
 * whether they include the terminating NUL is not visible here -- confirm
 * against the users.
 */
#define LUSTRE_MAXFSNAME 8
#define LUSTRE_MAXINSTANCE 16
/**
 * Extract fsname from uuid (or target name) of a target
 * e.g. (myfs-OST0007_UUID -> myfs)
 * see also deuuidify.
 *
 * At most \a buflen - 1 characters of \a uuid are copied into \a buf, the
 * copy is NUL-terminated, and it is then cut at its last '-' if it has one.
 * Nothing is written when \a buflen is 0.
 *
 * \param[out] buf	destination buffer of \a buflen bytes
 * \param[in]  uuid	source string; it is only read
 * \param[in]  buflen	size of \a buf in bytes
 */
static inline void obd_uuid2fsname(char *buf, const char *uuid,
				   unsigned int buflen)
{
	char *dash;

	if (buflen == 0)
		return;

	strncpy(buf, uuid, buflen - 1);
	buf[buflen - 1] = '\0';

	dash = strrchr(buf, '-');
	if (dash != NULL)
		*dash = '\0';
}
/* printf display format for Lustre FIDs
* usage: printf("file FID is "DFID"\n", PFID(fid));
*/
#define FID_NOBRACE_LEN 40
/* two more characters for the surrounding brackets */
#define FID_LEN (FID_NOBRACE_LEN + 2)
#define DFID_NOBRACE "%#llx:0x%x:0x%x"
#define DFID "[" DFID_NOBRACE "]"
/* expands to three printf arguments: f_seq, f_oid, f_ver */
#define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver
/* scanf input parse format for fids in DFID_NOBRACE format
* Need to strip '[' from DFID format first or use "["SFID"]" at caller.
* usage: sscanf(fidstr, SFID, RFID(&fid));
*/
#define SFID "0x%llx:0x%x:0x%x"
/* expands to three scanf destination pointers: &f_seq, &f_oid, &f_ver */
#define RFID(fid) (unsigned long long *)&((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver)
/* expands to three arguments: oi_seq, oi_id and a literal 0 */
#define PLOGID(logid) (unsigned long long)(logid)->lgl_oi.oi.oi_seq, (__u32)(logid)->lgl_oi.oi.oi_id, 0
/********* Quotas **********/
/* From linux/fs/quota/quota.c
 * Convert a byte count into quota blocks of QIF_DQBLKSIZE bytes, rounding up.
 */
static inline __u64 stoqb(__kernel_size_t space)
{
	__kernel_size_t rounded = space + QIF_DQBLKSIZE - 1;

	return rounded >> QIF_DQBLKSIZE_BITS;
}
/* Lustre quota command codes; all of them lie in the 0x8000xx/0x8001xx
 * range.
 */
#define Q_QUOTACHECK 0x800100 /* deprecated as of 2.4 */
#define Q_INITQUOTA 0x800101 /* deprecated as of 2.4 */
#define Q_GETOINFO 0x800102 /* get obd quota info */
#define Q_GETOQUOTA 0x800103 /* get obd quotas */
#define Q_FINVALIDATE 0x800104 /* deprecated as of 2.4 */
/* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
#define LUSTRE_Q_QUOTAON 0x800002 /* deprecated as of 2.4 */
#define LUSTRE_Q_QUOTAOFF 0x800003 /* deprecated as of 2.4 */
#define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */
#define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */
#define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */
#define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */
/* lustre-specific control commands */
#define LUSTRE_Q_INVALIDATE 0x80000b /* deprecated as of 2.4 */
#define LUSTRE_Q_FINVALIDATE 0x80000c /* deprecated as of 2.4 */
#define LUSTRE_Q_GETDEFAULT 0x80000d /* get default quota */
#define LUSTRE_Q_SETDEFAULT 0x80000e /* set default quota */
#define LUSTRE_Q_GETQUOTAPOOL 0x80000f /* get user pool quota */
#define LUSTRE_Q_SETQUOTAPOOL 0x800010 /* set user pool quota */
#define LUSTRE_Q_GETINFOPOOL 0x800011 /* get pool quota info */
#define LUSTRE_Q_SETINFOPOOL 0x800012 /* set pool quota info */
#define LUSTRE_Q_GETDEFAULT_POOL 0x800013 /* get default pool quota*/
#define LUSTRE_Q_SETDEFAULT_POOL 0x800014 /* set default pool quota */
#define LUSTRE_Q_DELETEQID 0x800015 /* delete quota ID */
#define LUSTRE_Q_RESETQID 0x800016 /* reset quota ID */
#define LUSTRE_Q_ITERQUOTA 0x800017 /* iterate quota information */
#define LUSTRE_Q_ITEROQUOTA 0x800018 /* iterate obd quota information */
#define LUSTRE_Q_GETALLQUOTA 0x800019 /* get all quota information */
#define LUSTRE_Q_GETQUOTALQA 0x80001a /* get LQA quota */
#define LUSTRE_Q_SETQUOTALQA 0x80001b /* set LQA quota */
#define LUSTRE_Q_GETINFOLQA 0x80001c /* get LQA quota info */
#define LUSTRE_Q_SETINFOLQA 0x80001d /* set LQA quota info */
/* In the current Lustre implementation, the grace time is either the time
 * or the timestamp to be used after some quota ID exceeds the soft limit,
 * 48 bits should be enough, its high 16 bits can be used as quota flags.
 *
 * LQUOTA_GRACE(t)	   low 48 bits of t (the grace time)
 * LQUOTA_FLAG(t)	   t shifted right by 48 bits (the flags)
 * LQUOTA_GRACE_FLAG(t, f) t combined with f placed in the high 16 bits;
 *			   t is not masked, so it must fit in 48 bits
 *
 * Arguments are parenthesized so that expressions can be passed safely.
 */
#define LQUOTA_GRACE_BITS	48
#define LQUOTA_GRACE_MASK	((1ULL << LQUOTA_GRACE_BITS) - 1)
#define LQUOTA_GRACE_MAX	LQUOTA_GRACE_MASK
#define LQUOTA_GRACE(t)		((t) & LQUOTA_GRACE_MASK)
#define LQUOTA_FLAG(t)		((t) >> LQUOTA_GRACE_BITS)
#define LQUOTA_GRACE_FLAG(t, f)	((__u64)(t) | (__u64)(f) << LQUOTA_GRACE_BITS)
/* special grace time, only notify the user when its quota is over soft limit
* but doesn't block new writes until the hard limit is reached.
*/
/* string form and numeric value (all 48 grace bits set) of the special
 * grace time
 */
#define NOTIFY_GRACE "notify"
#define NOTIFY_GRACE_TIME LQUOTA_GRACE_MASK
/* different quota flags */
/* the default quota flag, the corresponding quota ID will use the default
* quota setting, the hardlimit and softlimit of its quota record in the global
* quota file will be set to 0, the low 48 bits of the grace will be set to 0
* and high 16 bits will contain this flag (see above comment).
*/
#define LQUOTA_FLAG_DEFAULT 0x0001
#define LQUOTA_FLAG_DELETED 0x0002
#define LQUOTA_FLAG_RESET 0x0004
#define LQUOTA_FLAG_REVOKE 0x0008
/* True when cmd is one of the six pool quota commands.
 * cmd is parenthesized so that expressions (e.g. "x & mask") compare as a
 * whole; NB: it may be evaluated up to six times.
 */
#define LUSTRE_Q_CMD_IS_POOL(cmd)		\
	((cmd) == LUSTRE_Q_GETQUOTAPOOL ||	\
	 (cmd) == LUSTRE_Q_SETQUOTAPOOL ||	\
	 (cmd) == LUSTRE_Q_SETINFOPOOL ||	\
	 (cmd) == LUSTRE_Q_GETINFOPOOL ||	\
	 (cmd) == LUSTRE_Q_SETDEFAULT_POOL ||	\
	 (cmd) == LUSTRE_Q_GETDEFAULT_POOL)
/* True when cmd is one of the four LQA quota commands.
 * NB: cmd may be evaluated up to four times.
 */
#define LUSTRE_Q_CMD_IS_LQA(cmd)		\
	((cmd) == LUSTRE_Q_GETQUOTALQA ||	\
	 (cmd) == LUSTRE_Q_SETQUOTALQA ||	\
	 (cmd) == LUSTRE_Q_SETINFOLQA ||	\
	 (cmd) == LUSTRE_Q_GETINFOLQA)
#define ALLQUOTA 255 /* set all quota */

/**
 * Short name of a quota type.
 *
 * \retval "usr", "grp" or "prj" for USRQUOTA, GRPQUOTA or PRJQUOTA
 * \retval "unknown" for any other value
 */
static inline const char *qtype_name(int qtype)
{
	if (qtype == USRQUOTA)
		return "usr";
	if (qtype == GRPQUOTA)
		return "grp";
	if (qtype == PRJQUOTA)
		return "prj";

	return "unknown";
}
/* magic value (see struct identity_downcall_data::idd_magic) */
#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629
/* permission */
/* number of entries in struct identity_downcall_data::idd_perms[] */
#define N_PERMS_MAX 64
/* One permission entry: a 64-bit NID and a 32-bit permission word, padded
 * to 16 bytes.
 */
struct perm_downcall_data {
__u64 pdd_nid;
__u32 pdd_perm;
__u32 pdd_padding;
};
/* Identity downcall payload: fixed header, N_PERMS_MAX permission slots and
 * a flexible array of group ids.
 */
struct identity_downcall_data {
__u32 idd_magic;
__s32 idd_err; /* negative errno */
__u32 idd_uid;
__u32 idd_gid;
__u32 idd_nperms;
__u32 idd_ngroups;
struct perm_downcall_data idd_perms[N_PERMS_MAX];
__u32 idd_groups[];
};
/* The old layout is only compiled for versions below 2.16.53.0. */
#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0)
/* old interface struct is deprecated in 2.14 */
#define SEPOL_DOWNCALL_MAGIC_OLD 0x8b8bb842
struct sepol_downcall_data_old {
__u32 sdd_magic;
__s64 sdd_sepol_mtime;
__u16 sdd_sepol_len;
char sdd_sepol[];
};
#endif
#define SEPOL_DOWNCALL_MAGIC 0x8b8bb843
/* Current layout: the same information as the old struct, reordered with an
 * explicit 16-bit padding field after sdd_sepol_len.
 */
struct sepol_downcall_data {
__u32 sdd_magic;
__u16 sdd_sepol_len;
__u16 sdd_padding1;
__s64 sdd_sepol_mtime;
char sdd_sepol[];
};
/* Fallback QIF_* validity bits, provided only when the includer defines
 * NEED_QUOTA_DEFS and QIF_BLIMITS is not already defined.
 */
#ifdef NEED_QUOTA_DEFS
#ifndef QIF_BLIMITS
#define QIF_BLIMITS 1
#define QIF_SPACE 2
#define QIF_ILIMITS 4
#define QIF_INODES 8
#define QIF_BTIME 16
#define QIF_ITIME 32
#define QIF_LIMITS (QIF_BLIMITS | QIF_ILIMITS)
#define QIF_USAGE (QIF_SPACE | QIF_INODES)
#define QIF_TIMES (QIF_BTIME | QIF_ITIME)
#define QIF_ALL (QIF_LIMITS | QIF_USAGE | QIF_TIMES)
#endif
#endif /* NEED_QUOTA_DEFS */
/* these are not defined in the kernel */
#ifndef QIF_BSOFTLIMIT
#define QIF_BSOFTLIMIT 1024
/* hard-limit names alias the existing QIF_BLIMITS/QIF_ILIMITS bits */
#define QIF_BHARDLIMIT QIF_BLIMITS
#define QIF_ISOFTLIMIT 2048
#define QIF_IHARDLIMIT QIF_ILIMITS
#define QIF_FILESYSTEM 4096
/* NOTE(review): relies on QIF_ALL, which this file only defines inside the
 * NEED_QUOTA_DEFS fallback -- presumably otherwise supplied by
 * <linux/quota.h>; confirm.
 */
#define QIF_ALL_DETAIL (QIF_ALL | QIF_BSOFTLIMIT | QIF_ISOFTLIMIT | \
QIF_FILESYSTEM)
#endif
/* lustre volatile file support
* file name header: ".^L^S^T^R:VOLATILE"
* (^L, ^S, ^T, ^R are the control bytes 0x0c, 0x13, 0x14, 0x12);
* LUSTRE_VOLATILE_HDR_LEN is the header length without the trailing NUL.
*/
#define LUSTRE_VOLATILE_HDR ".\x0c\x13\x14\x12:VOLATILE"
#define LUSTRE_VOLATILE_HDR_LEN 14
/* Quota version identifiers; only one value is defined. */
enum lustre_quota_version {
LUSTRE_QUOTA_V2 = 1
};
/* XXX: same as if_dqinfo struct in kernel */
struct obd_dqinfo {
__u64 dqi_bgrace;
__u64 dqi_igrace;
__u32 dqi_flags;
__u32 dqi_valid;
};
/* XXX: same as if_dqblk struct in kernel, plus one padding */
/* NB: block limits are in kbytes while current space is in bytes */
struct obd_dqblk {
__u64 dqb_bhardlimit; /* kbytes unit */
__u64 dqb_bsoftlimit; /* kbytes unit */
__u64 dqb_curspace; /* bytes unit */
__u64 dqb_ihardlimit;
__u64 dqb_isoftlimit;
__u64 dqb_curinodes;
__u64 dqb_btime;
__u64 dqb_itime;
__u32 dqb_valid;
__u32 dqb_padding;
};
/* NOTE(review): presumably the values of struct if_quotactl::qc_valid,
 * selecting how the target is addressed -- confirm against the users.
 */
enum {
QC_GENERAL = 0,
QC_MDTIDX = 1,
QC_OSTIDX = 2,
QC_UUID = 3
};
/* Quota control argument block; ends with a flexible pool-name array. */
struct if_quotactl {
__u32 qc_cmd;
__u32 qc_type;
__u32 qc_id;
__u32 qc_stat;
__u32 qc_valid;
__u32 qc_idx;
struct obd_dqinfo qc_dqinfo;
struct obd_dqblk qc_dqblk;
char obd_type[16];
struct obd_uuid obd_uuid;
char qc_poolname[];
};
/* Alternative names for qc_dqblk fields: the "all quota" parameters are
 * stored in the same storage as the regular quota block fields.
 */
#define qc_allquota_count qc_dqblk.dqb_bhardlimit
#define qc_allquota_buffer qc_dqblk.dqb_bsoftlimit
#define qc_allquota_buflen qc_dqblk.dqb_curspace
#define qc_allquota_qid_start qc_dqblk.dqb_curinodes
#define qc_allquota_qid_end qc_dqblk.dqb_btime
#define qc_allquota_mark qc_dqblk.dqb_itime
/* LQA management commands. */
enum lqa_cmd_type {
LQA_NEW = 1, /* create an LQA by name */
LQA_ADD = 2, /* add a range to LQA */
LQA_REM = 3, /* remove a range from an LQA */
LQA_DEL = 4, /* destroy an LQA */
LQA_LIST = 5 /* list ranges per LQA */
};
/* An id range given by its start and end values. */
struct lqa_id_range {
__u32 lir_start;
__u32 lir_end;
};
#define LQA_NAME_MAX LOV_MAXPOOLNAME /* Maximum lqa name length */
#define LQA_RANGE_SIZE (sizeof(struct lqa_id_range)) /* LQA range size: %u%u */
/* swap layout flags */
/* Bit flags; the highest bit is documented below as server only. */
enum lustre_swap_layouts_flags {
SWAP_LAYOUTS_CHECK_DV1 = 0x00000001,
SWAP_LAYOUTS_CHECK_DV2 = 0x00000002,
SWAP_LAYOUTS_KEEP_MTIME = 0x00000004,
SWAP_LAYOUTS_KEEP_ATIME = 0x00000008,
SWAP_LAYOUTS_CLOSE = 0x00000010,
/* Sent to the MDT through mdc_swap_layouts::msl_flags to indicate that
* mdc_swap_layouts contains valid msl_dv1 and msl_dv2.
*/
SWAP_LAYOUTS_WITH_DV12 = 0x00000020,
/* Skip the UID/GID check before a swap layout for a release
* (server only)
*/
SWAP_LAYOUTS_MDS_RELEASE = 0x80000000,
};
/* Swap-layouts request arguments.
 * NOTE(review): sl_flags presumably carries enum lustre_swap_layouts_flags
 * bits and sl_dv1/sl_dv2 the data versions checked by the CHECK_DV flags --
 * confirm against the callers.
 */
struct lustre_swap_layouts {
__u64 sl_flags;
__u32 sl_fd;
__u32 sl_gid;
__u64 sl_dv1;
__u64 sl_dv2;
};
/** Bit-mask of valid attributes */
/* The LA_* flags are written to disk as part of the ChangeLog records
* so they are part of the on-disk and network protocol, and cannot be changed.
* Only the first 12 bits are currently saved.
*/
enum la_valid {
LA_ATIME = 1 << 0, /* 0x00001 */
LA_MTIME = 1 << 1, /* 0x00002 */
LA_CTIME = 1 << 2, /* 0x00004 */
LA_SIZE = 1 << 3, /* 0x00008 */
LA_MODE = 1 << 4, /* 0x00010 */
LA_UID = 1 << 5, /* 0x00020 */
LA_GID = 1 << 6, /* 0x00040 */
LA_BLOCKS = 1 << 7, /* 0x00080 */
LA_TYPE = 1 << 8, /* 0x00100 */
LA_FLAGS = 1 << 9, /* 0x00200 */
LA_NLINK = 1 << 10, /* 0x00400 */
LA_RDEV = 1 << 11, /* 0x00800 */
LA_BLKSIZE = 1 << 12, /* 0x01000 */
LA_KILL_SUID = 1 << 13, /* 0x02000 */
LA_KILL_SGID = 1 << 14, /* 0x04000 */
LA_PROJID = 1 << 15, /* 0x08000 */
LA_LAYOUT_VERSION = 1 << 16, /* 0x10000 */
LA_LSIZE = 1 << 17, /* 0x20000 */
LA_LBLOCKS = 1 << 18, /* 0x40000 */
LA_BTIME = 1 << 19, /* 0x80000 */
LA_DIRENT_CNT = 1 << 20, /* 0x100000 */
/**
* Attributes must be transmitted to OST objects
*/
LA_REMOTE_ATTR_SET = (LA_UID | LA_GID | LA_PROJID | LA_LAYOUT_VERSION)
};
/* Open flags, written in octal.  Values from MDS_OPEN_NORESTORE upwards
 * carry a ULL suffix because they do not fit in 32 bits.
 */
enum mds_open_flags {
MDS_FMODE_CLOSED = 00000000,
MDS_FMODE_READ = 00000001,
MDS_FMODE_WRITE = 00000002,
/* MAY_EXEC checks for permission eg inode_permission(). Different from
* MDS_FMODE_EXECUTE which is permission check via execve
*/
MDS_FMODE_EXEC = 00000004,
MDS_OPEN_CREATED = 00000010,
/* MDS_OPEN_CROSS = 00000020, obsolete in 2.12, internal use only */
/* open for execution via execve */
MDS_FMODE_EXECUTE = 00000020,
MDS_OPEN_CREAT = 00000100,
MDS_OPEN_EXCL = 00000200,
MDS_OPEN_NOCTTY = 00000400,
MDS_OPEN_TRUNC = 00001000,
MDS_OPEN_APPEND = 00002000,
MDS_OPEN_NONBLOCK = 00004000,
MDS_OPEN_SYNC = 00010000,
MDS_OPEN_FASYNC = 00020000,
MDS_OPEN_LARGEFILE = 00100000,
MDS_OPEN_DIRECTORY = 00200000,
MDS_OPEN_NOFOLLOW = 00400000,
/* MDS_FMODE_EPOCH = 01000000, obsolete in 2.8.0 */
/* MDS_FMODE_TRUNC = 02000000, obsolete in 2.8.0 */
/* MDS_FMODE_SOM = 04000000, obsolete in 2.8.0 */
MDS_OPEN_BY_FID = 040000000, /* open_by_fid for known object */
MDS_OPEN_DELAY_CREATE = 0100000000, /* delay initial object create */
MDS_OPEN_OWNEROVERRIDE = 0200000000, /* NFSD rw-reopen ro file for owner */
/* MDS_OPEN_JOIN_FILE = 0400000000, obsolete in 1.4 */
/* FMODE_NONOTIFY = 0400000000, from OPEN_FMODE() */
MDS_OPEN_LOCK = 04000000000, /* This requires open lock */
MDS_OPEN_HAS_EA = 010000000000, /* specify obj create pattern */
MDS_OPEN_HAS_OBJS = 020000000000, /* Just set EA, the obj exist */
MDS_OPEN_NORESTORE = 0100000000000ULL, /* Dont restore file at open */
/* New stripe needed (restripe or hsm restore) */
MDS_OPEN_NEWSTRIPE = 0200000000000ULL,
MDS_OPEN_VOLATILE = 0400000000000ULL, /* File is volatile = created linked */
/* Open file and grant lease delegation, success if not being opened with conflict mode */
MDS_OPEN_LEASE = 01000000000000ULL,
MDS_OPEN_RELEASE = 02000000000000ULL, /* Open file for HSM release */
MDS_OPEN_RESYNC = 04000000000000ULL, /* FLR: file resync */
/* PCC: auto RW-PCC cache attach for newly created file */
MDS_OPEN_PCC = 010000000000000ULL,
MDS_OP_WITH_FID = 020000000000000ULL, /* operation carried out by FID */
/* open fetches default LMV, or mkdir with default LMV */
MDS_OPEN_DEFAULT_LMV = 040000000000000ULL,
/* lustre internal open flags, should not be set from user space */
MDS_OPEN_FL_INTERNAL = (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS |
MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_PCC |
MDS_OPEN_BY_FID | MDS_OPEN_LEASE |
MDS_OPEN_RELEASE | MDS_OPEN_RESYNC |
MDS_OPEN_LOCK | MDS_OP_WITH_FID |
MDS_OPEN_DEFAULT_LMV),
};
/* mkdir fetches LMV, reuse bit of MDS_OPEN_RESYNC */
#define MDS_MKDIR_LMV MDS_OPEN_RESYNC
/********* Changelogs **********/
/** Changelog record types */
/* Values from CL_MARK up to CL_LAST - 1 are consecutive; CL_LAST is one past
 * the last defined type.
 */
enum changelog_rec_type {
CL_NONE = -1,
CL_MARK = 0,
CL_CREATE = 1, /* namespace */
CL_MKDIR = 2, /* namespace */
CL_HARDLINK = 3, /* namespace */
CL_SOFTLINK = 4, /* namespace */
CL_MKNOD = 5, /* namespace */
CL_UNLINK = 6, /* namespace */
CL_RMDIR = 7, /* namespace */
CL_RENAME = 8, /* namespace */
CL_EXT = 9, /* namespace extended record (2nd half of rename) */
CL_OPEN = 10, /* not currently used */
CL_CLOSE = 11, /* may be written to log only with mtime change */
CL_LAYOUT = 12, /* file layout/striping modified */
CL_TRUNC = 13,
CL_SETATTR = 14,
CL_SETXATTR = 15,
CL_XATTR = CL_SETXATTR, /* Deprecated name */
CL_HSM = 16, /* HSM specific events, see flags */
CL_MTIME = 17, /* Precedence: setattr > mtime > ctime > atime */
CL_CTIME = 18,
CL_ATIME = 19,
CL_MIGRATE = 20,
CL_FLRW = 21, /* FLR: file was firstly written */
CL_RESYNC = 22, /* FLR: file was resync-ed */
CL_GETXATTR = 23,
CL_DN_OPEN = 24, /* denied open */
CL_LAST,
};
/**
 * Short display name of a changelog record type.
 *
 * \param[in] type	record type, see enum changelog_rec_type
 *
 * \retval name string for types in [0, CL_LAST)
 * \retval NULL for any other value (including CL_NONE)
 */
static inline const char *changelog_type2str(int type)
{
	/* one entry per record type, in enum order starting at CL_MARK */
	static const char *const changelog_str[] = {
		"MARK",  "CREAT", "MKDIR", "HLINK", "SLINK",
		"MKNOD", "UNLNK", "RMDIR", "RENME", "RNMTO",
		"OPEN",  "CLOSE", "LYOUT", "TRUNC", "SATTR",
		"XATTR", "HSM",   "MTIME", "CTIME", "ATIME",
		"MIGRT", "FLRW",  "RESYNC", "GXATR", "NOPEN",
	};

	if (type < 0 || type >= CL_LAST)
		return NULL;

	return changelog_str[type];
}
/* 12 bits of per-record data can be stored in the bottom of the flags */
#define CLF_FLAGSHIFT 12
/* Format bits start at bit 12, just above the per-record data bits. */
enum changelog_rec_flags {
CLF_VERSION = 0x1000,
CLF_RENAME = 0x2000,
CLF_JOBID = 0x4000,
CLF_EXTRA_FLAGS = 0x8000,
CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID |
CLF_EXTRA_FLAGS,
/* low 12 bits (0x0fff) */
CLF_FLAGMASK = (1U << CLF_FLAGSHIFT) - 1,
/* every bit above the low 12 */
CLF_VERMASK = ~CLF_FLAGMASK,
};
/* Anything under the flagmask may be per-type (if desired) */
/* NB: the unlink and rename flags below reuse the same two bit values */
/* Flags for unlink */
#define CLF_UNLINK_LAST 0x0001 /* Unlink of last hardlink */
#define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */
/* HSM cleaning needed */
/* Flags for rename */
#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink
* of target
*/
#define CLF_RENAME_LAST_EXISTS 0x0002 /* rename unlink last hardlink of target
* has an archive in backend
*/
/* Flags for HSM */
/* 12b used (from high weight to low weight):
* 2b for flags
* 3b for event
* 7b for error code
*/
#define CLF_HSM_ERR_L		0 /* HSM return code, 7 bits */
#define CLF_HSM_ERR_H		6
#define CLF_HSM_EVENT_L		7 /* HSM event, 3 bits, see enum hsm_event */
#define CLF_HSM_EVENT_H		9
#define CLF_HSM_FLAG_L		10 /* HSM flags, 2 bits, 1 used, 1 spare */
#define CLF_HSM_FLAG_H		11
#define CLF_HSM_SPARE_L		12 /* 4 spare bits */
#define CLF_HSM_SPARE_H		15
#define CLF_HSM_LAST		15
/* Remove bits higher than _h, then extract the value
 * between _h and _l by shifting lower weight to bit 0.
 *
 * The flags word is treated as 16 bits wide (CLF_HSM_LAST is its top bit).
 * Every argument is parenthesized so that expressions such as "a | b" or
 * "h + 1" can be passed without being torn apart by operator precedence.
 */
#define CLF_GET_BITS(_b, _h, _l) \
	((((_b) << (CLF_HSM_LAST - (_h))) & 0xFFFF) >> \
	 (CLF_HSM_LAST - (_h) + (_l)))
#define CLF_HSM_SUCCESS		0x00
#define CLF_HSM_MAXERROR	0x7E
#define CLF_HSM_ERROVERFLOW	0x7F
#define CLF_HSM_DIRTY		1 /* file is dirty after HSM request end */
/* 3 bits field => 8 values allowed */
/* Event codes stored in the CLF_HSM_EVENT_L..CLF_HSM_EVENT_H bits of the
 * changelog flags; see hsm_get_cl_event() and hsm_set_cl_event().
 */
enum hsm_event {
HE_ARCHIVE = 0,
HE_RESTORE = 1,
HE_CANCEL = 2,
HE_RELEASE = 3,
HE_REMOVE = 4,
HE_STATE = 5,
HE_SPARE1 = 6,
HE_SPARE2 = 7,
};
/* Read the HSM event code out of the event bit-field of changelog flags. */
static inline enum hsm_event hsm_get_cl_event(__u16 flags)
{
	int event = CLF_GET_BITS(flags, CLF_HSM_EVENT_H, CLF_HSM_EVENT_L);

	return (enum hsm_event)event;
}
static inline void hsm_set_cl_event(enum changelog_rec_flags *clf_flags,
enum hsm_event he)
{
*clf_flags = (enum changelog_rec_flags)
((__u32)*clf_flags | ((__u32)he << CLF_HSM_EVENT_L));
}
/* Read the HSM flag bits (CLF_HSM_FLAG_L..CLF_HSM_FLAG_H) of changelog flags. */
static inline __u16 hsm_get_cl_flags(enum changelog_rec_flags clf_flags)
{
	__u16 hsm_flags = CLF_GET_BITS(clf_flags, CLF_HSM_FLAG_H,
				       CLF_HSM_FLAG_L);

	return hsm_flags;
}
static inline void hsm_set_cl_flags(enum changelog_rec_flags *clf_flags,
unsigned int bits)
{
*clf_flags = (enum changelog_rec_flags)
((__u32)*clf_flags | (__u32)(bits << CLF_HSM_FLAG_L));
}
/* Read the HSM return code (CLF_HSM_ERR_L..CLF_HSM_ERR_H) of changelog flags. */
static inline int hsm_get_cl_error(enum changelog_rec_flags clf_flags)
{
	int err_bits = CLF_GET_BITS(clf_flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L);

	return err_bits;
}
/**
 * Store an HSM return code in the error bit-field of the changelog flags.
 *
 * The magnitude of \a error is used, so positive and negative errno values
 * are stored identically. Values above CLF_HSM_MAXERROR are recorded as
 * CLF_HSM_ERROVERFLOW. The code is OR-ed into \a *clf_flags; bits that are
 * already set are left untouched.
 *
 * \param clf_flags	[IN,OUT] flags word to update
 * \param error		[IN] error code, either sign
 *
 * \retval 0		the code was stored as given
 * \retval -EOVERFLOW	CLF_HSM_ERROVERFLOW was stored
 */
static inline int hsm_set_cl_error(enum changelog_rec_flags *clf_flags,
				   int error)
{
	/* Take the magnitude in unsigned arithmetic: abs(INT_MIN) is not
	 * representable in an int and would be undefined behavior.
	 */
	unsigned int code = error < 0 ? 0U - (unsigned int)error :
					(unsigned int)error;

	if (code > CLF_HSM_MAXERROR)
		code = CLF_HSM_ERROVERFLOW;

	*clf_flags = (enum changelog_rec_flags)
			(*clf_flags | (int)(code << CLF_HSM_ERR_L));

	return code == CLF_HSM_ERROVERFLOW ? -EOVERFLOW : 0;
}
/* Bits of changelog_ext_extra_flags::cr_extra_flags. UIDGID, NID, OPEN and
 * XATTR each announce one extra extension whose size changelog_rec_offset()
 * adds to the record.
 */
enum changelog_rec_extra_flags {
CLFE_INVALID = 0,
CLFE_UIDGID = 0x0001, /* struct changelog_ext_uidgid is present */
CLFE_NID = 0x0002, /* struct changelog_ext_nid is present */
CLFE_OPEN = 0x0004, /* struct changelog_ext_openmode is present */
CLFE_XATTR = 0x0008, /* struct changelog_ext_xattr is present */
/* NID is in network-byte-order and may be large. */
CLFE_NID_BE = 0x0010,
/* all of the extra flag bits defined above */
CLFE_SUPPORTED = CLFE_UIDGID | CLFE_NID | CLFE_OPEN | CLFE_XATTR |
CLFE_NID_BE,
};
/* Option bits for changelog delivery; each value is a distinct bit so the
 * options can be combined.
 */
enum changelog_send_flag {
/* Use changelog follow mode: llapi_changelog_recv() will not stop at
* the end of records and wait for new records to be generated.
*/
CHANGELOG_FLAG_FOLLOW = 0x01,
/* Deprecated since Lustre 2.10 */
CHANGELOG_FLAG_BLOCK = 0x02,
/* Pack jobid into the changelog records if available. */
CHANGELOG_FLAG_JOBID = 0x04,
/* Pack additional flag bits into the changelog record */
CHANGELOG_FLAG_EXTRA_FLAGS = 0x08,
/* Request NIDs to be packed in large big-endian format */
CHANGELOG_FLAG_NID_BE = 0x10,
};
/* Upper bound on the size of one changelog record: the header plus every
 * supported extension (changelog_rec_offset() with all supported flags),
 * plus 2 * NAME_MAX + 2 bytes of name data, rounded up to a multiple of 8.
 * NOTE(review): the "+ 2" presumably covers the two NUL terminators - confirm.
 */
#define CR_MAXSIZE __ALIGN_KERNEL(2 * NAME_MAX + 2 + \
changelog_rec_offset(CLF_SUPPORTED, \
CLFE_SUPPORTED), 8)
/* 31 usable bytes string + null terminator. */
#define LUSTRE_JOBID_SIZE 32
/* This is the minimal changelog record. It can contain extensions
* such as rename fields or process jobid. Its exact content is described
* by the cr_flags and cr_extra_flags.
*
* Extensions are packed in the same order as their corresponding flags,
* then in the same order as their corresponding extra flags.
*/
/* Fixed header of every changelog record. The structure is packed, so no
 * padding is inserted between or after the members.
 */
struct changelog_rec {
/* length of the name data stored after the extensions; used by
 * changelog_rec_varsize() and changelog_rec_snamelen()
 */
__u16 cr_namelen;
__u16 cr_flags; /**< \a changelog_rec_flags */
__u32 cr_type; /**< \a changelog_rec_type */
__u64 cr_index; /**< changelog record number */
__u64 cr_prev; /**< last index for this target fid */
/* NOTE(review): units/encoding of the timestamp are not visible here */
__u64 cr_time;
union {
struct lu_fid cr_tfid; /**< target fid */
__u32 cr_markerflags; /**< CL_MARK flags */
};
struct lu_fid cr_pfid; /**< parent fid */
} __attribute__ ((packed));
/* Changelog extension for RENAME. */
/* Counted in the record size when CLF_RENAME is set; located with
 * changelog_rec_rename().
 */
struct changelog_ext_rename {
struct lu_fid cr_sfid; /**< source fid, or zero */
struct lu_fid cr_spfid; /**< source parent fid, or zero */
};
/* Changelog extension to include JOBID.
 * Counted when CLF_JOBID is set; located with changelog_rec_jobid().
 */
struct changelog_ext_jobid {
char cr_jobid[LUSTRE_JOBID_SIZE]; /**< zero-terminated string. */
};
/* Changelog extension to include additional flags.
 * Counted when CLF_EXTRA_FLAGS is set; located with
 * changelog_rec_extra_flags().
 */
struct changelog_ext_extra_flags {
__u64 cr_extra_flags; /* Additional CLFE_* flags */
};
/* Changelog extra extension to include UID/GID.
 * Counted when CLFE_UIDGID is set; located with changelog_rec_uidgid().
 */
struct changelog_ext_uidgid {
__u64 cr_uid;
__u64 cr_gid;
};
/* Changelog extra extension to include NID.
 * Counted when CLFE_NID is set; located with changelog_rec_nid().
 */
struct changelog_ext_nid {
/* If CLFE_NID_BE is not set cr_nid is of the lnet_nid_t type.
* With CLFE_NID_BE set then all this data is struct lnet_nid
*/
__u64 cr_nid;
__u64 extra;
__u32 padding;
};
/* Changelog extra extension to include low 32 bits of MDS_OPEN_* flags.
 * Counted when CLFE_OPEN is set; located with changelog_rec_openmode().
 */
struct changelog_ext_openmode {
__u32 cr_openflags; /* enum mds_open_flags */
};
/* Changelog extra extension to include xattr
 * Counted when CLFE_XATTR is set; located with changelog_rec_xattr().
 */
struct changelog_ext_xattr {
char cr_xattr[XATTR_NAME_MAX + 1]; /**< zero-terminated string. */
};
/* Changelog filter for kernel-side filtering */
/* NOTE(review): the meaning of each field is inferred from its name only;
 * no user of this structure is visible in this part of the header.
 */
struct changelog_filter {
__u64 cf_mask; /* presumably a bitmask of record types - confirm */
__u32 cf_user_id;
__u32 cf_padding;
char cf_username[30]; /* CHANGELOG_USER_NAMELEN_FULL */
};
/* Forward declaration: changelog_rec_size() calls this accessor before its
 * definition appears further down in this header.
 */
static inline struct changelog_ext_extra_flags *changelog_rec_extra_flags(
const struct changelog_rec *rec);
/**
 * Size of a changelog record header plus the extensions selected by the
 * given flags. This is also the offset of whatever follows them.
 *
 * \param crf	record flags; CLF_RENAME, CLF_JOBID and CLF_EXTRA_FLAGS are used
 * \param cref	extra flags; only looked at when \a crf has CLF_EXTRA_FLAGS
 *
 * \retval	size in bytes
 */
static
inline __kernel_size_t changelog_rec_offset(enum changelog_rec_flags crf,
					    enum changelog_rec_extra_flags cref)
{
	__kernel_size_t offset = sizeof(struct changelog_rec);

	offset += (crf & CLF_RENAME) ? sizeof(struct changelog_ext_rename) : 0;
	offset += (crf & CLF_JOBID) ? sizeof(struct changelog_ext_jobid) : 0;

	/* extra extensions can only exist behind the extra-flags word */
	if (!(crf & CLF_EXTRA_FLAGS))
		return offset;

	offset += sizeof(struct changelog_ext_extra_flags);
	offset += (cref & CLFE_UIDGID) ?
			sizeof(struct changelog_ext_uidgid) : 0;
	offset += (cref & CLFE_NID) ? sizeof(struct changelog_ext_nid) : 0;
	offset += (cref & CLFE_OPEN) ?
			sizeof(struct changelog_ext_openmode) : 0;
	offset += (cref & CLFE_XATTR) ?
			sizeof(struct changelog_ext_xattr) : 0;

	return offset;
}
static
inline __kernel_size_t changelog_rec_size(const struct changelog_rec *rec)
{
enum changelog_rec_extra_flags cref = CLFE_INVALID;
if (rec->cr_flags & CLF_EXTRA_FLAGS)
cref = (enum changelog_rec_extra_flags)
changelog_rec_extra_flags(rec)->cr_extra_flags;
return changelog_rec_offset(
(enum changelog_rec_flags)rec->cr_flags, cref);
}
/* Size of the variable part of \a rec: its extensions plus its name data. */
static
inline __kernel_size_t changelog_rec_varsize(const struct changelog_rec *rec)
{
	__kernel_size_t ext_bytes = changelog_rec_size(rec) - sizeof(*rec);

	return ext_bytes + rec->cr_namelen;
}
static inline
struct changelog_ext_rename *changelog_rec_rename(const struct changelog_rec *rec)
{
enum changelog_rec_flags crf = (enum changelog_rec_flags)
(rec->cr_flags & CLF_VERSION);
return (struct changelog_ext_rename *)((char *)rec +
changelog_rec_offset(crf,
CLFE_INVALID));
}
/* The jobid follows the rename extension, if present */
static inline
struct changelog_ext_jobid *changelog_rec_jobid(const struct changelog_rec *rec)
{
enum changelog_rec_flags crf = (enum changelog_rec_flags)
(rec->cr_flags & (CLF_VERSION | CLF_RENAME));
return (struct changelog_ext_jobid *)((char *)rec +
changelog_rec_offset(crf,
CLFE_INVALID));
}
/* The additional flags follow the rename and jobid extensions, if present */
static inline
struct changelog_ext_extra_flags *changelog_rec_extra_flags(
const struct changelog_rec *rec)
{
enum changelog_rec_flags crf = (enum changelog_rec_flags)
(rec->cr_flags & (CLF_VERSION | CLF_RENAME | CLF_JOBID));
return (struct changelog_ext_extra_flags *)((char *)rec +
changelog_rec_offset(crf,
CLFE_INVALID));
}
/* The uid/gid is the first extra extension */
static inline
struct changelog_ext_uidgid *changelog_rec_uidgid(
const struct changelog_rec *rec)
{
enum changelog_rec_flags crf = (enum changelog_rec_flags)
(rec->cr_flags &
(CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS));
return (struct changelog_ext_uidgid *)((char *)rec +
changelog_rec_offset(crf,
CLFE_INVALID));
}
/* The nid is the second extra extension */
static inline
struct changelog_ext_nid *changelog_rec_nid(const struct changelog_rec *rec)
{
enum changelog_rec_flags crf = (enum changelog_rec_flags)
(rec->cr_flags &
(CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS));
enum changelog_rec_extra_flags cref = CLFE_INVALID;
if (rec->cr_flags & CLF_EXTRA_FLAGS)
cref = (enum changelog_rec_extra_flags)
(changelog_rec_extra_flags(rec)->cr_extra_flags &
CLFE_UIDGID);
return (struct changelog_ext_nid *)((char *)rec +
changelog_rec_offset(crf, cref));
}
/* The OPEN mode is the third extra extension */
static inline
struct changelog_ext_openmode *changelog_rec_openmode(
const struct changelog_rec *rec)
{
enum changelog_rec_flags crf = (enum changelog_rec_flags)
(rec->cr_flags &
(CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS));
enum changelog_rec_extra_flags cref = CLFE_INVALID;
if (rec->cr_flags & CLF_EXTRA_FLAGS) {
cref = (enum changelog_rec_extra_flags)
(changelog_rec_extra_flags(rec)->cr_extra_flags &
(CLFE_UIDGID | CLFE_NID));
}
return (struct changelog_ext_openmode *)((char *)rec +
changelog_rec_offset(crf, cref));
}
/* The xattr name is the fourth extra extension */
static inline
struct changelog_ext_xattr *changelog_rec_xattr(
const struct changelog_rec *rec)
{
enum changelog_rec_flags crf = (enum changelog_rec_flags)
(rec->cr_flags &
(CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS));
enum changelog_rec_extra_flags cref = CLFE_INVALID;
if (rec->cr_flags & CLF_EXTRA_FLAGS)
cref = (enum changelog_rec_extra_flags)
(changelog_rec_extra_flags(rec)->cr_extra_flags &
(CLFE_UIDGID | CLFE_NID | CLFE_OPEN));
return (struct changelog_ext_xattr *)((char *)rec +
changelog_rec_offset(crf, cref));
}
/* The name follows the rename, jobid and extra flags extns, if present */
static inline char *changelog_rec_name(const struct changelog_rec *rec)
{
enum changelog_rec_extra_flags cref = CLFE_INVALID;
if (rec->cr_flags & CLF_EXTRA_FLAGS)
cref = (enum changelog_rec_extra_flags)
changelog_rec_extra_flags(rec)->cr_extra_flags;
return (char *)rec + changelog_rec_offset(
(enum changelog_rec_flags)(rec->cr_flags & CLF_SUPPORTED),
(enum changelog_rec_extra_flags)(cref & CLFE_SUPPORTED));
}
/**
 * Locate the data stored behind the first string of the record's name area.
 *
 * The name area is scanned for a NUL terminator, looking at no more than
 * NAME_MAX + 1 bytes.
 *
 * \param rec	changelog record
 *
 * \retval	pointer to the byte following the position where the scan
 *		stopped (the terminator, when one was found in range)
 */
static inline char *changelog_rec_sname(const struct changelog_rec *rec)
{
	char *str = changelog_rec_name(rec);
	char *end = str + NAME_MAX; /* NB: NAME_MAX use in CR_MAXSIZE */

	/* Test the bound before dereferencing, so that an unterminated name
	 * never causes a read beyond str[NAME_MAX].
	 */
	while (str <= end && *str != '\0')
		str++;
	return str + 1;
}
/* Number of name bytes left once the first name and its terminator, as
 * delimited by changelog_rec_sname(), are taken out of cr_namelen.
 */
static
inline __kernel_size_t changelog_rec_snamelen(const struct changelog_rec *rec)
{
	char *first = changelog_rec_name(rec);
	char *second = changelog_rec_sname(rec);
	/* always positive but < cr_namelen, see changelog_rec_sname() code */
	size_t consumed = (size_t)(second - first);

	return rec->cr_namelen - consumed;
}
/* Kinds of message used when changelog data is delivered */
enum changelog_message_type {
CL_RECORD = 10, /* message is a changelog_rec */
CL_EOF = 11, /* at end of current changelog */
};
/********* Misc **********/
/* Data version information; idv_flags takes enum ioc_data_version_flags.
 * NOTE(review): which fields are input and which are output is not visible
 * in this header - confirm against the ioctl handler.
 */
struct ioc_data_version {
__u64 idv_version;
__u32 idv_layout_version; /* FLR: layout version for OST objects */
__u32 idv_flags; /* enum ioc_data_version_flags */
};
/* Bit flags for ioc_data_version::idv_flags */
enum ioc_data_version_flags {
LL_DV_RD_FLUSH = (1 << 0), /* Flush dirty pages from clients */
LL_DV_WR_FLUSH = (1 << 1), /* Flush all caching pages from clients */
LL_DV_SZ_UPDATE = (1 << 2), /* Update the file size on the client */
};
/* Fallback definition, used only when offsetof() is not already available */
#ifndef offsetof
#define offsetof(typ, memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
#endif
/* Reserved names; NOTE(review): where these entries live is not visible here */
#define dot_lustre_name ".lustre"
#define dot_fscrypt_name ".fscrypt"
/********* HSM **********/
/* Size in bytes of the lhb_uuid buffer below */
#define UUID_MAX 40
/* HSM-private part of struct lov_hsm_md (embedded there as lhm_hsm) */
struct lov_hsm_base {
/* HSM archive ID */
__u64 lhb_archive_id;
/* Data version associated with the last archiving, if any. */
__u64 lhb_archive_ver;
/* Identifier within HSM backend */
char lhb_uuid[UUID_MAX];
};
/**
* HSM layout is a kind of FOREIGN layout.
*/
struct lov_hsm_md {
/* LOV_MAGIC_FOREIGN */
__u32 lhm_magic;
/* To make HSM layout compatible with lov_foreign_md, this @length
* includes everything after @lhm_flags: sizeof(lhm_archive_id) +
* sizeof(lhm_archive_ver) + length of lhm_archive_uuid.
*/
__u32 lhm_length;
/* HSM type, see LU_FOREIGN_TYPE_{POSIX, S3, PCCRW, PCCRO}. */
__u32 lhm_type;
/* HSM flags, see enum hsm_states */
__u32 lhm_flags;
/*
* Data structure members above are compatible with @lov_foreign_md.
* The following members are private to HSM layout.
*/
struct lov_hsm_base lhm_hsm;
} __attribute__((packed));
/* Shorthand accessors for the members of the embedded lov_hsm_base */
#define lhm_archive_id lhm_hsm.lhb_archive_id
#define lhm_archive_ver lhm_hsm.lhb_archive_ver
#define lhm_archive_uuid lhm_hsm.lhb_uuid
/* Tell whether \a type is one of the foreign layout types accepted for an
 * HSM layout: POSIX, PCCRW, PCCRO or S3.
 */
static inline bool lov_hsm_type_supported(__u32 type)
{
	if (type == LU_FOREIGN_TYPE_POSIX)
		return true;
	if (type == LU_FOREIGN_TYPE_PCCRW)
		return true;
	if (type == LU_FOREIGN_TYPE_PCCRO)
		return true;

	return type == LU_FOREIGN_TYPE_S3;
}
/* Tell whether \a type is a supported foreign layout type: any type accepted
 * by lov_hsm_type_supported(), or the symlink type.
 */
static inline bool lov_foreign_type_supported(__u32 type)
{
	if (type == LU_FOREIGN_TYPE_SYMLINK)
		return true;

	return lov_hsm_type_supported(type);
}
/**
* HSM per-file state
* See HSM_FLAGS below.
*/
/* Each state is a separate bit, so several can be set at once. The masks
 * below split the bits into a user-settable group and a status group.
 */
enum hsm_states {
HS_NONE = 0x00000000,
HS_EXISTS = 0x00000001,
HS_DIRTY = 0x00000002,
HS_RELEASED = 0x00000004,
HS_ARCHIVED = 0x00000008,
HS_NORELEASE = 0x00000010,
HS_NOARCHIVE = 0x00000020,
HS_LOST = 0x00000040,
HS_PCCRW = 0x00000080,
HS_PCCRO = 0x00000100,
};
/* HSM user-settable flags. */
#define HSM_USER_MASK (HS_NORELEASE | HS_NOARCHIVE | HS_DIRTY)
/* Other HSM flags. */
#define HSM_STATUS_MASK (HS_EXISTS | HS_LOST | HS_RELEASED | HS_ARCHIVED | \
HS_PCCRW | HS_PCCRO)
/*
* All HSM-related possible flags that could be applied to a file.
* This should be kept in sync with hsm_states.
*/
#define HSM_FLAGS_MASK (HSM_USER_MASK | HSM_STATUS_MASK)
/**
* HSM request progress state
*/
enum hsm_progress_states {
	HPS_NONE	= 0,
	HPS_WAITING	= 1,
	HPS_RUNNING	= 2,
	HPS_DONE	= 3,
};

/* Printable name of an HSM progress state. HPS_NONE and any value outside
 * the enum are reported as "unknown".
 */
static inline const char *hsm_progress_state2name(enum hsm_progress_states s)
{
	if (s == HPS_WAITING)
		return "waiting";
	if (s == HPS_RUNNING)
		return "running";
	if (s == HPS_DONE)
		return "done";

	return "unknown";
}
/* A range given as a starting offset and a length; packed, 16 bytes */
struct hsm_extent {
__u64 offset;
__u64 length;
} __attribute__((packed));
/**
* Current HSM states of a Lustre file.
*
* This structure purpose is to be sent to user-space mainly. It describes the
* current HSM flags and in-progress action.
*/
struct hsm_user_state {
/** Current HSM states, from enum hsm_states. */
__u32 hus_states;
__u32 hus_archive_id;
/** The current undergoing action, if there is one */
__u32 hus_in_progress_state;
__u32 hus_in_progress_action;
struct hsm_extent hus_in_progress_location;
/* flexible array member: optional trailing data, adds nothing to sizeof */
char hus_extended_info[];
};
/* Request to change the HSM state flags of the file identified by hssi_fid.
 * NOTE(review): presumably the masks hold enum hsm_states bits to set and to
 * clear - confirm against the ioctl handler.
 */
struct hsm_state_set_ioc {
struct lu_fid hssi_fid;
__u64 hssi_setmask;
__u64 hssi_clearmask;
};
/*
* This structure describes the current in-progress action for a file.
* it is retuned to user space and send over the wire
*/
struct hsm_current_action {
/** The current undergoing action, if there is one */
/* state is one of hsm_progress_states */
__u32 hca_state;
/* action is one of hsm_user_action */
__u32 hca_action;
/* extent associated with the action */
struct hsm_extent hca_location;
};
/***** HSM user requests ******/
/* User-generated (lfs/ioctl) request types */
enum hsm_user_action {
	HUA_NONE	= 1,  /* no action (noop) */
	HUA_ARCHIVE	= 10, /* copy to hsm */
	HUA_RESTORE	= 11, /* prestage */
	HUA_RELEASE	= 12, /* drop ost objects */
	HUA_REMOVE	= 13, /* remove from archive */
	HUA_CANCEL	= 14  /* cancel a request */
};

/* Printable name of a user HSM action; values outside the enum are reported
 * as "UNKNOWN".
 */
static inline const char *hsm_user_action2name(enum hsm_user_action a)
{
	if (a == HUA_NONE)
		return "NOOP";
	if (a == HUA_ARCHIVE)
		return "ARCHIVE";
	if (a == HUA_RESTORE)
		return "RESTORE";
	if (a == HUA_RELEASE)
		return "RELEASE";
	if (a == HUA_REMOVE)
		return "REMOVE";
	if (a == HUA_CANCEL)
		return "CANCEL";

	return "UNKNOWN";
}
/*
* List of hr_flags (bit field)
*/
#define HSM_FORCE_ACTION 0x0001
/* used by CT, cannot be set by user */
#define HSM_GHOST_COPY 0x0002
/*
* To indicate that the action has been triggered by the
* kernel and a user process is currently blocked on it.
*/
#define HSM_REQ_BLOCKING 0x0004
/**
* Contains all the fixed part of struct hsm_user_request.
*/
struct hsm_request {
__u32 hr_action; /* enum hsm_user_action */
__u32 hr_archive_id; /* archive id, used only with HUA_ARCHIVE */
__u64 hr_flags; /* request flags */
__u32 hr_itemcount; /* item count in hur_user_item vector */
/* added to the request length by hur_len(): size of the data blob that
 * follows the item vector
 */
__u32 hr_data_len;
};
/* One entry of a user request: a file FID and an extent; packed */
struct hsm_user_item {
struct lu_fid hui_fid;
struct hsm_extent hui_extent;
} __attribute__((packed));
/* Variable-length user request: fixed part, then hr_itemcount items, then
 * hr_data_len bytes of data. Use hur_data() and hur_len() for the tail.
 */
struct hsm_user_request {
struct hsm_request hur_request;
struct hsm_user_item hur_user_item[];
/* extra data blob at end of struct (after all
* hur_user_items), only use helpers to access it
*/
} __attribute__((packed));
/** Return pointer to data field in a hsm user request */
static inline void *hur_data(struct hsm_user_request *hur)
{
return &(hur->hur_user_item[hur->hur_request.hr_itemcount]);
}
/**
 * Compute the current length of the provided hsm_user_request. This returns
 * ~0UL (-1 for 32-bit arches) instead of an errno because __kernel_ssize_t
 * is defined to be only [ -1, SSIZE_MAX ] there. On 64-bit architectures
 * the max return value is 2^32 * (sizeof(hur_user_item) + 1) ~= 2^37 bytes.
 *
 * \param hur	request whose hr_itemcount and hr_data_len are read
 *
 * \retval	length in bytes: fixed part + all items + data blob
 * \retval -1	on bounds check error (32-bit only): the length does not
 *		fit in the positive range of a 32-bit __kernel_ssize_t
 */
static inline __kernel_size_t hur_len(struct hsm_user_request *hur)
{
	__u64 size;

	/* can't overflow a __u64 since hr_itemcount is only __u32 */
	size = offsetof(struct hsm_user_request, hur_user_item[0]) +
	       (__u64)hur->hur_request.hr_itemcount *
	       sizeof(hur->hur_user_item[0]) + hur->hur_request.hr_data_len;

	/* Compare the full 64-bit value. Casting it to a 32-bit type first
	 * would drop the upper bits and let lengths of 4GiB and more through
	 * as a small, truncated number.
	 * This "if (0 && ..)" is removed by the compiler on 64-bit.
	 */
	if (sizeof(__kernel_size_t) == 4 && size > 0x7fffffffULL)
		return ~0UL;

	return size;
}
/****** HSM RPCs to copytool *****/
/* Message types the copytool may receive */
/* Only one message type is defined so far */
enum hsm_message_type {
HMT_ACTION_LIST = 100, /* message is a hsm_action_list */
};
/* Actions the copytool may be instructed to take for a given action_item */
enum hsm_copytool_action {
	HSMA_NONE	= 10, /* no action */
	HSMA_ARCHIVE	= 20, /* arbitrary offset */
	HSMA_RESTORE	= 21,
	HSMA_REMOVE	= 22,
	HSMA_CANCEL	= 23
};

/* Printable name of a copytool action; values outside the enum are reported
 * as "UNKNOWN".
 */
static inline const char *hsm_copytool_action2name(enum hsm_copytool_action a)
{
	if (a == HSMA_NONE)
		return "NOOP";
	if (a == HSMA_ARCHIVE)
		return "ARCHIVE";
	if (a == HSMA_RESTORE)
		return "RESTORE";
	if (a == HSMA_REMOVE)
		return "REMOVE";
	if (a == HSMA_CANCEL)
		return "CANCEL";

	return "UNKNOWN";
}
/* Copytool item action description */
/* Variable-length and packed: the fixed members are followed by
 * (hai_len - sizeof(struct hsm_action_item)) bytes in hai_data, as computed
 * by hai_dump_data_field(). hai_next() steps to the following item using
 * hai_len rounded up to a multiple of 8.
 */
struct hsm_action_item {
__u32 hai_len; /* valid size of this struct */
__u32 hai_action; /* hsm_copytool_action, but use known size */
struct lu_fid hai_fid; /* Lustre FID to operate on */
struct lu_fid hai_dfid; /* fid used for data access */
struct hsm_extent hai_extent; /* byte range to operate on */
__u64 hai_cookie; /* action cookie from coordinator */
__u64 hai_gid; /* grouplock id */
char hai_data[]; /* variable length */
} __attribute__((packed));
/**
 * Helper function which prints in hexadecimal the first bytes of the
 * hai opaque field.
 *
 * Two upper-case hex digits are written per data byte, for as many bytes as
 * fit in \a len, and the result is always NUL-terminated unless \a len is 0,
 * in which case \a buffer is not touched at all.
 *
 * \param hai [IN] record to print
 * \param buffer [IN,OUT] buffer to write the hex string to
 * \param len [IN] max buffer length
 *
 * \retval buffer
 */
static inline char *hai_dump_data_field(const struct hsm_action_item *hai,
					char *buffer, __kernel_size_t len)
{
	int i;
	int data_len;
	char *ptr;

	/* no room for even the terminator: writing it would overflow */
	if (len == 0)
		return buffer;

	ptr = buffer;
	data_len = hai->hai_len - sizeof(*hai);
	/* "len > 2" keeps room for two digits plus the terminating NUL */
	for (i = 0; (i < data_len) && (len > 2); i++) {
		snprintf(ptr, 3, "%02X", (unsigned char)hai->hai_data[i]);
		ptr += 2;
		len -= 2;
	}

	*ptr = '\0';

	return buffer;
}
/* Copytool action list */
#define HAL_VERSION 1
#define HAL_MAXSIZE LNET_MTU /* bytes, used in userspace only */
/* Variable-length and packed: header, NUL-terminated fsname padded to a
 * multiple of 8 bytes, then hal_count action items. Walk it with
 * hai_first() / hai_next(); hal_size() gives the total size.
 */
struct hsm_action_list {
__u32 hal_version;
__u32 hal_count; /* number of hai's to follow */
__u64 hal_compound_id; /* returned by coordinator, ignored */
__u64 hal_flags;
__u32 hal_archive_id; /* which archive backend */
__u32 padding1;
char hal_fsname[]; /* null-terminated */
/* struct hsm_action_item[hal_count] follows, aligned on 8-byte
* boundaries. See hai_first()
*/
} __attribute__((packed));
/* Return the first action item of \a hal: it starts after the NUL-terminated
 * fsname, whose size is rounded up to a multiple of 8 bytes
 */
static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal)
{
	char *fsname = hal->hal_fsname;
	__kernel_size_t skip = __ALIGN_KERNEL(strlen(fsname) + 1, 8);

	return (struct hsm_action_item *)(fsname + skip);
}
/* Return the item that follows \a hai: hai_len bytes further, rounded up to
 * a multiple of 8
 */
static inline struct hsm_action_item *hai_next(struct hsm_action_item *hai)
{
	char *base = (char *)hai;
	__kernel_size_t step = __ALIGN_KERNEL(hai->hai_len, 8);

	return (struct hsm_action_item *)(base + step);
}
/* Return the size in bytes of an hsm_action_list: header, padded fsname and
 * the padded length of each of its hal_count items
 */
static inline __kernel_size_t hal_size(struct hsm_action_list *hal)
{
	__kernel_size_t total;
	struct hsm_action_item *item = hai_first(hal);
	__u32 idx = 0;

	total = sizeof(*hal) + __ALIGN_KERNEL(strlen(hal->hal_fsname) + 1, 8);

	while (idx < hal->hal_count) {
		total += __ALIGN_KERNEL(item->hai_len, 8);
		idx++;
		item = hai_next(item);
	}

	return total;
}
/* HSM file import
* describe the attributes to be set on imported file
*/
/* NOTE(review): field meanings below are inferred from the names only;
 * time units are not visible in this header.
 */
struct hsm_user_import {
__u64 hui_size;
__u64 hui_atime;
__u64 hui_mtime;
__u32 hui_atime_ns;
__u32 hui_mtime_ns;
__u32 hui_uid;
__u32 hui_gid;
__u32 hui_mode;
__u32 hui_archive_id;
};
/* Copytool progress reporting */
/* NOTE(review): presumably the bits of hsm_progress::hp_flags - confirm */
#define HP_FLAG_COMPLETED 0x01
#define HP_FLAG_RETRY 0x02
struct hsm_progress {
struct lu_fid hp_fid;
__u64 hp_cookie;
struct hsm_extent hp_extent;
__u16 hp_flags;
__u16 hp_errval; /* positive val */
__u32 padding;
};
/* The last member, hc_hai, ends with a flexible array (hai_data), so this
 * structure must stay at the end of whatever contains it.
 */
struct hsm_copy {
__u64 hc_data_version;
__u16 hc_flags;
__u16 hc_errval; /* positive val */
__u32 padding;
struct hsm_action_item hc_hai;
};
/* Advice types; LU_LADVISE_MAX is one past the last valid type */
enum lu_ladvise_type {
LU_LADVISE_INVALID = 0,
LU_LADVISE_WILLREAD = 1,
LU_LADVISE_DONTNEED = 2,
LU_LADVISE_LOCKNOEXPAND = 3,
LU_LADVISE_LOCKAHEAD = 4,
/* Ahead operations for open|create|stat|read|write. */
LU_LADVISE_AHEAD = 5,
LU_LADVISE_MAX
};
/* Initializer for a name table indexed by enum lu_ladvise_type; the
 * LU_LADVISE_INVALID slot is left unset.
 */
#define LU_LADVISE_NAMES { \
[LU_LADVISE_WILLREAD] = "willread", \
[LU_LADVISE_DONTNEED] = "dontneed", \
[LU_LADVISE_LOCKNOEXPAND] = "locknoexpand", \
[LU_LADVISE_LOCKAHEAD] = "lockahead", \
[LU_LADVISE_AHEAD] = "ahead", \
}
/* This is the userspace argument for ladvise. It is currently the same as
* what goes on the wire (struct lu_ladvise), but is defined separately as we
* may need info which is only used locally.
*/
struct llapi_lu_ladvise {
__u16 lla_advice; /* advice type */
__u16 lla_value1; /* values for different advice types */
__u32 lla_value2;
__u64 lla_start; /* first byte of extent for advice */
__u64 lla_end; /* last byte of extent for advice */
__u32 lla_value3;
__u32 lla_value4;
};
/* Same leading members as struct llapi_lu_ladvise, followed by a union that
 * holds either two more values or a name buffer of NAME_MAX + 1 bytes.
 */
struct llapi_lu_ladvise2 {
__u16 lla_advice; /* advice type */
__u16 lla_value1; /* values for different advice types */
__u32 lla_value2;
__u64 lla_start;
__u64 lla_end;
__u32 lla_value3;
__u32 lla_value4;
union {
struct {
__u32 lla_value5;
__u32 lla_value6;
};
char lla_buf[NAME_MAX + 1];
};
};
/* I/O call sequences in a batch access. */
/* Single-bit flags plus three predefined combinations (OC, SOR, OCW) */
enum lu_access_flags {
ACCESS_FL_NONE = 0x0,
ACCESS_FL_STAT = 0x01,
ACCESS_FL_OPEN = 0x02,
ACCESS_FL_CREAT = 0x04,
ACCESS_FL_READ = 0x08,
ACCESS_FL_WRITE = 0x10,
ACCESS_FL_OC = ACCESS_FL_OPEN | ACCESS_FL_CREAT,
ACCESS_FL_SOR = ACCESS_FL_STAT | ACCESS_FL_OPEN | ACCESS_FL_READ,
ACCESS_FL_OCW = ACCESS_FL_OPEN | ACCESS_FL_CREAT | ACCESS_FL_WRITE,
};
/* How the names for ahead operations are supplied; LU_AH_MODE_MAX is one
 * past the last valid mode
 */
enum lu_ahead_mode {
LU_AH_MODE_NONE = 0,
/*
* The batch access pattern obeys certain naming rules, such as mdtest
* with the file naming format mdtest.$rank.$i.
*/
LU_AH_NAME_INDEX = 1,
/*
* Provide a file name list as input to do batch accesses with
* irregular file name format.
*/
LU_AH_NAME_ARRAY = 2,
/* Prefetching in readdir() order under a directory. */
LU_AH_NAME_READDIR = 3,
LU_AH_MODE_MAX,
};
/* Aliases giving the generic lla_* members their meaning for ahead advice */
#define lla_ahead_mode lla_value1
#define lla_access_flags lla_value2
#define lla_batch_max lla_value3
#define lla_fname lla_buf
/* Flag bits, see llapi_ladvise_hdr::lah_flags and the masks below */
enum ladvise_flag {
LF_ASYNC = 0x00000001,
LF_UNSET = 0x00000002,
};
/* Expected value of llapi_ladvise_hdr::lah_magic */
#define LADVISE_MAGIC 0x1ADF1CE0
/* Masks of valid flags for each advice */
#define LF_LOCKNOEXPAND_MASK LF_UNSET
/* Flags valid for all advices not explicitly specified */
#define LF_DEFAULT_MASK LF_ASYNC
/* All flags */
#define LF_MASK (LF_ASYNC | LF_UNSET)
/* Aliases giving the generic lla_* members their meaning for lock advice */
#define lla_lockahead_mode lla_value1
#define lla_peradvice_flags lla_value2
#define lla_lockahead_result lla_value3
/* This is the userspace argument for ladvise, corresponds to ladvise_hdr which
* is used on the wire. It is defined separately as we may need info which is
* only used locally.
*/
struct llapi_ladvise_hdr {
__u32 lah_magic; /* LADVISE_MAGIC */
__u32 lah_count; /* number of advices */
__u64 lah_flags; /* from enum ladvise_flag */
__u32 lah_value1; /* unused */
__u32 lah_value2; /* unused */
__u64 lah_value3; /* unused */
struct llapi_lu_ladvise lah_advise[]; /* advices in this header */
};
/* NOTE(review): presumably the upper limit for lah_count - confirm */
#define LAH_COUNT_MAX (1024)
/* Shared key */
/* Encryption algorithm identifiers; the INVALID marker is the largest
 * 16-bit unsigned value
 */
enum sk_crypt_alg {
SK_CRYPT_EMPTY = 0,
SK_CRYPT_AES256_CTR = 1,
SK_CRYPT_INVALID = __UINT16_MAX__
};
/* HMAC algorithm identifiers; the INVALID marker is the largest 16-bit
 * unsigned value
 */
enum sk_hmac_alg {
SK_HMAC_EMPTY = 0,
SK_HMAC_SHA256 = 1,
SK_HMAC_SHA512 = 2,
SK_HMAC_INVALID = __UINT16_MAX__
};
/* Name/type pair; NOTE(review): sct_type presumably holds an
 * enum sk_crypt_alg value - confirm
 */
struct sk_crypt_type {
const char *sct_name;
int sct_type;
};
/* Name/type pair; NOTE(review): sht_type presumably holds an
 * enum sk_hmac_alg value - confirm
 */
struct sk_hmac_type {
const char *sht_name;
int sht_type;
};
/* Name/type pair with a prime size in bits */
struct sk_prime_type {
const char *spt_name;
int spt_type;
int spt_primebits;
};
/* Values start at 1; MODE_MAX_USER is one past the last valid mode */
enum lock_mode_user {
MODE_READ_USER = 1,
MODE_WRITE_USER,
MODE_MAX_USER,
};
/* Initializer for a name table indexed by enum lock_mode_user; slot 0 is
 * left unset.
 */
#define LOCK_MODE_NAMES { \
[MODE_READ_USER] = "READ",\
[MODE_WRITE_USER] = "WRITE"\
}
/* NOTE(review): presumably the values reported through
 * lla_lockahead_result - confirm
 */
enum lockahead_results {
LLA_RESULT_SENT = 0,
LLA_RESULT_DIFFERENT,
LLA_RESULT_SAME,
};
/* Bit positions; turned into masks by enum lu_heat_flag */
enum lu_heat_flag_bit {
LU_HEAT_FLAG_BIT_INVALID = 0,
LU_HEAT_FLAG_BIT_OFF,
LU_HEAT_FLAG_BIT_CLEAR,
};
/* Flag masks built from the bit positions above */
enum lu_heat_flag {
LU_HEAT_FLAG_OFF = 1ULL << LU_HEAT_FLAG_BIT_OFF,
LU_HEAT_FLAG_CLEAR = 1ULL << LU_HEAT_FLAG_BIT_CLEAR,
};
/* Kinds of heat value; OBD_HEAT_COUNT is the number of kinds */
enum obd_heat_type {
OBD_HEAT_READSAMPLE = 0,
OBD_HEAT_WRITESAMPLE = 1,
OBD_HEAT_READBYTE = 2,
OBD_HEAT_WRITEBYTE = 3,
OBD_HEAT_COUNT
};
/* Initializer for a name table indexed by enum obd_heat_type */
#define LU_HEAT_NAMES { \
[OBD_HEAT_READSAMPLE] = "readsample", \
[OBD_HEAT_WRITESAMPLE] = "writesample", \
[OBD_HEAT_READBYTE] = "readbyte", \
[OBD_HEAT_WRITEBYTE] = "writebyte", \
}
/* Header followed by a flexible array of 64-bit heat values.
 * NOTE(review): lh_count presumably gives the number of lh_heat[] entries
 * and lh_flags holds enum lu_heat_flag bits - confirm.
 */
struct lu_heat {
__u32 lh_count;
__u32 lh_flags;
__u64 lh_heat[];
};
enum lu_pcc_type {
	LU_PCC_NONE		= 0x0,
	LU_PCC_READWRITE	= 0x01,
	LU_PCC_READONLY		= 0x02,
	LU_PCC_TYPE_MASK	= LU_PCC_READWRITE | LU_PCC_READONLY,
	LU_PCC_FL_ASYNC		= 0x10,
	LU_PCC_MAX
};

/* Printable name of the PCC type held in the LU_PCC_TYPE_MASK bits of
 * \a type; other bits (such as LU_PCC_FL_ASYNC) are ignored. Having both
 * type bits set at once is reported as "fault".
 */
static inline const char *pcc_type2string(enum lu_pcc_type type)
{
	unsigned int kind = type & LU_PCC_TYPE_MASK;

	if (kind == LU_PCC_NONE)
		return "none";
	if (kind == LU_PCC_READWRITE)
		return "readwrite";
	if (kind == LU_PCC_READONLY)
		return "readonly";

	return "fault";
}
/* Key names; NOTE(review): presumably the keys of the PCC YAML
 * configuration - confirm against the parser
 */
#define PCC_YAML_PCCPATH "pccpath"
#define PCC_YAML_HSMTOOL "hsmtool"
#define PCC_YAML_RWID "rwid"
#define PCC_YAML_ROID "roid"
#define PCC_YAML_FLAGS "flags"
#define PCC_YAML_AUTOCACHE "autocache"
enum hsmtool_type {
	HSMTOOL_UNKNOWN		= 0,
	/*
	 * v1 (original) using 6 directories (oid & 0xffff)/-/-/-/-/-/FID.
	 * Places only one FID per directory. See ct_path_archive() below.
	 */
	HSMTOOL_POSIX_V1	= 1,
	/* v2 using (OID & 0xffff)^(SEQ & 0xffff)/FID. */
	HSMTOOL_POSIX_V2	= 2,
	HSMTOOL_DEFAULT		= HSMTOOL_POSIX_V2,
};

/* Printable name of an hsmtool type; anything but the two POSIX layouts is
 * reported as "unknown".
 */
static inline const char *hsmtool_type2string(enum hsmtool_type type)
{
	if (type == HSMTOOL_POSIX_V1)
		return "posix_v1";
	if (type == HSMTOOL_POSIX_V2)
		return "posix_v2";

	return "unknown";
}

/* Parse an hsmtool type name. A plain "posix" selects HSMTOOL_DEFAULT; a
 * string that is not recognized yields HSMTOOL_UNKNOWN.
 */
static inline enum hsmtool_type hsmtool_string2type(const char *str)
{
	enum hsmtool_type parsed = HSMTOOL_UNKNOWN;

	if (!strcmp(str, "posix"))
		parsed = HSMTOOL_DEFAULT;
	else if (!strcmp(str, "posix_v1"))
		parsed = HSMTOOL_POSIX_V1;
	else if (!strcmp(str, "posix_v2"))
		parsed = HSMTOOL_POSIX_V2;

	return parsed;
}
/* Argument describing a PCC attach */
struct lu_pcc_attach {
__u32 pcca_type; /* PCC type */
__u32 pcca_id; /* Attach ID */
};
/* Single-bit flags that can be combined */
enum lu_pcc_detach_flags {
/* Detach only, keep the PCC copy */
PCC_DETACH_FL_NONE = 0x0,
/* Remove the cached file after detach */
PCC_DETACH_FL_UNCACHE = 0x01,
/* Known the file was once used as PCC-RW */
PCC_DETACH_FL_KNOWN_READWRITE = 0x02,
/* Known the file was once used as PCC-RO */
PCC_DETACH_FL_KNOWN_READONLY = 0x04,
/* Indicate PCC cached copy is removed */
PCC_DETACH_FL_CACHE_REMOVED = 0x08,
/* Indicate the file is being attached */
PCC_DETACH_FL_ATTACHING = 0x10,
};
/* Detach argument identifying the file by FID */
struct lu_pcc_detach_fid {
/* fid of the file to detach */
struct lu_fid pccd_fid;
/* NOTE(review): presumably enum lu_pcc_detach_flags - confirm */
__u32 pccd_flags;
};
/* Detach argument carrying only flags */
struct lu_pcc_detach {
/* NOTE(review): presumably enum lu_pcc_detach_flags - confirm */
__u32 pccd_flags;
};
/* Single-bit flags reported in lu_pcc_state::pccs_flags */
enum lu_pcc_state_flags {
PCC_STATE_FL_NONE = 0x0,
/* The inode attr is cached locally */
PCC_STATE_FL_ATTR_VALID = 0x01,
/* The file is being attached into PCC */
PCC_STATE_FL_ATTACHING = 0x02,
/* The PCC copy is unlinked */
PCC_STATE_FL_UNLINKED = 0x04,
};
/* PCC state query: members marked IN are supplied by the caller, members
 * marked OUT are filled in for the caller
 */
struct lu_pcc_state {
__u32 pccs_type; /* OUT: enum lu_pcc_type */
__u32 pccs_open_count; /* OUT: user count */
__u32 pccs_flags; /* OUT: enum lu_pcc_state_flags */
__u32 pccs_namelen; /* IN: file name len */
char pccs_path[PATH_MAX]; /* IN|OUT: file name or path buffer */
};
/* Options for PCC backend cleanup */
enum lu_pcc_cleanup_flags {
PCC_CLEANUP_FL_NONE = 0x0,
/* Remove the PCC backend but retain the data on the cache */
PCC_CLEANUP_FL_KEEP_DATA = 0x1,
};
/* Operation selector for struct lu_project; LU_PROJECT_MAX is one past the
 * last valid value
 */
enum lu_project_type {
LU_PROJECT_NONE = 0,
LU_PROJECT_SET,
LU_PROJECT_GET,
LU_PROJECT_MAX
};
/* Project attributes of the entry named project_name */
struct lu_project {
__u32 project_type; /* enum lu_project_type */
__u32 project_id;
__u32 project_xflags;
__u32 project_reserved;
char project_name[NAME_MAX + 1];
};
/* Header followed by a flexible array of FIDs; the three header members
 * take 16 bytes in total.
 * NOTE(review): fa_nr presumably counts the fa_fids[] entries - confirm.
 */
struct fid_array {
__u32 fa_nr;
/* make header's size equal lu_fid */
__u32 fa_padding0;
__u64 fa_padding1;
struct lu_fid fa_fids[];
};
/* NOTE(review): presumably the upper limit for fa_nr - confirm */
#define OBD_MAX_FIDS_IN_ARRAY 4096
/* more types could be defined upon need for more complex
* format to be used in foreign symlink LOV/LMV EAs, like
* one to describe a delimiter string and occurence number
* of delimited sub-string, ...
*/
/* Discriminator stored in ll_foreign_symlink_upcall_item::type */
enum ll_foreign_symlink_upcall_item_type {
EOB_TYPE = 1,
STRING_TYPE = 2,
POSLEN_TYPE = 3,
};
/* may need to be modified to allow for more format items to be defined, and
* like for ll_foreign_symlink_upcall_item_type enum
*/
struct ll_foreign_symlink_upcall_item {
__u32 type;
union {
/* position/length pair; POSLEN_ITEM_SZ is the item size up to and
 * including len
 */
struct {
__u32 pos;
__u32 len;
};
/* sized string, either as a pointer or as inline bytes;
 * STRING_ITEM_SZ() gives the item size for the inline form
 */
struct {
size_t size;
union {
/* internal storage of constant string */
char *string;
/* upcall stores constant string in a raw */
DECLARE_FLEX_ARRAY(char, bytestring);
};
};
};
};
/* Size of an upcall item up to and including its len member */
#define POSLEN_ITEM_SZ (offsetof(struct ll_foreign_symlink_upcall_item, len) + \
		sizeof(((struct ll_foreign_symlink_upcall_item *)0)->len))
/* Size of an upcall item carrying sz bytes in bytestring, with the string
 * part rounded up to a multiple of sizeof(__u32). The argument is
 * parenthesized so that an expression can be passed safely.
 */
#define STRING_ITEM_SZ(sz) ( \
	offsetof(struct ll_foreign_symlink_upcall_item, bytestring) + \
	((sz) + sizeof(__u32) - 1) / sizeof(__u32) * sizeof(__u32))
/* presently limited to not cause max stack frame size to be reached
 * because of temporary automatic array of
 * "struct ll_foreign_symlink_upcall_item" presently used in
 * foreign_symlink_upcall_info_store()
 */
#define MAX_NB_UPCALL_ITEMS 32
/**
* The data stripes in a comp is split into smaller chunks for the purpose
* of ec calculations. The total number of stripes may not always be
* evenly divisible with by 'k' so we may need to divide it up into two
* different sets of k0 and k1 sized chunks.
*
* The total stripes are divided into c0 number of k0 sized chunks
* followed by c1 number of k1 sized chunks.
*/
/* Result of ec_split_stripes(): esc_n0 chunks of esc_k0 stripes each,
 * followed by esc_n1 chunks of esc_k1 stripes each, such that
 * esc_n0 * esc_k0 + esc_n1 * esc_k1 equals the total number of stripes.
 */
struct ec_split_comp {
	int esc_n0, esc_k0;
	int esc_n1, esc_k1;
};
/*
 * Arbitrary limit on the minimum size we will attempt to split up into
 * smaller chunks for ec computation.
 */
#define EC_MIN_SPLIT_SIZE 5
/*
 * We have data consisting of 'total' stripes. Create a mapping where
 * we split this into smaller chunks based on what the
 * suggested / requested hint is.
 * Try to keep the sizes of the different buckets as equal as possible
 * even if it means we will sometimes use smaller bucket size
 * than what the hint suggested.
 *
 * A pathological example could be a data comp with 15 stripes and
 * we request to split this into buckets for 7,m EC encoding.
 * For best fit this would then find a configuration of 3 buckets
 * of size 5 and thus the chunk size is 2 less than the requested hint.
 *
 * \param total		[IN] number of data stripes to split
 * \param suggested	[IN] requested chunk size; a value <= 0 cannot be
 *			used as a divisor and gives a single chunk
 * \param sc		[OUT] resulting split, every member is written
 */
static inline void
ec_split_stripes(int total, int suggested, struct ec_split_comp *sc)
{
	int num_buckets;

	/* Map everything into a single chunk when the total is very small,
	 * when the hint already covers it, or when the hint is not a usable
	 * chunk size (zero would divide by zero below).
	 */
	if (suggested <= 0 || suggested >= total ||
	    total < EC_MIN_SPLIT_SIZE) {
		sc->esc_k0 = total;
		sc->esc_n0 = 1;
		sc->esc_k1 = 0;
		sc->esc_n1 = 0;
		return;
	}

	/* If the total is evenly divisible by the suggested chunk size */
	if (total % suggested == 0) {
		sc->esc_k0 = suggested;
		sc->esc_n0 = total / suggested;
		sc->esc_k1 = 0;
		sc->esc_n1 = 0;
		return;
	}

	/* We need one extra bucket because there was a residual */
	num_buckets = total / suggested + 1;

	/*
	 * If we can split the total evenly in the new number of buckets.
	 * For this case we end up with num_bucket chunks that are all
	 * suggested-1 or suggested-2 in size.
	 */
	if (total % num_buckets == 0) {
		sc->esc_k0 = total / num_buckets;
		sc->esc_n0 = num_buckets;
		sc->esc_k1 = 0;
		sc->esc_n1 = 0;
		return;
	}

	/*
	 * Split the total stripes into num_buckets chunks and with the first
	 * block of chunks being one larger to consume the residual.
	 *
	 * We can describe any number as :
	 *
	 *   total = nb * bs + r
	 *
	 * where
	 *   nb is number of buckets
	 *   bs is bucket size
	 *   r is the residual, r < nb.
	 *
	 * This can then be rearranged as :
	 *
	 *   total = r * (bs + 1) + (nb - r) * bs
	 * =>
	 *   total = r * bs + r + nb * bs - r * bs
	 * =>
	 *   total = nb * bs + r
	 */
	sc->esc_n0 = total % num_buckets;	/* r */
	sc->esc_k0 = total / num_buckets + 1;	/* bs + 1 */
	sc->esc_n1 = num_buckets - sc->esc_n0;	/* nb - r */
	sc->esc_k1 = total / num_buckets;	/* bs */
}
#if defined(__cplusplus)
}
#endif
/** @} lustreuser */
#endif /* _LUSTRE_USER_H */