Viewing: lustre_lmv.h

/* SPDX-License-Identifier: GPL-2.0 */

/*
 * Copyright (c) 2014, 2016, Intel Corporation.
 */

/*
 * This file is part of Lustre, http://www.lustre.org/
 *
 * Lustre LMV structures and functions.
 *
 * Author: Di Wang <di.wang@intel.com>
 */

#ifndef _LUSTRE_LMV_H
#define _LUSTRE_LMV_H
#include <uapi/linux/lustre/lustre_idl.h>

/*
 * One entry of the trailing lsm_md_oinfo[] array of struct lmv_stripe_md.
 */
struct lmv_oinfo {
	/* compared entry by entry in lsm_md_eq() when the layout is striped */
	struct lu_fid	lmo_fid;
	/* compared in lsm_md_eq() when the magic is LMV_USER_MAGIC_SPECIFIC */
	u32		lmo_mds;
	/* NOTE(review): not referenced in this header; presumably the inode
	 * backing this stripe - confirm against the users of lmo_root.
	 */
	struct inode	*lmo_root;
};

/*
 * LMV layout description: a fixed header followed by a variable number of
 * per-stripe entries.  Every header field below takes part in lsm_md_eq().
 */
struct lmv_stripe_md {
	/* layout discriminator, tested against LMV_MAGIC, LMV_MAGIC_FOREIGN
	 * and LMV_USER_MAGIC_SPECIFIC by the helpers in this header
	 */
	__u32	lsm_md_magic;
	/* upper bound used when walking lsm_md_oinfo[] */
	__u32	lsm_md_stripe_count;
	__u32	lsm_md_master_mdt_index;
	/* hash type plus flag bits (e.g. LMV_HASH_FLAG_BAD_TYPE) */
	__u32	lsm_md_hash_type;
	/* inheritance counters, stepped by lmv_inherit_next() and
	 * lmv_inherit_rr_next() in lmv_object_inherited()
	 */
	__u8	lsm_md_max_inherit;
	__u8	lsm_md_max_inherit_rr;
	__u32	lsm_md_layout_version;
	__u32	lsm_md_migrate_offset;
	__u32	lsm_md_migrate_hash;
	/* pool name; compared with strncmp() over the whole buffer */
	char	lsm_md_pool_name[LOV_MAXPOOLNAME + 1];
	/* flexible array, indexed from 0 to lsm_md_stripe_count - 1 */
	struct lmv_oinfo lsm_md_oinfo[];
};

/*
 * Reference-counted container for an LMV layout.
 *
 * The layout is either a regular one (lso_lsm) or a foreign one (lso_lfm).
 * The helpers in this header tell them apart by reading the magic through
 * lso_lsm.lsm_md_magic, whichever member is in use.
 */
struct lmv_stripe_object {
	/* reference count; read with kref_read() in lmv_stripe_object_dump() */
	struct kref			lso_refs;
	union {
		struct lmv_stripe_md	lso_lsm;
		struct lmv_foreign_md	lso_lfm;
	};
};

/*
 * Tell whether \a lso describes a striped directory: it must be non-NULL and
 * carry the LMV_MAGIC magic.
 */
static inline bool lmv_dir_striped(const struct lmv_stripe_object *lso)
{
	if (!lso)
		return false;

	return lso->lso_lsm.lsm_md_magic == LMV_MAGIC;
}

/*
 * Tell whether \a lso describes a foreign directory: it must be non-NULL and
 * carry the LMV_MAGIC_FOREIGN magic.
 */
static inline bool lmv_dir_foreign(const struct lmv_stripe_object *lso)
{
	if (!lso)
		return false;

	return lso->lso_lsm.lsm_md_magic == LMV_MAGIC_FOREIGN;
}

/*
 * A directory is "layout changing" when it is striped and its hash type
 * satisfies lmv_hash_is_layout_changing().  NULL yields false.
 */
static inline bool lmv_dir_layout_changing(const struct lmv_stripe_object *lso)
{
	if (!lmv_dir_striped(lso))
		return false;

	return lmv_hash_is_layout_changing(lso->lso_lsm.lsm_md_hash_type);
}

/*
 * Report a striped directory whose hash cannot be trusted: either the
 * LMV_HASH_FLAG_BAD_TYPE flag is set or the hash type is not a known one.
 * Anything that is not a striped directory (including NULL) yields false.
 */
static inline bool lmv_dir_bad_hash(const struct lmv_stripe_object *lso)
{
	bool bad = false;

	if (lmv_dir_striped(lso))
		bad = (lso->lso_lsm.lsm_md_hash_type &
		       LMV_HASH_FLAG_BAD_TYPE) ||
		      !lmv_is_known_hash_type(lso->lso_lsm.lsm_md_hash_type);

	return bad;
}

/*
 * Compute the max_inherit value one level further down:
 * - LMV_INHERIT_NONE and LMV_INHERIT_END both become LMV_INHERIT_NONE,
 * - LMV_INHERIT_UNLIMITED and anything above LMV_INHERIT_MAX is kept as is,
 * - every other value is decremented by one.
 */
static inline __u8 lmv_inherit_next(__u8 inherit)
{
	if (inherit == LMV_INHERIT_NONE || inherit == LMV_INHERIT_END)
		return LMV_INHERIT_NONE;

	return (inherit == LMV_INHERIT_UNLIMITED ||
		inherit > LMV_INHERIT_MAX) ? inherit : inherit - 1;
}

/*
 * Compute the max_inherit_rr value one level further down:
 * LMV_INHERIT_RR_NONE, LMV_INHERIT_RR_UNLIMITED and anything above
 * LMV_INHERIT_RR_MAX is kept as is, every other value is decremented by one.
 */
static inline __u8 lmv_inherit_rr_next(__u8 inherit_rr)
{
	bool keep = inherit_rr == LMV_INHERIT_RR_NONE ||
		    inherit_rr == LMV_INHERIT_RR_UNLIMITED ||
		    inherit_rr > LMV_INHERIT_RR_MAX;

	return keep ? inherit_rr : inherit_rr - 1;
}

/*
 * A max_inherit value is inheritable when it is LMV_INHERIT_UNLIMITED or
 * lies in the range (LMV_INHERIT_END, LMV_INHERIT_MAX].
 */
static inline bool lmv_is_inheritable(__u8 inherit)
{
	if (inherit == LMV_INHERIT_UNLIMITED)
		return true;

	return inherit > LMV_INHERIT_END && inherit <= LMV_INHERIT_MAX;
}

/*
 * Compare two LMV layouts.
 *
 * All header fields and the pool name must match.  On top of that, striped
 * layouts (LMV_MAGIC) must have identical stripe FIDs, and
 * LMV_USER_MAGIC_SPECIFIC layouts must have identical per-stripe MDS values.
 * Neither argument is checked for NULL before its header is read.
 */
static inline bool lsm_md_eq(const struct lmv_stripe_object *lso1,
			     const struct lmv_stripe_object *lso2)
{
	const struct lmv_stripe_md *a = &lso1->lso_lsm;
	const struct lmv_stripe_md *b = &lso2->lso_lsm;
	__u32 i;

	/* header fields, checked in declaration order */
	if (a->lsm_md_magic != b->lsm_md_magic)
		return false;
	if (a->lsm_md_stripe_count != b->lsm_md_stripe_count)
		return false;
	if (a->lsm_md_master_mdt_index != b->lsm_md_master_mdt_index)
		return false;
	if (a->lsm_md_hash_type != b->lsm_md_hash_type)
		return false;
	if (a->lsm_md_max_inherit != b->lsm_md_max_inherit)
		return false;
	if (a->lsm_md_max_inherit_rr != b->lsm_md_max_inherit_rr)
		return false;
	if (a->lsm_md_layout_version != b->lsm_md_layout_version)
		return false;
	if (a->lsm_md_migrate_offset != b->lsm_md_migrate_offset)
		return false;
	if (a->lsm_md_migrate_hash != b->lsm_md_migrate_hash)
		return false;
	if (strncmp(a->lsm_md_pool_name, b->lsm_md_pool_name,
		    sizeof(a->lsm_md_pool_name)) != 0)
		return false;

	/* magic and stripe count are equal from here on */
	if (lmv_dir_striped(lso1)) {
		i = 0;
		while (i < a->lsm_md_stripe_count) {
			if (!lu_fid_eq(&a->lsm_md_oinfo[i].lmo_fid,
				       &b->lsm_md_oinfo[i].lmo_fid))
				return false;
			i++;
		}
	} else if (a->lsm_md_magic == LMV_USER_MAGIC_SPECIFIC) {
		i = 0;
		while (i < a->lsm_md_stripe_count) {
			if (a->lsm_md_oinfo[i].lmo_mds !=
			    b->lsm_md_oinfo[i].lmo_mds)
				return false;
			i++;
		}
	}

	return true;
}

/*
 * Log the content of an LMV stripe object under debug mask \a mask: one line
 * for the header, then, for striped directories only, one line per stripe
 * FID.  \a lsmo must not be NULL.
 */
static inline void
lmv_stripe_object_dump(int mask, const struct lmv_stripe_object *lsmo)
{
	const struct lmv_stripe_md *md = &lsmo->lso_lsm;
	int n;

	CDEBUG(mask,
	       "dump LMV: magic=%#x refs=%u count=%u index=%u hash=%s:%#x max_inherit=%hhu max_inherit_rr=%hhu version=%u migrate_offset=%u migrate_hash=%s:%x pool=%.*s\n",
	       md->lsm_md_magic, kref_read(&lsmo->lso_refs),
	       md->lsm_md_stripe_count, md->lsm_md_master_mdt_index,
	       lmv_is_known_hash_type(md->lsm_md_hash_type) ?
		mdt_hash_name[md->lsm_md_hash_type & LMV_HASH_TYPE_MASK] :
		"invalid", md->lsm_md_hash_type,
	       md->lsm_md_max_inherit, md->lsm_md_max_inherit_rr,
	       md->lsm_md_layout_version, md->lsm_md_migrate_offset,
	       lmv_is_known_hash_type(md->lsm_md_migrate_hash) ?
		mdt_hash_name[md->lsm_md_migrate_hash & LMV_HASH_TYPE_MASK] :
		"invalid", md->lsm_md_migrate_hash,
	       LOV_MAXPOOLNAME, md->lsm_md_pool_name);

	/* only striped layouts carry FIDs worth listing */
	if (lmv_dir_striped(lsmo)) {
		for (n = 0; n < md->lsm_md_stripe_count; n++) {
			CDEBUG_LIMIT(mask, "stripe[%d] "DFID"\n",
				     n, PFID(&md->lsm_md_oinfo[n].lmo_fid));
		}
	}
}

/*
 * Check whether the child layout \a clsm looks inherited from the parent
 * layout \a plsm: magic, stripe count, master MDT index and hash type must be
 * equal, and the child's max_inherit / max_inherit_rr must be exactly the
 * parent's values stepped by lmv_inherit_next() / lmv_inherit_rr_next().
 * A NULL parent or child yields false.
 */
static inline bool
lmv_object_inherited(const struct lmv_stripe_object *plsm,
		     const struct lmv_stripe_object *clsm)
{
	if (!plsm || !clsm)
		return false;

	if (plsm->lso_lsm.lsm_md_magic != clsm->lso_lsm.lsm_md_magic)
		return false;

	if (plsm->lso_lsm.lsm_md_stripe_count !=
	    clsm->lso_lsm.lsm_md_stripe_count)
		return false;

	if (plsm->lso_lsm.lsm_md_master_mdt_index !=
	    clsm->lso_lsm.lsm_md_master_mdt_index)
		return false;

	if (plsm->lso_lsm.lsm_md_hash_type != clsm->lso_lsm.lsm_md_hash_type)
		return false;

	if (lmv_inherit_next(plsm->lso_lsm.lsm_md_max_inherit) !=
	    clsm->lso_lsm.lsm_md_max_inherit)
		return false;

	return lmv_inherit_rr_next(plsm->lso_lsm.lsm_md_max_inherit_rr) ==
	       clsm->lso_lsm.lsm_md_max_inherit_rr;
}

/* Only pointers to this union are used below, a forward declaration is
 * enough.
 */
union lmv_mds_md;

/*
 * Life-cycle helpers for struct lmv_stripe_object; they are only declared
 * here and implemented elsewhere.
 *
 * NOTE(review): the following is inferred from the signatures only, confirm
 * against the implementation:
 * - lmv_stripe_object_alloc() presumably builds an object for layout
 *   \a magic from the \a lmm_size bytes at \a lmm,
 * - lmv_stripe_object_free() has the shape of a kref release callback,
 * - lmv_stripe_object_put() takes the address of the caller's pointer, so it
 *   can presumably reset that pointer once the reference is dropped,
 * - lmv_stripe_object_get() presumably takes a reference and returns the
 *   object.
 */
struct lmv_stripe_object *lmv_stripe_object_alloc(__u32 magic,
						  const union lmv_mds_md *lmm,
						  size_t lmm_size);

void lmv_stripe_object_free(struct kref *kref);
void lmv_stripe_object_put(struct lmv_stripe_object **lsm_obj);

struct lmv_stripe_object *
	lmv_stripe_object_get(struct lmv_stripe_object *lsm_obj);

/*
 * Test-only hash: the sum of all bytes of \a name (taken as unsigned char)
 * modulo \a count.  \a count must not be zero; a non-positive \a namelen
 * hashes to 0.
 */
static inline unsigned int
lmv_hash_all_chars(unsigned int count, const char *name, int namelen)
{
	const unsigned char *bytes = (const unsigned char *)name;
	unsigned int sum = 0;
	int pos;

	/* unsigned addition is commutative, so the walk direction is free */
	for (pos = 0; pos < namelen; pos++)
		sum += bytes[pos];

	return sum % count;
}

/*
 * Map \a name onto [0, \a count) using the 64-bit FNV-1a hash of its first
 * \a namelen bytes.
 */
static inline unsigned int
lmv_hash_fnv1a(unsigned int count, const char *name, int namelen)
{
	__u64 digest = lustre_hash_fnv_1a_64(name, namelen);

	/* do_div() divides digest in place and hands back the remainder */
	return do_div(digest, count);
}

/*
 * Robert Jenkins' function for mixing 32-bit values
 * http://burtleburtle.net/bob/hash/evahash.html
 * a, b = random bits, c = input and output
 *
 * All three arguments are modified in place, so each must be a plain
 * variable.  Used to turn the inputs into an evenly distributed hash.
 */
#define crush_hashmix(a, b, c)				\
do {							\
	a -= b;  a -= c;  a ^= (c >> 13);		\
	b -= c;  b -= a;  b ^= (a << 8);		\
	c -= a;  c -= b;  c ^= (b >> 13);		\
	a -= b;  a -= c;  a ^= (c >> 12);		\
	b -= c;  b -= a;  b ^= (a << 16);		\
	c -= a;  c -= b;  c ^= (b >> 5);		\
	a -= b;  a -= c;  a ^= (c >> 3);		\
	b -= c;  b -= a;  b ^= (a << 10);		\
	c -= a;  c -= b;  c ^= (b >> 15);		\
} while (0)

/* initial value folded into every crush_hash() result */
#define crush_hash_seed 1315423911

/*
 * Hash the pair (\a a, \a b) into one 32-bit value: seed the result with
 * crush_hash_seed ^ a ^ b, then run three mixing rounds that also fold in
 * two fixed constants.
 */
static inline __u32 crush_hash(__u32 a, __u32 b)
{
	__u32 mixed = crush_hash_seed ^ a ^ b;
	__u32 salt_hi = 231232;
	__u32 salt_lo = 1232;

	crush_hashmix(a, b, mixed);
	crush_hashmix(salt_hi, a, mixed);
	crush_hashmix(b, salt_lo, mixed);

	return mixed;
}

/*
 * CRUSH-style placement of \a name onto one of \a count stripes; refer to
 * https://github.com/ceph/ceph/blob/master/src/crush/hash.c and
 * https://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf for details of the CRUSH
 * algorithm.
 *
 * \a crush2 is handed unchanged to lu_name_is_temp_file().  The returned
 * stripe index is asserted to be below \a count.
 */
static inline unsigned int
lmv_hash_crush(unsigned int count, const char *name, int namelen, bool crush2)
{
	unsigned long long best_draw = 0;
	unsigned long long draw;
	unsigned int idx = 0;
	unsigned int pg;
	int i;

	/* Temporary and backup files belong on the MDT of their target, so
	 * only the <target> part of the name is hashed.
	 * temporary file naming rule:
	 * 1. rsync: .<target>.XXXXXX
	 * 2. dstripe: <target>.XXXXXXXX
	 */
	if (lu_name_is_temp_file(name, namelen, true, 6, crush2)) {
		/* skip the leading '.' and cut the 7-byte ".XXXXXX" tail */
		name++;
		namelen -= 8;
	} else if (lu_name_is_temp_file(name, namelen, false, 8, crush2)) {
		/* cut the 9-byte ".XXXXXXXX" tail */
		namelen -= 9;
	} else if (lu_name_is_backup_file(name, namelen, &i)) {
		/* i was filled in by the helper; presumably the length of
		 * the backup suffix to cut
		 */
		LASSERT(i < namelen);
		namelen -= i;
	}

	pg = lmv_hash_fnv1a(LMV_CRUSH_PG_COUNT, name, namelen);

	/* Spread PGs over all stripes pseudo-randomly so they end up almost
	 * evenly distributed; when the stripe count changes only (delta /
	 * total) sub files have to move, where 'delta' is the number of
	 * stripes added or removed and 'total' is the stripe count before
	 * the change for removal, or after the change for addition.
	 *
	 * The stripe drawing the highest straw wins; ties keep the lowest
	 * index.
	 */
	for (i = 0; i < count; i++) {
		draw = crush_hash(pg, i);
		if (draw <= best_draw)
			continue;

		best_draw = draw;
		idx = i;
	}
	LASSERT(idx < count);

	return idx;
}

/* Map a file name to the index of the stripe that holds it.
 *
 * directory layout may change in three ways:
 * 1. directory migration, in its LMV source stripes are appended after
 *    target stripes, \a migrate_hash is source hash type, \a migrate_offset is
 *    target stripe count,
 * 2. directory split, \a migrate_hash is hash type before split,
 *    \a migrate_offset is stripe count before split.
 * 3. directory merge, \a migrate_hash is hash type after merge,
 *    \a migrate_offset is stripe count after merge.
 *
 * \a new_layout selects which of the two layouts of a changing directory the
 * name is located in; it has no effect when the layout is not changing.
 *
 * All values are expected in CPU byte order.  \a namelen and \a stripe_count
 * must be positive (asserted).
 *
 * Returns the stripe index (0 when the effective stripe count is at most 1),
 * or -EBADFD if the effective hash type is not one of the supported types.
 */
static inline int
__lmv_name_to_stripe_index(__u32 hash_type, __u32 stripe_count,
			   __u32 migrate_hash, __u32 migrate_offset,
			   const char *name, int namelen, bool new_layout)
{
	/* hash_type and stripe_count are overwritten below with the values
	 * of the selected layout; keep the originals for the final checks
	 */
	__u32 saved_hash = hash_type;
	__u32 saved_count = stripe_count;
	int stripe_index = 0;

	LASSERT(namelen > 0);
	LASSERT(stripe_count > 0);

	/* pick the hash type and stripe count of the requested layout */
	if (lmv_hash_is_splitting(hash_type)) {
		if (!new_layout) {
			hash_type = migrate_hash;
			stripe_count = migrate_offset;
		}
	} else if (lmv_hash_is_merging(hash_type)) {
		if (new_layout) {
			hash_type = migrate_hash;
			stripe_count = migrate_offset;
		}
	} else if (lmv_hash_is_migrating(hash_type)) {
		if (new_layout) {
			/* target stripes come first */
			stripe_count = migrate_offset;
		} else {
			/* source stripes are what follows the targets */
			hash_type = migrate_hash;
			stripe_count -= migrate_offset;
		}
	}

	/* with zero or one stripe there is nothing to hash, index stays 0 */
	if (stripe_count > 1) {
		switch (hash_type & LMV_HASH_TYPE_MASK) {
		case LMV_HASH_TYPE_ALL_CHARS:
			stripe_index = lmv_hash_all_chars(stripe_count, name,
							  namelen);
			break;
		case LMV_HASH_TYPE_FNV_1A_64:
			stripe_index = lmv_hash_fnv1a(stripe_count, name,
						      namelen);
			break;
		case LMV_HASH_TYPE_CRUSH:
			stripe_index = lmv_hash_crush(stripe_count, name,
						      namelen, false);
			break;
		case LMV_HASH_TYPE_CRUSH2:
			stripe_index = lmv_hash_crush(stripe_count, name,
						      namelen, true);
			break;
		default:
			return -EBADFD;
		}
	}

	LASSERT(stripe_index < stripe_count);

	/* old layout of a migrating directory: shift past the target stripes
	 * to land in the appended source stripes
	 */
	if (!new_layout && lmv_hash_is_migrating(saved_hash))
		stripe_index += migrate_offset;

	LASSERT(stripe_index < saved_count);

	CDEBUG(D_INFO, "name %.*s hash=%#x/%#x idx=%d/%u/%u under %s layout\n",
	       namelen, name, saved_hash, migrate_hash, stripe_index,
	       saved_count, migrate_offset, new_layout ? "new" : "old");

	return stripe_index;
}

/*
 * Locate \a name under the new layout described by \a lmv.
 *
 * \a lmv may be in CPU byte order or little-endian; the magic decides which.
 * Returns the stripe index or a negative errno from
 * __lmv_name_to_stripe_index(), or -EINVAL if the magic is neither
 * LMV_MAGIC_V1 nor LMV_MAGIC_STRIPE in either byte order.
 */
static inline int lmv_name_to_stripe_index(struct lmv_mds_md_v1 *lmv,
					   const char *name, int namelen)
{
	__u32 hash_type;
	__u32 stripe_count;
	__u32 migrate_hash;
	__u32 migrate_offset;

	if (lmv->lmv_magic == LMV_MAGIC_V1 ||
	    lmv->lmv_magic == LMV_MAGIC_STRIPE) {
		/* already in CPU byte order */
		hash_type = lmv->lmv_hash_type;
		stripe_count = lmv->lmv_stripe_count;
		migrate_hash = lmv->lmv_migrate_hash;
		migrate_offset = lmv->lmv_migrate_offset;
	} else if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1) ||
		   lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_STRIPE)) {
		/* little-endian copy, convert each field */
		hash_type = le32_to_cpu(lmv->lmv_hash_type);
		stripe_count = le32_to_cpu(lmv->lmv_stripe_count);
		migrate_hash = le32_to_cpu(lmv->lmv_migrate_hash);
		migrate_offset = le32_to_cpu(lmv->lmv_migrate_offset);
	} else {
		return -EINVAL;
	}

	return __lmv_name_to_stripe_index(hash_type, stripe_count,
					  migrate_hash, migrate_offset,
					  name, namelen, true);
}

/*
 * Locate \a name under the old layout described by \a lmv.
 *
 * \a lmv may be in CPU byte order or little-endian; the magic decides which.
 * Returns the stripe index or a negative errno from
 * __lmv_name_to_stripe_index(), or -EINVAL if the magic is neither
 * LMV_MAGIC_V1 nor LMV_MAGIC_STRIPE in either byte order.
 */
static inline int lmv_name_to_stripe_index_old(struct lmv_mds_md_v1 *lmv,
					       const char *name, int namelen)
{
	__u32 hash_type;
	__u32 stripe_count;
	__u32 migrate_hash;
	__u32 migrate_offset;

	if (lmv->lmv_magic == LMV_MAGIC_V1 ||
	    lmv->lmv_magic == LMV_MAGIC_STRIPE) {
		/* already in CPU byte order */
		hash_type = lmv->lmv_hash_type;
		stripe_count = lmv->lmv_stripe_count;
		migrate_hash = lmv->lmv_migrate_hash;
		migrate_offset = lmv->lmv_migrate_offset;
	} else if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1) ||
		   lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_STRIPE)) {
		/* little-endian copy, convert each field */
		hash_type = le32_to_cpu(lmv->lmv_hash_type);
		stripe_count = le32_to_cpu(lmv->lmv_stripe_count);
		migrate_hash = le32_to_cpu(lmv->lmv_migrate_hash);
		migrate_offset = le32_to_cpu(lmv->lmv_migrate_offset);
	} else {
		return -EINVAL;
	}

	return __lmv_name_to_stripe_index(hash_type, stripe_count,
					  migrate_hash, migrate_offset,
					  name, namelen, false);
}

/*
 * Accept only the user LMV magics this code handles: LMV_USER_MAGIC,
 * LMV_USER_MAGIC_SPECIFIC and LMV_MAGIC_FOREIGN.
 */
static inline bool lmv_user_magic_supported(__u32 lum_magic)
{
	if (lum_magic == LMV_USER_MAGIC)
		return true;

	if (lum_magic == LMV_USER_MAGIC_SPECIFIC)
		return true;

	return lum_magic == LMV_MAGIC_FOREIGN;
}

/*
 * Log one line describing \a lmv through CDEBUG_LIMIT(), prefixed by \a msg.
 *
 * \a mask is the debug mask, \a lmv a pointer to a structure with the
 * lmv_mds_md_v1 field names, \a msg a string.  Fields are printed as stored,
 * without any byte-order conversion.  Hash types that are not known are
 * printed as "invalid" next to their raw value.
 *
 * \a lmv is expanded several times, so it must not have side effects; every
 * expansion is parenthesized so that any pointer expression can be passed.
 */
#define LMV_DEBUG(mask, lmv, msg)					      \
	CDEBUG_LIMIT(mask,						      \
	       "%s LMV: magic=%#x count=%u index=%u hash=%s:%#x version=%u migrate_offset=%u migrate_hash=%s:%x pool=%.*s\n",\
	       msg, (lmv)->lmv_magic, (lmv)->lmv_stripe_count,		      \
	       (lmv)->lmv_master_mdt_index,				      \
	       lmv_is_known_hash_type((lmv)->lmv_hash_type) ?		      \
		mdt_hash_name[(lmv)->lmv_hash_type & LMV_HASH_TYPE_MASK] :    \
		"invalid", (lmv)->lmv_hash_type,			      \
	       (lmv)->lmv_layout_version, (lmv)->lmv_migrate_offset,	      \
	       lmv_is_known_hash_type((lmv)->lmv_migrate_hash) ?	      \
		mdt_hash_name[(lmv)->lmv_migrate_hash & LMV_HASH_TYPE_MASK] : \
		"invalid", (lmv)->lmv_migrate_hash,			      \
	       LOV_MAXPOOLNAME, (lmv)->lmv_pool_name)

/*
 * Sanity check for a master LMV stored little-endian: the magic must be
 * LMV_MAGIC_V1, the stripe count non-zero and the hash type sane.
 * NULL fails silently; any other failure is logged at D_ERROR.
 */
static inline bool lmv_is_sane(const struct lmv_mds_md_v1 *lmv)
{
	if (!lmv)
		return false;

	if (le32_to_cpu(lmv->lmv_magic) == LMV_MAGIC_V1 &&
	    le32_to_cpu(lmv->lmv_stripe_count) != 0 &&
	    lmv_is_sane_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
		return true;

	LMV_DEBUG(D_ERROR, lmv, "unknown layout");
	return false;
}

/*
 * Sanity check for an LMV stored little-endian that may be a master or a
 * stripe LMV: the magic must be LMV_MAGIC_V1 or LMV_MAGIC_STRIPE, the stripe
 * count non-zero and the hash type sane.
 * NULL fails silently; any other failure is logged at D_ERROR.
 */
static inline bool lmv_is_sane2(const struct lmv_mds_md_v1 *lmv)
{
	if (!lmv)
		return false;

	if ((le32_to_cpu(lmv->lmv_magic) == LMV_MAGIC_V1 ||
	     le32_to_cpu(lmv->lmv_magic) == LMV_MAGIC_STRIPE) &&
	    le32_to_cpu(lmv->lmv_stripe_count) != 0 &&
	    lmv_is_sane_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
		return true;

	LMV_DEBUG(D_ERROR, lmv, "unknown layout");
	return false;
}

/*
 * Tell whether the hash type of a little-endian LMV (master or stripe)
 * satisfies lmv_hash_is_splitting().  Returns false when \a lmv is NULL or
 * fails lmv_is_sane2().
 */
static inline bool lmv_is_splitting(const struct lmv_mds_md_v1 *lmv)
{
	if (!lmv_is_sane2(lmv))
		return false;

	/* stored little-endian: convert to CPU order, as lmv_is_sane2() does */
	return lmv_hash_is_splitting(le32_to_cpu(lmv->lmv_hash_type));
}

/*
 * Tell whether the hash type of a little-endian LMV (master or stripe)
 * satisfies lmv_hash_is_merging().  Returns false when \a lmv is NULL or
 * fails lmv_is_sane2().
 */
static inline bool lmv_is_merging(const struct lmv_mds_md_v1 *lmv)
{
	if (!lmv_is_sane2(lmv))
		return false;

	/* stored little-endian: convert to CPU order, as lmv_is_sane2() does */
	return lmv_hash_is_merging(le32_to_cpu(lmv->lmv_hash_type));
}

/*
 * Tell whether the hash type of a little-endian master LMV satisfies
 * lmv_hash_is_migrating().  Returns false when \a lmv is NULL or fails
 * lmv_is_sane(), which accepts only LMV_MAGIC_V1.
 */
static inline bool lmv_is_migrating(const struct lmv_mds_md_v1 *lmv)
{
	if (!lmv_is_sane(lmv))
		return false;

	/* stored little-endian: convert to CPU order, as lmv_is_sane() does */
	return lmv_hash_is_migrating(le32_to_cpu(lmv->lmv_hash_type));
}

/*
 * Tell whether a little-endian LMV (master or stripe) is being restriped,
 * i.e. its hash type satisfies lmv_hash_is_splitting() or
 * lmv_hash_is_merging().  Returns false when \a lmv is NULL or fails
 * lmv_is_sane2().
 */
static inline bool lmv_is_restriping(const struct lmv_mds_md_v1 *lmv)
{
	__u32 hash_type;

	if (!lmv_is_sane2(lmv))
		return false;

	/* stored little-endian: convert to CPU order, as lmv_is_sane2() does */
	hash_type = le32_to_cpu(lmv->lmv_hash_type);

	return lmv_hash_is_splitting(hash_type) ||
	       lmv_hash_is_merging(hash_type);
}

/*
 * Tell whether the layout of a little-endian LMV (master or stripe) is
 * changing, i.e. its hash type satisfies lmv_hash_is_splitting(),
 * lmv_hash_is_merging() or lmv_hash_is_migrating().  Returns false when
 * \a lmv is NULL or fails lmv_is_sane2().
 */
static inline bool lmv_is_layout_changing(const struct lmv_mds_md_v1 *lmv)
{
	__u32 hash_type;

	if (!lmv_is_sane2(lmv))
		return false;

	/* stored little-endian: convert to CPU order, as lmv_is_sane2() does */
	hash_type = le32_to_cpu(lmv->lmv_hash_type);

	return lmv_hash_is_splitting(hash_type) ||
	       lmv_hash_is_merging(hash_type) ||
	       lmv_hash_is_migrating(hash_type);
}

/*
 * Tell whether LMV_HASH_FLAG_FIXED is set in the hash type of a
 * little-endian LMV.  No NULL or sanity check is done here, so \a lmv must
 * be a valid pointer.
 */
static inline bool lmv_is_fixed(const struct lmv_mds_md_v1 *lmv)
{
	/* stored little-endian: convert to CPU order before testing the flag */
	return le32_to_cpu(lmv->lmv_hash_type) & LMV_HASH_FLAG_FIXED;
}

#endif