Viewing: kfilnd_dev.c

// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2022 Hewlett Packard Enterprise Development LP
 */

/*
 * This file is part of Lustre, http://www.lustre.org/
 *
 * kfilnd device implementation.
 */

#include "kfilnd_dev.h"
#include "kfilnd_ep.h"
#include "kfilnd_dom.h"
#include "kfilnd_peer.h"

/**
 * kfilnd_dev_post_imm_buffers() - Post the immediate receive buffers of every
 * endpoint owned by a KFI LND device.
 * @dev: KFI LND device whose endpoints get their receive buffers posted.
 *
 * Endpoints are visited in index order, one per LNet NI CPT, and the walk
 * stops at the first endpoint that reports an error. Buffers already posted
 * on earlier endpoints are not cancelled here.
 *
 * This function should be called only during KFI LND device initialization.
 *
 * Return: Zero on success, -EINVAL if @dev is NULL, otherwise the non-zero
 * value returned by the first failing kfilnd_ep_post_imm_buffers() call.
 */
int kfilnd_dev_post_imm_buffers(struct kfilnd_dev *dev)
{
	int ep_idx;
	int status = 0;

	if (!dev)
		return -EINVAL;

	/* Keep going only while every previous endpoint succeeded. */
	for (ep_idx = 0; !status && ep_idx < dev->kfd_ni->ni_ncpts; ep_idx++)
		status = kfilnd_ep_post_imm_buffers(dev->kfd_endpoints[ep_idx]);

	return status;
}

/**
 * kfilnd_dev_free() - Tear down and free a KFI LND device.
 * @dev: KFI LND device to free. A NULL pointer is silently ignored.
 *
 * Teardown order: the device's debugfs directory is removed, the device is
 * marked as shutting down, receive buffers are cancelled on every endpoint,
 * the endpoints are freed, the peer state is destroyed, the endpoint arrays
 * are released, the scalable endpoint and AV are closed, and finally the
 * domain reference is dropped and the device memory is freed.
 *
 * Callers should expect this function not to complete until all underlying
 * KFI LND transactions are complete (the waiting itself is presumably done by
 * the endpoint teardown helpers, which are not part of this file).
 *
 * Once the KFI LND device is freed, a reference is returned to the module.
 */
void kfilnd_dev_free(struct kfilnd_dev *dev)
{
	int ep_idx;
	int ni_ncpts;
	int table_ncpts;

	if (!dev)
		return;

	debugfs_remove_recursive(dev->dev_dir);

	/* Flag the device as shutting down so TNs stop using it. */
	dev->kfd_state = KFILND_STATE_SHUTTING_DOWN;

	ni_ncpts = dev->kfd_ni->ni_ncpts;

	/* First pass: cancel every outstanding RX buffer. */
	for (ep_idx = 0; ep_idx < ni_ncpts; ep_idx++)
		kfilnd_ep_cancel_imm_buffers(dev->kfd_endpoints[ep_idx]);

	/* Second pass: release the endpoints themselves. */
	for (ep_idx = 0; ep_idx < ni_ncpts; ep_idx++)
		kfilnd_ep_free(dev->kfd_endpoints[ep_idx]);

	kfilnd_peer_destroy(dev);

	/* The CPT map is sized by the LNet CPT table, not by the NI. */
	table_ncpts = cfs_cpt_number(lnet_cpt_table());
	LIBCFS_FREE(dev->cpt_to_endpoint,
		    table_ncpts * sizeof(*dev->cpt_to_endpoint));

	LIBCFS_FREE(dev->kfd_endpoints,
		    ni_ncpts * sizeof(*dev->kfd_endpoints));

	kfi_close(&dev->kfd_sep->fid);
	kfi_close(&dev->kfd_av->fid);

	kfilnd_dom_put(dev->dom);

	LIBCFS_FREE(dev, sizeof(*dev));

	/* Drop the module reference associated with this device. */
	module_put(THIS_MODULE);
}

/**
 * kfilnd_dev_alloc() - Allocate a new KFI LND device for an LNet NI.
 * @ni: LNet NI used to allocate the KFI LND device.
 * @node: Node string which can be passed into kfi_getinfo().
 *
 * During KFI LND device allocation, the LNet NID is used to build the node
 * and service strings. The LNet NID address (IPv4 address) is used for the
 * node string. The LNet NID net number is used for the service string.
 * Together, the node and service strings define the address of the KFI LND
 * device.
 *
 * The node and service strings are used to allocate a KFI scalable endpoint.
 * The KFI scalable endpoint is later used to allocate KFI LND endpoints, one
 * per LNet NI CPT.
 *
 * On success, @ni->ni_data points at the new device and the first address
 * word of @ni->ni_nid is rewritten from the local NIC address.
 *
 * For each successful KFI LND device allocation, a reference is taken against
 * this module to prevent it from being prematurely removed.
 *
 * Return: On success, valid pointer. On error, negative errno pointer
 * (-EINVAL for a NULL @ni, -ENOMEM on allocation failure, -EADDRNOTAVAIL when
 * no usable source address is available, or the error of the failing
 * kfabric/kfilnd call).
 */
struct kfilnd_dev *kfilnd_dev_alloc(struct lnet_ni *ni,
				    const char *node)
{
	int i;
	int rc;
	int nr_eps = 0;
	struct kfi_av_attr av_attr = {};
	struct kfi_info *dev_info;
	int cpt;
	int lnet_ncpts;
	struct kfilnd_dev *dev;
#ifdef HAVE_KFI_CXI_DOM_OPS
	struct kfi_cxi_domain_ops *dom_ops;
#endif

	if (!ni) {
		rc = -EINVAL;
		goto err;
	}

	/* Start allocating memory and underlying hardware resources for the
	 * LNet NI.
	 */
	LIBCFS_ALLOC(dev, sizeof(*dev));
	if (!dev) {
		rc = -ENOMEM;
		goto err;
	}

	dev->kfd_ni = ni;
	spin_lock_init(&dev->kfd_lock);
	atomic_set(&dev->session_keys, 0);

	/* dev_info is only valid once kfilnd_dom_get() has succeeded, which
	 * is why the err_free_dev label below does not touch it.
	 */
	dev->dom = kfilnd_dom_get(ni, node, &dev_info);
	if (IS_ERR(dev->dom)) {
		rc = PTR_ERR(dev->dom);
		CERROR("Failed to get KFI LND domain: rc=%d\n", rc);
		goto err_free_dev;
	}

	/* KFI LNet NID address needs to be unique per LNet NID and something
	 * which can be inserted into the KFI AV. The NIC address is one of the
	 * unique components. Local interface NIC address needs to be extracted
	 * and used to build the LNet NID.
	 *
	 * At this point, only the KFI CXI provider is supported.
	 */
	if (!dev_info->src_addr ||
	    dev_info->src_addrlen != sizeof(struct kcxi_addr)) {
		rc = -EADDRNOTAVAIL;
		CERROR("No kfabric source address returned\n");
		goto err_put_dom;
	}

	dev->nic_addr = ((struct kcxi_addr *)dev_info->src_addr)->nic;

	/* Get the device struct. Failure here is not fatal: it is only logged
	 * and rc is overwritten by the kfi_av_open() call below.
	 */
	dev->device = NULL;
#ifdef HAVE_KFI_CXI_DOM_OPS
	rc = kfi_open_ops(&dev->dom->domain->fid, KFI_CXI_DOM_OPS_1, 0,
				(void **)&dom_ops, NULL);
	if (!rc) {
		rc = dom_ops->get_device(&dev->dom->domain->fid,
					   &dev->device);
		if (rc)
			CDEBUG(D_NET, "get_device failed\n");
	}
#endif

	/* Create an AV for this device */
	av_attr.type = KFI_AV_UNSPEC;
	av_attr.rx_ctx_bits = KFILND_FAB_RX_CTX_BITS;
	rc = kfi_av_open(dev->dom->domain, &av_attr, &dev->kfd_av, dev);
	if (rc) {
		CERROR("Could not open AV, rc = %d\n", rc);
		goto err_put_dom;
	}

	/* Create a scalable endpoint to represent the device. */
	rc = kfi_scalable_ep(dev->dom->domain, dev_info, &dev->kfd_sep, dev);
	if (rc) {
		CERROR("Could not create scalable endpoint, rc = %d\n", rc);
		goto err_free_av;
	}

	/* Done with info. Clearing the pointer keeps the shared error path
	 * from freeing it a second time.
	 */
	kfi_freeinfo(dev_info);
	dev_info = NULL;

	/* Bind the endpoint to the AV */
	rc = kfi_scalable_ep_bind(dev->kfd_sep, &dev->kfd_av->fid, 0);
	if (rc) {
		CERROR("Could not bind scalable endpoint to AV, rc = %d\n", rc);
		goto err_free_sep;
	}

	/* Enable the scalable endpoint */
	rc = kfi_enable(dev->kfd_sep);
	if (rc) {
		CERROR("Could not enable scalable endpoint, rc = %d\n", rc);
		goto err_free_sep;
	}

	/* Allocate an array to store all the KFI LND endpoints. */
	LIBCFS_ALLOC_GFP(dev->kfd_endpoints,
			 ni->ni_ncpts * sizeof(*dev->kfd_endpoints),
			 GFP_KERNEL);
	if (!dev->kfd_endpoints) {
		rc = -ENOMEM;
		goto err_free_sep;
	}

	/* Map of all LNet CPTs to endpoints. */
	lnet_ncpts = cfs_cpt_number(lnet_cpt_table());
	LIBCFS_ALLOC_GFP(dev->cpt_to_endpoint,
			 lnet_ncpts * sizeof(*dev->cpt_to_endpoint),
			 GFP_KERNEL);
	if (!dev->cpt_to_endpoint) {
		rc = -ENOMEM;
		goto err_free_ep_array;
	}

	/* Create RX/TX contexts in kfabric for each LNet NI CPT. */
	for (i = 0; i < ni->ni_ncpts; i++) {
		/* Without an explicit CPT list the NI index is the CPT. */
		cpt = !ni->ni_cpts ? i : ni->ni_cpts[i];

		dev->kfd_endpoints[i] =
			kfilnd_ep_alloc(dev, i, cpt,
					ni->ni_net->net_tunables.lct_max_tx_credits,
					KFILND_IMMEDIATE_MSG_SIZE);
		if (IS_ERR(dev->kfd_endpoints[i])) {
			rc = PTR_ERR(dev->kfd_endpoints[i]);
			goto err_free_endpoints;
		}

		dev->cpt_to_endpoint[cpt] = dev->kfd_endpoints[i];

		/* Track how many endpoints the error path has to unwind. */
		nr_eps++;
	}

	kfilnd_peer_init(dev);

	/* Mark that the dev/NI has now been initialized */
	dev->kfd_state = KFILND_STATE_INITIALIZED;

	ni->ni_data = dev;
	ni->ni_nid.nid_addr[0] = cpu_to_be32(LNET_NIDADDR(dev->nic_addr));

	/* Initialize debugfs stats.
	 *
	 * Only the dentry matching the field name is stored. The remaining
	 * dentries are intentionally not kept: storing them all in
	 * initiator_state_stats_file would leave that field pointing at the
	 * "reset_stats" file, and kfilnd_dev_free() removes every file through
	 * debugfs_remove_recursive() on dev_dir anyway.
	 */
	dev->dev_dir = debugfs_create_dir(libcfs_nidstr(&ni->ni_nid),
					  kfilnd_debug_dir);
	dev->initiator_state_stats_file =
		debugfs_create_file("initiator_state_stats", 0444,
				    dev->dev_dir, dev,
				    &kfilnd_initiator_state_stats_file_ops);
	debugfs_create_file("initiator_stats", 0444, dev->dev_dir, dev,
			    &kfilnd_initiator_stats_file_ops);
	debugfs_create_file("target_state_stats", 0444, dev->dev_dir, dev,
			    &kfilnd_target_state_stats_file_ops);
	debugfs_create_file("target_stats", 0444, dev->dev_dir, dev,
			    &kfilnd_target_stats_file_ops);
	debugfs_create_file("reset_stats", 0444, dev->dev_dir, dev,
			    &kfilnd_reset_stats_file_ops);

	kfilnd_dev_reset_stats(dev);

	/* Pairs with the module_put() in kfilnd_dev_free().
	 *
	 * NOTE(review): the result of try_module_get() is ignored. If it can
	 * ever fail here, the later module_put() would be unbalanced; confirm
	 * whether the caller guarantees the module is live at this point.
	 */
	try_module_get(THIS_MODULE);

	return dev;

err_free_endpoints:
	/* Release only the endpoints that were successfully allocated. The
	 * slot that failed holds an error pointer and later slots were never
	 * filled in, so none of those are handed to kfilnd_ep_free().
	 */
	for (i = 0; i < nr_eps; i++)
		kfilnd_ep_free(dev->kfd_endpoints[i]);

	LIBCFS_FREE(dev->cpt_to_endpoint,
		    lnet_ncpts * sizeof(*dev->cpt_to_endpoint));
err_free_ep_array:
	LIBCFS_FREE(dev->kfd_endpoints,
		    ni->ni_ncpts * sizeof(*dev->kfd_endpoints));
err_free_sep:
	kfi_close(&dev->kfd_sep->fid);
err_free_av:
	kfi_close(&dev->kfd_av->fid);
err_put_dom:
	kfilnd_dom_put(dev->dom);
	/* Still non-NULL only if we failed before the info was released. */
	if (dev_info)
		kfi_freeinfo(dev_info);
err_free_dev:
	LIBCFS_FREE(dev, sizeof(*dev));
err:
	return ERR_PTR(rc);
}

void kfilnd_dev_reset_stats(struct kfilnd_dev *dev)
{
	unsigned int data_size;
	enum tn_states state;
	struct kfilnd_tn_duration_stat *stat;

	for (data_size = 0; data_size < KFILND_DATA_SIZE_BUCKETS; data_size++) {
		stat = &dev->initiator_stats.data_size[data_size];
		atomic64_set(&stat->accumulated_duration, 0);
		atomic_set(&stat->accumulated_count, 0);
		atomic64_set(&stat->max_duration, 0);
		atomic64_set(&stat->min_duration, MIN_DURATION_RESET);

		stat = &dev->target_stats.data_size[data_size];
		atomic64_set(&stat->accumulated_duration, 0);
		atomic_set(&stat->accumulated_count, 0);
		atomic64_set(&stat->max_duration, 0);
		atomic64_set(&stat->min_duration, MIN_DURATION_RESET);

		for (state = 0; state < TN_STATE_MAX; state++) {
			stat = &dev->initiator_state_stats.state[state].data_size[data_size];
			atomic64_set(&stat->accumulated_duration, 0);
			atomic_set(&stat->accumulated_count, 0);
			atomic64_set(&stat->max_duration, 0);
			atomic64_set(&stat->min_duration, MIN_DURATION_RESET);

			stat = &dev->target_state_stats.state[state].data_size[data_size];
			atomic64_set(&stat->accumulated_duration, 0);
			atomic_set(&stat->accumulated_count, 0);
			atomic64_set(&stat->max_duration, 0);
			atomic64_set(&stat->min_duration, MIN_DURATION_RESET);
		}
	}
}

u32 kfilnd_dev_get_session_key(struct kfilnd_dev *dev)
{
	return (u32)atomic_add_return(1, &dev->session_keys);
}