Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

net/smc: Introduce smc_ops #8312

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/net/netns/smc.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ struct netns_smc {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *smc_hdr;
#endif
#if IS_ENABLED(CONFIG_SMC_OPS)
struct smc_ops __rcu *ops;
#endif /* CONFIG_SMC_OPS */
unsigned int sysctl_autocorking_size;
unsigned int sysctl_smcr_buf_type;
int sysctl_smcr_testlink_time;
Expand Down
51 changes: 51 additions & 0 deletions include/net/smc.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include "linux/ism.h"

struct sock;
struct tcp_sock;
struct inet_request_sock;

#define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */

Expand Down Expand Up @@ -97,4 +99,53 @@ struct smcd_dev {
u8 going_away : 1;
};

#define SMC_OPS_NAME_MAX 16 /* size in bytes of the smc_ops name[] buffer */

/* Flag bits accepted in smc_ops.flags. */
enum {
/* ops can be inherited from init_net */
SMC_OPS_FLAG_INHERITABLE = 0x1,

/* mask of all currently defined flag bits */
SMC_OPS_ALL_FLAGS = SMC_OPS_FLAG_INHERITABLE,
};

/* A named set of optional hooks that steer SMC option negotiation. */
struct smc_ops {
/* private */

struct list_head list;
struct module *owner;

/* public */

/* unique name */
char name[SMC_OPS_NAME_MAX];
/* combination of SMC_OPS_FLAG_* bits */
int flags;

/* Invoked before computing the SMC option for SYN packets.
* The return value controls whether the SMC option is set:
* return 0 to disable SMC, or any other value to enable it.
*/
int (*set_option)(struct tcp_sock *tp);

/* Invoked before setting up the SMC option for SYN-ACK packets.
* The return value controls whether to respond with the SMC option:
* return 0 to disable SMC, or any other value to enable it.
*/
int (*set_option_cond)(const struct tcp_sock *tp, struct inet_request_sock *ireq);
};

/* smc_call_retops - call one hook of the smc_ops attached to @sk's netns.
 * @init_val: result to use when no ops is attached or the hook is NULL
 * @sk:       socket whose network namespace is consulted
 * @func:     name of the struct smc_ops member to call
 * @...:      arguments forwarded to the hook
 *
 * The ops pointer is __rcu annotated, so it is fetched with
 * rcu_dereference() inside an RCU read-side critical section that also
 * covers the hook invocation. Macro-local names carry a "__" prefix so
 * they cannot capture a caller argument that happens to be called "ops".
 *
 * Evaluates to 0 or 1 (the result is normalised with !!) in both the
 * CONFIG_SMC_OPS enabled and disabled variants.
 */
#if IS_ENABLED(CONFIG_SMC_OPS)
#define smc_call_retops(init_val, sk, func, ...) ({			\
	typeof(init_val) __ret = (init_val);				\
	struct smc_ops *__ops;						\
	rcu_read_lock();						\
	__ops = rcu_dereference(sock_net(sk)->smc.ops);			\
	if (__ops && __ops->func)					\
		__ret = __ops->func(__VA_ARGS__);			\
	rcu_read_unlock();						\
	!!__ret;							\
})
#else
#define smc_call_retops(init_val, ...) (!!(init_val))
#endif /* CONFIG_SMC_OPS */

#endif /* _SMC_H */
2 changes: 2 additions & 0 deletions kernel/bpf/bpf_struct_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,7 @@ bool bpf_struct_ops_get(const void *kdata)
map = __bpf_map_inc_not_zero(&st_map->map, false);
return !IS_ERR(map);
}
EXPORT_SYMBOL_GPL(bpf_struct_ops_get);

void bpf_struct_ops_put(const void *kdata)
{
Expand All @@ -1151,6 +1152,7 @@ void bpf_struct_ops_put(const void *kdata)

bpf_map_put(&st_map->map);
}
EXPORT_SYMBOL_GPL(bpf_struct_ops_put);

int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff)
{
Expand Down
1 change: 1 addition & 0 deletions kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -1167,6 +1167,7 @@ int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)

return src - orig_src;
}
EXPORT_SYMBOL_GPL(bpf_obj_name_cpy);

int map_check_no_btf(const struct bpf_map *map,
const struct btf *btf,
Expand Down
15 changes: 11 additions & 4 deletions net/ipv4/tcp_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
#include <linux/skbuff_ref.h>

#include <trace/events/tcp.h>
#include <net/smc.h>

/* Refresh clocks of a TCP socket,
* ensuring monotically increasing values.
Expand Down Expand Up @@ -759,14 +760,17 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
mptcp_options_write(th, ptr, tp, opts);
}

static void smc_set_option(const struct tcp_sock *tp,
static void smc_set_option(struct tcp_sock *tp,
struct tcp_out_options *opts,
unsigned int *remaining)
{
#if IS_ENABLED(CONFIG_SMC)
if (static_branch_unlikely(&tcp_have_smc)) {
if (tp->syn_smc) {
if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
tp->syn_smc = smc_call_retops(1, &tp->inet_conn.icsk_inet.sk,
set_option, tp);
/* re-check syn_smc */
if (tp->syn_smc && *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
opts->options |= OPTION_SMC;
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
}
Expand All @@ -776,14 +780,17 @@ static void smc_set_option(const struct tcp_sock *tp,
}

static void smc_set_option_cond(const struct tcp_sock *tp,
const struct inet_request_sock *ireq,
struct inet_request_sock *ireq,
struct tcp_out_options *opts,
unsigned int *remaining)
{
#if IS_ENABLED(CONFIG_SMC)
if (static_branch_unlikely(&tcp_have_smc)) {
if (tp->syn_smc && ireq->smc_ok) {
if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
ireq->smc_ok = smc_call_retops(1, &tp->inet_conn.icsk_inet.sk,
set_option_cond, tp, ireq);
/* re-check smc_ok */
if (ireq->smc_ok && *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
opts->options |= OPTION_SMC;
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
}
Expand Down
12 changes: 12 additions & 0 deletions net/smc/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,15 @@ config SMC_LO
of architecture or hardware.

if unsure, say N.

config SMC_OPS
bool "Generic hook for SMC subsystem"
depends on SMC && BPF_SYSCALL
default n
help
SMC_OPS enables support for registering generic hooks via eBPF programs
for the SMC subsystem. eBPF programs offer much greater flexibility
in modifying the behavior of the SMC protocol stack compared
to a purely kernel-based approach.

if unsure, say N.
1 change: 1 addition & 0 deletions net/smc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_sta
smc-y += smc_tracepoint.o smc_inet.o
smc-$(CONFIG_SYSCTL) += smc_sysctl.o
smc-$(CONFIG_SMC_LO) += smc_loopback.o
smc-$(CONFIG_SMC_OPS) += smc_ops.o
10 changes: 10 additions & 0 deletions net/smc/af_smc.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include "smc_sysctl.h"
#include "smc_loopback.h"
#include "smc_inet.h"
#include "smc_ops.h"

static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
* creation on server
Expand Down Expand Up @@ -3576,8 +3577,17 @@ static int __init smc_init(void)
pr_err("%s: smc_inet_init fails with %d\n", __func__, rc);
goto out_ulp;
}

rc = smc_bpf_struct_ops_init();
if (rc) {
pr_err("%s: smc_bpf_struct_ops_init fails with %d\n", __func__, rc);
goto out_inet;
}

static_branch_enable(&tcp_have_smc);
return 0;
out_inet:
smc_inet_exit();
out_ulp:
tcp_unregister_ulp(&smc_ulp_ops);
out_lo:
Expand Down
130 changes: 130 additions & 0 deletions net/smc/smc_ops.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
* Generic hook for SMC subsystem.
*
* Copyright IBM Corp. 2016
* Copyright (c) 2024, Alibaba Inc.
*
* Author: D. Wythe <[email protected]>
*/

#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
#include <linux/btf.h>

#include "smc_ops.h"

static DEFINE_SPINLOCK(smc_ops_list_lock);
static LIST_HEAD(smc_ops_list);

/* Link @ops into the global smc_ops list.
 *
 * The lookup and the insertion happen under smc_ops_list_lock, so two
 * registrations with the same name cannot both succeed.
 *
 * Return: 0 on success, -EEXIST if an ops with the same name is already
 * on the list.
 */
static int smc_ops_reg(struct smc_ops *ops)
{
	int rc = -EEXIST;

	spin_lock(&smc_ops_list_lock);
	/* names are unique: only link @ops when nothing carries its name yet */
	if (!smc_ops_find_by_name(ops->name)) {
		list_add_tail_rcu(&ops->list, &smc_ops_list);
		rc = 0;
	}
	spin_unlock(&smc_ops_list_lock);

	return rc;
}

/* Unlink @ops from the global smc_ops list and wait for an RCU grace
 * period before returning.
 */
static void smc_ops_unreg(struct smc_ops *ops)
{
spin_lock(&smc_ops_list_lock);
list_del_rcu(&ops->list);
spin_unlock(&smc_ops_list_lock);

/* Wait for all RCU readers that may still see @ops to complete */
synchronize_rcu();
}

struct smc_ops *smc_ops_find_by_name(const char *name)
{
struct smc_ops *ops;

list_for_each_entry_rcu(ops, &smc_ops_list, list) {
if (strcmp(ops->name, name) == 0)
return ops;
}
return NULL;
}

/* Stub for the set_option hook; it ignores @tp and always returns 1. */
static int __bpf_smc_stub_set_tcp_option(struct tcp_sock *tp)
{
	return 1;
}
/* Stub for the set_option_cond hook; it ignores both arguments and
 * always returns 1.
 */
static int
__bpf_smc_stub_set_tcp_option_cond(const struct tcp_sock *tp,
				   struct inet_request_sock *ireq)
{
	return 1;
}

/* smc_ops instance filled with the stub hooks above; bpf_smc_bpf_ops
 * points at it through its .cfi_stubs member.
 */
static struct smc_ops __bpf_smc_bpf_ops = {
.set_option = __bpf_smc_stub_set_tcp_option,
.set_option_cond = __bpf_smc_stub_set_tcp_option_cond,
};

/* struct_ops .init callback: nothing to set up, always reports success. */
static int smc_bpf_ops_init(struct btf *btf)
{
	return 0;
}

/* struct_ops .reg callback: @kdata is the kernel-side smc_ops to register.
 * @link is not used. Returns whatever smc_ops_reg() returns.
 */
static int smc_bpf_ops_reg(void *kdata, struct bpf_link *link)
{
	struct smc_ops *ops = kdata;

	return smc_ops_reg(ops);
}

/* struct_ops .unreg callback: @kdata is the kernel-side smc_ops to remove.
 * @link is not used.
 */
static void smc_bpf_ops_unreg(void *kdata, struct bpf_link *link)
{
	struct smc_ops *ops = kdata;

	smc_ops_unreg(ops);
}

/* struct_ops .init_member callback: copy the data members of a
 * user-supplied smc_ops (@udata) into the kernel copy (@kdata).
 *
 * Only the name and flags members are handled here, identified by the
 * byte offset of @member within struct smc_ops.
 *
 * Return: 1 if the member was validated and copied here, 0 if this
 * callback does not handle the member, -EINVAL if the name could not be
 * copied (bpf_obj_name_cpy() returned <= 0) or flags has bits outside
 * SMC_OPS_ALL_FLAGS.
 * NOTE(review): the 1/0 distinction is presumably how the struct_ops core
 * tells "consumed" from "use default handling" - confirm against
 * kernel/bpf/bpf_struct_ops.c.
 */
static int smc_bpf_ops_init_member(const struct btf_type *t,
				   const struct btf_member *member,
				   void *kdata, const void *udata)
{
	const struct smc_ops *src = udata;
	struct smc_ops *dst = kdata;
	u32 offset = __btf_member_bit_offset(t, member) / 8;

	if (offset == offsetof(struct smc_ops, name)) {
		if (bpf_obj_name_cpy(dst->name, src->name,
				     sizeof(src->name)) <= 0)
			return -EINVAL;
		return 1;
	}

	if (offset == offsetof(struct smc_ops, flags)) {
		/* reject any flag bit this kernel does not know about */
		if (src->flags & ~SMC_OPS_ALL_FLAGS)
			return -EINVAL;
		dst->flags = src->flags;
		return 1;
	}

	return 0;
}

/* Verifier callbacks referenced by bpf_smc_bpf_ops. Both entries point at
 * functions defined outside this file; nothing SMC-specific is added here.
 */
static const struct bpf_verifier_ops smc_bpf_verifier_ops = {
.get_func_proto = bpf_base_func_proto,
.is_valid_access = bpf_tracing_btf_ctx_access,
};

/* struct_ops descriptor named "smc_ops"; it wires up the callbacks defined
 * in this file and is registered by smc_bpf_struct_ops_init().
 */
static struct bpf_struct_ops bpf_smc_bpf_ops = {
.name = "smc_ops",
.init = smc_bpf_ops_init,
.reg = smc_bpf_ops_reg,
.unreg = smc_bpf_ops_unreg,
.cfi_stubs = &__bpf_smc_bpf_ops,
.verifier_ops = &smc_bpf_verifier_ops,
.init_member = smc_bpf_ops_init_member,
.owner = THIS_MODULE,
};

/* Register the "smc_ops" struct_ops descriptor.
 *
 * Return: the result of register_bpf_struct_ops().
 */
int smc_bpf_struct_ops_init(void)
{
	int rc;

	rc = register_bpf_struct_ops(&bpf_smc_bpf_ops, smc_ops);
	return rc;
}
30 changes: 30 additions & 0 deletions net/smc/smc_ops.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
* Generic hook for SMC subsystem.
*
* Copyright IBM Corp. 2016
* Copyright (c) 2024, Alibaba Inc.
*
* Author: D. Wythe <[email protected]>
*/

#ifndef __SMC_OPS
#define __SMC_OPS

#include <net/smc.h>

/* Find ops by the target name, which is required to be a C string.
* Return NULL if no such ops was found; otherwise, return a valid ops.
*
* Note: Caller MUST ensure it is invoked under rcu_read_lock.
*/
struct smc_ops *smc_ops_find_by_name(const char *name);
/* Register the smc_ops BPF struct_ops. When CONFIG_SMC_OPS is disabled this
 * is an inline stub that does nothing and returns 0.
 */
#if IS_ENABLED(CONFIG_SMC_OPS)
int smc_bpf_struct_ops_init(void);
#else
static inline int smc_bpf_struct_ops_init(void) { return 0; }
#endif

#endif /* __SMC_OPS */
Loading
Loading