summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKuniyuki Iwashima <kuniyu@amazon.co.jp>2021-06-12 21:32:14 +0900
committerDaniel Borkmann <daniel@iogearbox.net>2021-06-15 18:01:05 +0200
commitf9ac779f881c2ec3d1cdcd7fa9d4f9442bf60e80 (patch)
tree1350ccbd2a33d8643e102339f265c5ec8a9931c8
parentbbf29d3a2e49e482d5267311798aec42f00e88f3 (diff)
net: Introduce net.ipv4.tcp_migrate_req.
This commit adds a new sysctl option: net.ipv4.tcp_migrate_req. If this option is enabled or eBPF program is attached, we will be able to migrate child sockets from a listener to another in the same reuseport group after close() or shutdown() syscalls. Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Benjamin Herrenschmidt <benh@amazon.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Acked-by: Martin KaFai Lau <kafai@fb.com> Link: https://lore.kernel.org/bpf/20210612123224.12525-2-kuniyu@amazon.co.jp
-rw-r--r--Documentation/networking/ip-sysctl.rst25
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--net/ipv4/sysctl_net_ipv4.c9
3 files changed, 35 insertions, 0 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index a5c250044500..b0436d3a4f11 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -761,6 +761,31 @@ tcp_syncookies - INTEGER
network connections you can set this knob to 2 to enable
unconditionally generation of syncookies.
+tcp_migrate_req - BOOLEAN
+ The incoming connection is tied to a specific listening socket when
+ the initial SYN packet is received during the three-way handshake.
+ When a listener is closed, in-flight request sockets during the
+ handshake and established sockets in the accept queue are aborted.
+
+ If the listener has SO_REUSEPORT enabled, other listeners on the
+ same port should have been able to accept such connections. This
+ option makes it possible to migrate such child sockets to another
+ listener after close() or shutdown().
+
+ The BPF_SK_REUSEPORT_SELECT_OR_MIGRATE type of eBPF program should
+ usually be used to define the policy to pick an alive listener.
+ Otherwise, the kernel will randomly pick an alive listener only if
+ this option is enabled.
+
+ Note that migration between listeners with different settings may
+ crash applications. Let's say migration happens from listener A to
+ B, and only B has TCP_SAVE_SYN enabled. B cannot read SYN data from
+ the requests migrated from A. To avoid such a situation, cancel
+ migration by returning SK_DROP in the type of eBPF program, or
+ disable this option.
+
+ Default: 0
+
tcp_fastopen - INTEGER
Enable TCP Fast Open (RFC7413) to send and accept data in the opening
SYN packet.
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 746c80cd4257..b8620519eace 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -126,6 +126,7 @@ struct netns_ipv4 {
u8 sysctl_tcp_syn_retries;
u8 sysctl_tcp_synack_retries;
u8 sysctl_tcp_syncookies;
+ u8 sysctl_tcp_migrate_req;
int sysctl_tcp_reordering;
u8 sysctl_tcp_retries1;
u8 sysctl_tcp_retries2;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4fa77f182dcb..6f1e64d49232 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -961,6 +961,15 @@ static struct ctl_table ipv4_net_table[] = {
},
#endif
{
+ .procname = "tcp_migrate_req",
+ .data = &init_net.ipv4.sysctl_tcp_migrate_req,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
+ },
+ {
.procname = "tcp_reordering",
.data = &init_net.ipv4.sysctl_tcp_reordering,
.maxlen = sizeof(int),