summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2017-02-09 11:43:57 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2017-02-09 11:43:57 +1100
commit4bb191583af2033e12802a232370c8bc78cecf23 (patch)
treeebae3aaac8bea6f05fbdf804f9f73858073ac1e9 /net
parentb0621d10b01934e9a8ee4033ba8037d32b14fa89 (diff)
parent935b7f643018878bd9d4193eea8b575aff736b9b (diff)
Merge remote-tracking branch 'netfilter-next/master'
Diffstat (limited to 'net')
-rw-r--r--net/netfilter/Kconfig10
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/nf_tables_api.c54
-rw-r--r--net/netfilter/nft_ct.c195
-rw-r--r--net/netfilter/nft_exthdr.c139
-rw-r--r--net/netfilter/nft_set_bitmap.c314
-rw-r--r--net/netfilter/nft_set_hash.c14
-rw-r--r--net/netfilter/nft_set_rbtree.c14
8 files changed, 675 insertions, 66 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index dfbe9deeb8c4..9b28864cc36a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -467,10 +467,10 @@ config NF_TABLES_NETDEV
This option enables support for the "netdev" table.
config NFT_EXTHDR
- tristate "Netfilter nf_tables IPv6 exthdr module"
+ tristate "Netfilter nf_tables exthdr module"
help
This option adds the "exthdr" expression that you can use to match
- IPv6 extension headers.
+ IPv6 extension headers and tcp options.
config NFT_META
tristate "Netfilter nf_tables meta module"
@@ -509,6 +509,12 @@ config NFT_SET_HASH
This option adds the "hash" set type that is used to build one-way
mappings between matchings and actions.
+config NFT_SET_BITMAP
+ tristate "Netfilter nf_tables bitmap set module"
+ help
+ This option adds the "bitmap" set type that is used to build sets
+ whose keys are smaller or equal to 16 bits.
+
config NFT_COUNTER
tristate "Netfilter nf_tables counter module"
help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6b3034f12661..c9b78e7b342f 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -93,6 +93,7 @@ obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o
obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o
+obj-$(CONFIG_NFT_SET_BITMAP) += nft_set_bitmap.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 57eeae63f597..cb6ae46f6c48 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2401,9 +2401,10 @@ nft_select_set_ops(const struct nlattr * const nla[],
features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT;
}
- bops = NULL;
- best.size = ~0;
- best.class = ~0;
+ bops = NULL;
+ best.size = ~0;
+ best.lookup = ~0;
+ best.space = ~0;
list_for_each_entry(ops, &nf_tables_set_ops, list) {
if ((ops->features & features) != features)
@@ -2413,16 +2414,27 @@ nft_select_set_ops(const struct nlattr * const nla[],
switch (policy) {
case NFT_SET_POL_PERFORMANCE:
- if (est.class < best.class)
- break;
- if (est.class == best.class && est.size < best.size)
+ if (est.lookup < best.lookup)
break;
+ if (est.lookup == best.lookup) {
+ if (!desc->size) {
+ if (est.space < best.space)
+ break;
+ } else if (est.size < best.size) {
+ break;
+ }
+ }
continue;
case NFT_SET_POL_MEMORY:
- if (est.size < best.size)
- break;
- if (est.size == best.size && est.class < best.class)
+ if (!desc->size) {
+ if (est.space < best.space)
+ break;
+ if (est.space == best.space &&
+ est.lookup < best.lookup)
+ break;
+ } else if (est.size < best.size) {
break;
+ }
continue;
default:
break;
@@ -3121,6 +3133,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
iter.count = 0;
iter.err = 0;
iter.fn = nf_tables_bind_check_setelem;
+ iter.flush = false;
set->ops->walk(ctx, set, &iter);
if (iter.err < 0)
@@ -3374,6 +3387,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
args.iter.count = 0;
args.iter.err = 0;
args.iter.fn = nf_tables_dump_setelem;
+ args.iter.flush = false;
set->ops->walk(&ctx, set, &args.iter);
nla_nest_end(skb, nest);
@@ -3752,7 +3766,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return 0;
err6:
- set->ops->remove(set, &elem);
+ set->ops->remove(ctx->net, set, &elem);
err5:
kfree(trans);
err4:
@@ -3898,7 +3912,7 @@ static int nft_flush_set(const struct nft_ctx *ctx,
if (!trans)
return -ENOMEM;
- if (!set->ops->deactivate_one(ctx->net, set, elem->priv)) {
+ if (!set->ops->flush(ctx->net, set, elem->priv)) {
err = -ENOENT;
goto err1;
}
@@ -3936,15 +3950,14 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
return -EBUSY;
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) {
- struct nft_set_dump_args args = {
- .iter = {
- .genmask = genmask,
- .fn = nft_flush_set,
- },
+ struct nft_set_iter iter = {
+ .genmask = genmask,
+ .fn = nft_flush_set,
+ .flush = true,
};
- set->ops->walk(&ctx, set, &args.iter);
+ set->ops->walk(&ctx, set, &iter);
- return args.iter.err;
+ return iter.err;
}
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
@@ -4804,7 +4817,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
NFT_MSG_DELSETELEM, 0);
- te->set->ops->remove(te->set, &te->elem);
+ te->set->ops->remove(net, te->set, &te->elem);
atomic_dec(&te->set->nelems);
te->set->ndeact--;
break;
@@ -4925,7 +4938,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = (struct nft_trans_elem *)trans->data;
- te->set->ops->remove(te->set, &te->elem);
+ te->set->ops->remove(net, te->set, &te->elem);
atomic_dec(&te->set->nelems);
break;
case NFT_MSG_DELSETELEM:
@@ -5091,6 +5104,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
iter.count = 0;
iter.err = 0;
iter.fn = nf_tables_loop_check_setelem;
+ iter.flush = false;
set->ops->walk(ctx, set, &iter);
if (iter.err < 0)
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 66a2377510e1..c6b8022c0e47 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -32,6 +32,11 @@ struct nft_ct {
};
};
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
+static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
+#endif
+
static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
enum nft_ct_keys k,
enum ip_conntrack_dir d)
@@ -151,6 +156,18 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
case NFT_CT_PROTOCOL:
*dest = nf_ct_protonum(ct);
return;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE: {
+ const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
+
+ if (priv->dir < IP_CT_DIR_MAX)
+ *dest = nf_ct_zone_id(zone, priv->dir);
+ else
+ *dest = zone->id;
+
+ return;
+ }
+#endif
default:
break;
}
@@ -179,6 +196,53 @@ err:
regs->verdict.code = NFT_BREAK;
}
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static void nft_ct_set_zone_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nf_conntrack_zone zone = { .dir = NF_CT_DEFAULT_ZONE_DIR };
+ const struct nft_ct *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ enum ip_conntrack_info ctinfo;
+ u16 value = regs->data[priv->sreg];
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) /* already tracked */
+ return;
+
+ zone.id = value;
+
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ zone.dir = NF_CT_ZONE_DIR_ORIG;
+ break;
+ case IP_CT_DIR_REPLY:
+ zone.dir = NF_CT_ZONE_DIR_REPL;
+ break;
+ default:
+ break;
+ }
+
+ ct = this_cpu_read(nft_ct_pcpu_template);
+
+ if (likely(atomic_read(&ct->ct_general.use) == 1)) {
+ nf_ct_zone_add(ct, &zone);
+ } else {
+ /* previous skb got queued to userspace */
+ ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
+ if (!ct) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+ }
+
+ atomic_inc(&ct->ct_general.use);
+ nf_ct_set(skb, ct, IP_CT_NEW);
+}
+#endif
+
static void nft_ct_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -257,6 +321,45 @@ static void nft_ct_netns_put(struct net *net, uint8_t family)
nf_ct_netns_put(net, family);
}
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static void nft_ct_tmpl_put_pcpu(void)
+{
+ struct nf_conn *ct;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ ct = per_cpu(nft_ct_pcpu_template, cpu);
+ if (!ct)
+ break;
+ nf_ct_put(ct);
+ per_cpu(nft_ct_pcpu_template, cpu) = NULL;
+ }
+}
+
+static bool nft_ct_tmpl_alloc_pcpu(void)
+{
+ struct nf_conntrack_zone zone = { .id = 0 };
+ struct nf_conn *tmp;
+ int cpu;
+
+ if (nft_ct_pcpu_template_refcnt)
+ return true;
+
+ for_each_possible_cpu(cpu) {
+ tmp = nf_ct_tmpl_alloc(&init_net, &zone, GFP_KERNEL);
+ if (!tmp) {
+ nft_ct_tmpl_put_pcpu();
+ return false;
+ }
+
+ atomic_set(&tmp->ct_general.use, 1);
+ per_cpu(nft_ct_pcpu_template, cpu) = tmp;
+ }
+
+ return true;
+}
+#endif
+
static int nft_ct_get_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -266,6 +369,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
+ priv->dir = IP_CT_DIR_MAX;
switch (priv->key) {
case NFT_CT_DIRECTION:
if (tb[NFTA_CT_DIRECTION] != NULL)
@@ -333,11 +437,13 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
case NFT_CT_BYTES:
case NFT_CT_PKTS:
case NFT_CT_AVGPKT:
- /* no direction? return sum of original + reply */
- if (tb[NFTA_CT_DIRECTION] == NULL)
- priv->dir = IP_CT_DIR_MAX;
len = sizeof(u64);
break;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE:
+ len = sizeof(u16);
+ break;
+#endif
default:
return -EOPNOTSUPP;
}
@@ -371,15 +477,33 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
return 0;
}
+static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
+{
+ switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+ nf_connlabels_put(ctx->net);
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE:
+ if (--nft_ct_pcpu_template_refcnt == 0)
+ nft_ct_tmpl_put_pcpu();
+#endif
+ default:
+ break;
+ }
+}
+
static int nft_ct_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
- bool label_got = false;
unsigned int len;
int err;
+ priv->dir = IP_CT_DIR_MAX;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (priv->key) {
#ifdef CONFIG_NF_CONNTRACK_MARK
@@ -397,13 +521,30 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
if (err)
return err;
- label_got = true;
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE:
+ if (!nft_ct_tmpl_alloc_pcpu())
+ return -ENOMEM;
+ nft_ct_pcpu_template_refcnt++;
break;
#endif
default:
return -EOPNOTSUPP;
}
+ if (tb[NFTA_CT_DIRECTION]) {
+ priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ case IP_CT_DIR_REPLY:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
@@ -416,8 +557,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
return 0;
err1:
- if (label_got)
- nf_connlabels_put(ctx->net);
+ __nft_ct_set_destroy(ctx, priv);
return err;
}
@@ -432,16 +572,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
{
struct nft_ct *priv = nft_expr_priv(expr);
- switch (priv->key) {
-#ifdef CONFIG_NF_CONNTRACK_LABELS
- case NFT_CT_LABELS:
- nf_connlabels_put(ctx->net);
- break;
-#endif
- default:
- break;
- }
-
+ __nft_ct_set_destroy(ctx, priv);
nft_ct_netns_put(ctx->net, ctx->afi->family);
}
@@ -465,6 +596,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
case NFT_CT_BYTES:
case NFT_CT_PKTS:
case NFT_CT_AVGPKT:
+ case NFT_CT_ZONE:
if (priv->dir < IP_CT_DIR_MAX &&
nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
goto nla_put_failure;
@@ -487,6 +619,17 @@ static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
goto nla_put_failure;
+
+ switch (priv->key) {
+ case NFT_CT_ZONE:
+ if (priv->dir < IP_CT_DIR_MAX &&
+ nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+ goto nla_put_failure;
+ break;
+ default:
+ break;
+ }
+
return 0;
nla_put_failure:
@@ -512,6 +655,17 @@ static const struct nft_expr_ops nft_ct_set_ops = {
.dump = nft_ct_set_dump,
};
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static const struct nft_expr_ops nft_ct_set_zone_ops = {
+ .type = &nft_ct_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+ .eval = nft_ct_set_zone_eval,
+ .init = nft_ct_set_init,
+ .destroy = nft_ct_set_destroy,
+ .dump = nft_ct_set_dump,
+};
+#endif
+
static const struct nft_expr_ops *
nft_ct_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -525,8 +679,13 @@ nft_ct_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_CT_DREG])
return &nft_ct_get_ops;
- if (tb[NFTA_CT_SREG])
+ if (tb[NFTA_CT_SREG]) {
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ if (nla_get_be32(tb[NFTA_CT_KEY]) == htonl(NFT_CT_ZONE))
+ return &nft_ct_set_zone_ops;
+#endif
return &nft_ct_set_ops;
+ }
return ERR_PTR(-EINVAL);
}
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 47beb3abcc9d..c308920b194c 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -15,19 +15,29 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-// FIXME:
-#include <net/ipv6.h>
+#include <net/tcp.h>
struct nft_exthdr {
u8 type;
u8 offset;
u8 len;
+ u8 op;
enum nft_registers dreg:8;
+ u8 flags;
};
-static void nft_exthdr_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static unsigned int optlen(const u8 *opt, unsigned int offset)
+{
+ /* Beware zero-length options: make finite progress */
+ if (opt[offset] <= TCPOPT_NOP || opt[offset + 1] == 0)
+ return 1;
+ else
+ return opt[offset + 1];
+}
+
+static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
struct nft_exthdr *priv = nft_expr_priv(expr);
u32 *dest = &regs->data[priv->dreg];
@@ -35,8 +45,12 @@ static void nft_exthdr_eval(const struct nft_expr *expr,
int err;
err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
- if (err < 0)
+ if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+ *dest = (err >= 0);
+ return;
+ } else if (err < 0) {
goto err;
+ }
offset += priv->offset;
dest[priv->len / NFT_REG32_SIZE] = 0;
@@ -47,11 +61,59 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int i, optl, tcphdr_len, offset;
+ u32 *dest = &regs->data[priv->dreg];
+ struct tcphdr *tcph;
+ u8 *opt;
+
+ if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
+ goto err;
+
+ tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff);
+ if (!tcph)
+ goto err;
+
+ tcphdr_len = __tcp_hdrlen(tcph);
+ if (tcphdr_len < sizeof(*tcph))
+ goto err;
+
+ tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff);
+ if (!tcph)
+ goto err;
+
+ opt = (u8 *)tcph;
+ for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+ optl = optlen(opt, i);
+
+ if (priv->type != opt[i])
+ continue;
+
+ if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
+ goto err;
+
+ offset = i + priv->offset;
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ memcpy(dest, opt + offset, priv->len);
+
+ return;
+ }
+
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
[NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
[NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
[NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 },
};
static int nft_exthdr_init(const struct nft_ctx *ctx,
@@ -59,13 +121,13 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_exthdr *priv = nft_expr_priv(expr);
- u32 offset, len;
+ u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
int err;
- if (tb[NFTA_EXTHDR_DREG] == NULL ||
- tb[NFTA_EXTHDR_TYPE] == NULL ||
- tb[NFTA_EXTHDR_OFFSET] == NULL ||
- tb[NFTA_EXTHDR_LEN] == NULL)
+ if (!tb[NFTA_EXTHDR_DREG] ||
+ !tb[NFTA_EXTHDR_TYPE] ||
+ !tb[NFTA_EXTHDR_OFFSET] ||
+ !tb[NFTA_EXTHDR_LEN])
return -EINVAL;
err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
@@ -76,10 +138,27 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
+ if (tb[NFTA_EXTHDR_FLAGS]) {
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_FLAGS], U8_MAX, &flags);
+ if (err < 0)
+ return err;
+
+ if (flags & ~NFT_EXTHDR_F_PRESENT)
+ return -EINVAL;
+ }
+
+ if (tb[NFTA_EXTHDR_OP]) {
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
+ if (err < 0)
+ return err;
+ }
+
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = offset;
priv->len = len;
priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
+ priv->flags = flags;
+ priv->op = op;
return nft_validate_register_store(ctx, priv->dreg, NULL,
NFT_DATA_VALUE, priv->len);
@@ -97,6 +176,10 @@ static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len)))
goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_EXTHDR_FLAGS, htonl(priv->flags)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_EXTHDR_OP, htonl(priv->op)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -104,17 +187,45 @@ nla_put_failure:
}
static struct nft_expr_type nft_exthdr_type;
-static const struct nft_expr_ops nft_exthdr_ops = {
+static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
.type = &nft_exthdr_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
- .eval = nft_exthdr_eval,
+ .eval = nft_exthdr_ipv6_eval,
.init = nft_exthdr_init,
.dump = nft_exthdr_dump,
};
+static const struct nft_expr_ops nft_exthdr_tcp_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_tcp_eval,
+ .init = nft_exthdr_init,
+ .dump = nft_exthdr_dump,
+};
+
+static const struct nft_expr_ops *
+nft_exthdr_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ u32 op;
+
+ if (!tb[NFTA_EXTHDR_OP])
+ return &nft_exthdr_ipv6_ops;
+
+ op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP]));
+ switch (op) {
+ case NFT_EXTHDR_OP_TCPOPT:
+ return &nft_exthdr_tcp_ops;
+ case NFT_EXTHDR_OP_IPV6:
+ return &nft_exthdr_ipv6_ops;
+ }
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static struct nft_expr_type nft_exthdr_type __read_mostly = {
.name = "exthdr",
- .ops = &nft_exthdr_ops,
+ .select_ops = &nft_exthdr_select_ops,
.policy = nft_exthdr_policy,
.maxattr = NFTA_EXTHDR_MAX,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
new file mode 100644
index 000000000000..97f9649bcc7e
--- /dev/null
+++ b/net/netfilter/nft_set_bitmap.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2017 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* This bitmap uses two bits to represent one element. These two bits determine
+ * the element state in the current and the future generation.
+ *
+ * An element can be in three states. The generation cursor is represented using
+ * the ^ character, note that this cursor shifts on every succesful transaction.
+ * If no transaction is going on, we observe all elements are in the following
+ * state:
+ *
+ * 11 = this element is active in the current generation. In case of no updates,
+ * ^ it stays active in the next generation.
+ * 00 = this element is inactive in the current generation. In case of no
+ * ^ updates, it stays inactive in the next generation.
+ *
+ * On transaction handling, we observe these two temporary states:
+ *
+ * 01 = this element is inactive in the current generation and it becomes active
+ * ^ in the next one. This happens when the element is inserted but commit
+ * path has not yet been executed yet, so activation is still pending. On
+ * transaction abortion, the element is removed.
+ * 10 = this element is active in the current generation and it becomes inactive
+ * ^ in the next one. This happens when the element is deactivated but commit
+ * path has not yet been executed yet, so removal is still pending. On
+ * transation abortion, the next generation bit is reset to go back to
+ * restore its previous state.
+ */
+struct nft_bitmap {
+ u16 bitmap_size;
+ u8 bitmap[];
+};
+
+static inline void nft_bitmap_location(u32 key, u32 *idx, u32 *off)
+{
+ u32 k = (key << 1);
+
+ *idx = k / BITS_PER_BYTE;
+ *off = k % BITS_PER_BYTE;
+}
+
+/* Fetch the two bits that represent the element and check if it is active based
+ * on the generation mask.
+ */
+static inline bool
+nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask)
+{
+ return (bitmap[idx] & (0x3 << off)) & (genmask << off);
+}
+
+static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ u32 idx, off;
+
+ nft_bitmap_location(*key, &idx, &off);
+
+ return nft_bitmap_active(priv->bitmap, idx, off, genmask);
+}
+
+static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **_ext)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_set_ext *ext = elem->priv;
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ if (nft_bitmap_active(priv->bitmap, idx, off, genmask))
+ return -EEXIST;
+
+ /* Enter 01 state. */
+ priv->bitmap[idx] |= (genmask << off);
+
+ return 0;
+}
+
+static void nft_bitmap_remove(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_set_ext *ext = elem->priv;
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ /* Enter 00 state. */
+ priv->bitmap[idx] &= ~(genmask << off);
+}
+
+static void nft_bitmap_activate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_set_ext *ext = elem->priv;
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ /* Enter 11 state. */
+ priv->bitmap[idx] |= (genmask << off);
+}
+
+static bool nft_bitmap_flush(const struct net *net,
+ const struct nft_set *set, void *ext)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ /* Enter 10 state, similar to deactivation. */
+ priv->bitmap[idx] &= ~(genmask << off);
+
+ return true;
+}
+
+static struct nft_set_ext *nft_bitmap_ext_alloc(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_set_ext_tmpl tmpl;
+ struct nft_set_ext *ext;
+
+ nft_set_ext_prepare(&tmpl);
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+
+ ext = kzalloc(tmpl.len, GFP_KERNEL);
+ if (!ext)
+ return NULL;
+
+ nft_set_ext_init(ext, &tmpl);
+ memcpy(nft_set_ext_key(ext), elem->key.val.data, set->klen);
+
+ return ext;
+}
+
+static void *nft_bitmap_deactivate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_next(net);
+ struct nft_set_ext *ext;
+ u32 idx, off, key = 0;
+
+ memcpy(&key, elem->key.val.data, set->klen);
+ nft_bitmap_location(key, &idx, &off);
+
+ if (!nft_bitmap_active(priv->bitmap, idx, off, genmask))
+ return NULL;
+
+ /* We have no real set extension since this is a bitmap, allocate this
+ * dummy object that is released from the commit/abort path.
+ */
+ ext = nft_bitmap_ext_alloc(set, elem);
+ if (!ext)
+ return NULL;
+
+ /* Enter 10 state. */
+ priv->bitmap[idx] &= ~(genmask << off);
+
+ return ext;
+}
+
+static void nft_bitmap_walk(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_set_ext_tmpl tmpl;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+ int idx, off;
+ u16 key;
+
+ nft_set_ext_prepare(&tmpl);
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+
+ for (idx = 0; idx < priv->bitmap_size; idx++) {
+ for (off = 0; off < BITS_PER_BYTE; off += 2) {
+ if (iter->count < iter->skip)
+ goto cont;
+
+ if (!nft_bitmap_active(priv->bitmap, idx, off,
+ iter->genmask))
+ goto cont;
+
+ ext = kzalloc(tmpl.len, GFP_KERNEL);
+ if (!ext) {
+ iter->err = -ENOMEM;
+ return;
+ }
+ nft_set_ext_init(ext, &tmpl);
+ key = ((idx * BITS_PER_BYTE) + off) >> 1;
+ memcpy(nft_set_ext_key(ext), &key, set->klen);
+
+ elem.priv = ext;
+ iter->err = iter->fn(ctx, set, iter, &elem);
+
+ /* On set flush, this dummy extension object is released
+ * from the commit/abort path.
+ */
+ if (!iter->flush)
+ kfree(ext);
+
+ if (iter->err < 0)
+ return;
+cont:
+ iter->count++;
+ }
+ }
+}
+
+/* The bitmap size is pow(2, key length in bits) / bits per byte. This is
+ * multiplied by two since each element takes two bits. For 8 bit keys, the
+ * bitmap consumes 66 bytes. For 16 bit keys, 16388 bytes.
+ */
+static inline u32 nft_bitmap_size(u32 klen)
+{
+ return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1;
+}
+
+static inline u32 nft_bitmap_total_size(u32 klen)
+{
+ return sizeof(struct nft_bitmap) + nft_bitmap_size(klen);
+}
+
+static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[])
+{
+ u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+
+ return nft_bitmap_total_size(klen);
+}
+
+static int nft_bitmap_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ const struct nlattr * const nla[])
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+
+ priv->bitmap_size = nft_bitmap_total_size(set->klen);
+
+ return 0;
+}
+
+static void nft_bitmap_destroy(const struct nft_set *set)
+{
+}
+
+static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ /* Make sure bitmaps we don't get bitmaps larger than 16 Kbytes. */
+ if (desc->klen > 2)
+ return false;
+
+ est->size = nft_bitmap_total_size(desc->klen);
+ est->lookup = NFT_SET_CLASS_O_1;
+ est->space = NFT_SET_CLASS_O_1;
+
+ return true;
+}
+
+static struct nft_set_ops nft_bitmap_ops __read_mostly = {
+ .privsize = nft_bitmap_privsize,
+ .estimate = nft_bitmap_estimate,
+ .init = nft_bitmap_init,
+ .destroy = nft_bitmap_destroy,
+ .insert = nft_bitmap_insert,
+ .remove = nft_bitmap_remove,
+ .deactivate = nft_bitmap_deactivate,
+ .flush = nft_bitmap_flush,
+ .activate = nft_bitmap_activate,
+ .lookup = nft_bitmap_lookup,
+ .walk = nft_bitmap_walk,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_bitmap_module_init(void)
+{
+ return nft_register_set(&nft_bitmap_ops);
+}
+
+static void __exit nft_bitmap_module_exit(void)
+{
+ nft_unregister_set(&nft_bitmap_ops);
+}
+
+module_init(nft_bitmap_module_init);
+module_exit(nft_bitmap_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index e36069fb76ae..6938bc890f31 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -167,8 +167,8 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set,
nft_set_elem_clear_busy(&he->ext);
}
-static bool nft_hash_deactivate_one(const struct net *net,
- const struct nft_set *set, void *priv)
+static bool nft_hash_flush(const struct net *net,
+ const struct nft_set *set, void *priv)
{
struct nft_hash_elem *he = priv;
@@ -195,7 +195,7 @@ static void *nft_hash_deactivate(const struct net *net,
rcu_read_lock();
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
if (he != NULL &&
- !nft_hash_deactivate_one(net, set, he))
+ !nft_hash_flush(net, set, he))
he = NULL;
rcu_read_unlock();
@@ -203,7 +203,8 @@ static void *nft_hash_deactivate(const struct net *net,
return he;
}
-static void nft_hash_remove(const struct nft_set *set,
+static void nft_hash_remove(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
@@ -383,7 +384,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
est->size = esize + 2 * sizeof(struct nft_hash_elem *);
}
- est->class = NFT_SET_CLASS_O_1;
+ est->lookup = NFT_SET_CLASS_O_1;
+ est->space = NFT_SET_CLASS_O_N;
return true;
}
@@ -397,7 +399,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
.insert = nft_hash_insert,
.activate = nft_hash_activate,
.deactivate = nft_hash_deactivate,
- .deactivate_one = nft_hash_deactivate_one,
+ .flush = nft_hash_flush,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
.update = nft_hash_update,
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index f06f55ee516d..3387ed7dd231 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -151,7 +151,8 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
return err;
}
-static void nft_rbtree_remove(const struct nft_set *set,
+static void nft_rbtree_remove(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_rbtree *priv = nft_set_priv(set);
@@ -171,8 +172,8 @@ static void nft_rbtree_activate(const struct net *net,
nft_set_elem_change_active(net, set, &rbe->ext);
}
-static bool nft_rbtree_deactivate_one(const struct net *net,
- const struct nft_set *set, void *priv)
+static bool nft_rbtree_flush(const struct net *net,
+ const struct nft_set *set, void *priv)
{
struct nft_rbtree_elem *rbe = priv;
@@ -213,7 +214,7 @@ static void *nft_rbtree_deactivate(const struct net *net,
parent = parent->rb_right;
continue;
}
- nft_rbtree_deactivate_one(net, set, rbe);
+ nft_rbtree_flush(net, set, rbe);
return rbe;
}
}
@@ -290,7 +291,8 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
else
est->size = nsize;
- est->class = NFT_SET_CLASS_O_LOG_N;
+ est->lookup = NFT_SET_CLASS_O_LOG_N;
+ est->space = NFT_SET_CLASS_O_N;
return true;
}
@@ -304,7 +306,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.insert = nft_rbtree_insert,
.remove = nft_rbtree_remove,
.deactivate = nft_rbtree_deactivate,
- .deactivate_one = nft_rbtree_deactivate_one,
+ .flush = nft_rbtree_flush,
.activate = nft_rbtree_activate,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,