From 9410c9409d3e3a1ee6a02a830f9b6ab678c456d1 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 16 Feb 2020 12:01:21 +0200 Subject: net: sched: Introduce ingress classification function TC multi chain configuration can cause offloaded tc chains to miss in hardware after jumping to some chain. In such cases the software should continue from the chain that missed in hardware, as the hardware may have manipulated the packet and updated some counters. Currently a single tcf classification function serves both ingress and egress. However, multi chain miss processing (get tc skb extension on hw miss, set tc skb extension on tc miss) should happen only on ingress. Refactor the code to use ingress classification function, and move setting the tc skb extension from general classification to it, as a prestep for supporting the hw miss scenario. Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- include/net/pkt_cls.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 53946b509b51..109cbe3a0d51 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -72,6 +72,8 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); +int tcf_classify_ingress(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode); #else static inline bool tcf_block_shared(struct tcf_block *block) @@ -133,6 +135,14 @@ static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, { return TC_ACT_UNSPEC; } + +static inline int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ + return TC_ACT_UNSPEC; +} + #endif static inline unsigned long -- cgit v1.2.3 From 7d17c544cd304c15317e64ac77617bc774fb3f55 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 16 Feb 2020 12:01:22 +0200 Subject: net: sched: Pass ingress block to tcf_classify_ingress On ingress and cls_act qdiscs init, save the block on ingress mini_Qdisc and and pass it on to ingress classification, so it can be used for the looking up a specified chain index. Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- include/net/pkt_cls.h | 7 +++++-- include/net/sch_generic.h | 3 +++ net/core/dev.c | 4 ++-- net/sched/cls_api.c | 4 +++- net/sched/sch_generic.c | 8 ++++++++ net/sched/sch_ingress.c | 11 ++++++++++- 6 files changed, 31 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 109cbe3a0d51..75be5c065dba 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -72,8 +72,10 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); -int tcf_classify_ingress(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode); +int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_block *ingress_block, + const struct tcf_proto *tp, struct tcf_result *res, + bool compat_mode); #else static inline bool tcf_block_shared(struct tcf_block *block) @@ -137,6 +139,7 @@ static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, } static inline int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_block *ingress_block, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 151208704ed2..bcdf98d21094 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1269,6 +1269,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, */ struct mini_Qdisc { struct tcf_proto *filter_list; + struct tcf_block *block; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; struct rcu_head rcu; @@ -1295,6 +1296,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, struct tcf_proto *tp_head); void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, struct mini_Qdisc __rcu **p_miniq); +void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, + struct tcf_block *block); static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res) { diff --git a/net/core/dev.c b/net/core/dev.c index 107af00e4932..4866e6198a29 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4860,8 +4860,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify_ingress(skb, miniq->filter_list, &cl_res, - false)) { + switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list, + &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d52b43c56d5d..bbd8b5e7b74b 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1623,7 +1623,9 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, } EXPORT_SYMBOL(tcf_classify); -int tcf_classify_ingress(struct sk_buff *skb, const struct tcf_proto *tp, +int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_block *ingress_block, + const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6c9595f1048a..2efd5b61acef 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1391,6 +1391,14 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, } EXPORT_SYMBOL(mini_qdisc_pair_swap); +void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, + struct tcf_block *block) +{ + miniqp->miniq1.block = block; + miniqp->miniq2.block = block; +} +EXPORT_SYMBOL(mini_qdisc_pair_block_init); + void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, struct mini_Qdisc __rcu **p_miniq) { diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index bf56aa519797..84838128b9c5 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -78,6 +78,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, { struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + int err; net_inc_ingress_queue(); @@ -87,7 +88,13 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, q->block_info.chain_head_change = clsact_chain_head_change; q->block_info.chain_head_change_priv = &q->miniqp; - return tcf_block_get_ext(&q->block, sch, &q->block_info, extack); + err = tcf_block_get_ext(&q->block, sch, &q->block_info, extack); + if (err) + return err; + + mini_qdisc_pair_block_init(&q->miniqp, q->block); + + return 0; } static void ingress_destroy(struct Qdisc *sch) @@ -226,6 +233,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, if (err) return err; + mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block); + mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress); q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS; -- cgit v1.2.3 From 0f0d3827c0b4d6c3d219a73ea103077dc5bc17aa Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 16 Feb 2020 12:01:26 +0200 Subject: net/mlx5: E-Switch, Move source port on reg_c0 to the upper 16 bits Multi chain support requires the miss path to continue the processing from the last chain id, and for that we need to save the chain miss tag (a mapping for 32bit chain id) on reg_c0 which will come in a next patch. Currently reg_c0 is exclusively used to store the source port metadata, giving it 32bit, it is created from 16bits of vcha_id, and 16bits of vport number. We will move this source port metadata to upper 16bits, and leave the lower bits for the chain miss tag. We compress the reg_c0 source port metadata to 16bits by taking 8 bits from vhca_id, and 8bits from the vport number. Since we compress the vport number to 8bits statically, and leave two top ids for special PF/ECPF numbers, we will only support a max of 254 vports with this strategy. Signed-off-by: Paul Blakey Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 3 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 64 ++++++++++++++++++---- include/linux/mlx5/eswitch.h | 29 +++++++++- 3 files changed, 83 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e874d688d040..230028cb292c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3570,7 +3570,8 @@ static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); } else { misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 979f13bdc203..788bb83b18e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -85,7 +85,8 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, attr->in_rep->vport)); misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); @@ -621,7 +622,8 @@ static void peer_miss_rules_setup(struct mlx5_eswitch *esw, if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; } else { @@ -851,8 +853,9 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_0); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); } else { MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -1134,7 +1137,8 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, mlx5_eswitch_get_vport_metadata_for_match(esw, vport)); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; } else { @@ -1604,11 +1608,19 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, static const struct mlx5_flow_spec spec = {}; struct mlx5_flow_act flow_act = {}; int err = 0; + u32 key; + + key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport); + key >>= ESW_SOURCE_PORT_METADATA_OFFSET; MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); - MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); - MLX5_SET(set_action_in, action, data, - mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); + MLX5_SET(set_action_in, action, field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, key); + MLX5_SET(set_action_in, action, offset, + ESW_SOURCE_PORT_METADATA_OFFSET); + MLX5_SET(set_action_in, action, length, + ESW_SOURCE_PORT_METADATA_BITS); vport->ingress.offloads.modify_metadata = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, @@ -2470,9 +2482,41 @@ bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) } EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); -u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num) { - return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num; + u32 vport_num_mask = GENMASK(ESW_VPORT_BITS - 1, 0); + u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0); + u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + u32 val; + + /* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */ + WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS)); + + /* Trim vhca_id to ESW_VHCA_ID_BITS */ + vhca_id &= vhca_id_mask; + + /* Make sure pf and ecpf map to end of ESW_VPORT_BITS range so they + * don't overlap with VF numbers, and themselves, after trimming. + */ + WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) < + vport_num_mask - 1); + WARN_ON_ONCE((MLX5_VPORT_ECPF & vport_num_mask) < + vport_num_mask - 1); + WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) == + (MLX5_VPORT_ECPF & vport_num_mask)); + + /* Make sure that the VF vport_num fits ESW_VPORT_BITS and don't + * overlap with pf and ecpf. + */ + if (vport_num != MLX5_VPORT_UPLINK && + vport_num != MLX5_VPORT_ECPF) + WARN_ON_ONCE(vport_num >= vport_num_mask - 1); + + /* We can now trim vport_num to ESW_VPORT_BITS */ + vport_num &= vport_num_mask; + + val = (vhca_id << ESW_VPORT_BITS) | vport_num; + return val << (32 - ESW_SOURCE_PORT_METADATA_BITS); } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 98e667b176ef..dd1333f29f6e 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -71,7 +71,26 @@ enum devlink_eswitch_encap_mode mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); -u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + +/* Reg C0 usage: + * Reg C0 = < ESW_VHCA_ID_BITS(8) | ESW_VPORT BITS(8) | ESW_CHAIN_TAG(16) > + * + * Highest 8 bits of the reg c0 is the vhca_id, next 8 bits is vport_num, + * the rest (lowest 16 bits) is left for tc chain tag restoration. + * VHCA_ID + VPORT comprise the SOURCE_PORT matching. + */ +#define ESW_VHCA_ID_BITS 8 +#define ESW_VPORT_BITS 8 +#define ESW_SOURCE_PORT_METADATA_BITS (ESW_VHCA_ID_BITS + ESW_VPORT_BITS) +#define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS) +#define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS) + +static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) +{ + return GENMASK(31, 32 - ESW_SOURCE_PORT_METADATA_BITS); +} + +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num); u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); #else /* CONFIG_MLX5_ESWITCH */ @@ -94,11 +113,17 @@ mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) }; static inline u32 -mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, +mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, int vport_num) { return 0; }; + +static inline u32 +mlx5_eswitch_get_vport_metadata_mask(void) +{ + return 0; +} #endif /* CONFIG_MLX5_ESWITCH */ #endif -- cgit v1.2.3 From 11b717d6152699623fb1133759f9b8f235935a51 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 16 Feb 2020 12:01:27 +0200 Subject: net/mlx5: E-Switch, Get reg_c0 value on CQE On RX side create a restore table in OFFLOADS namespace. This table will match on all values for reg_c0 we will use, and set it to the flow_tag. This flow tag can then be read on the CQE. As there is no copy action from reg c0 to flow tag, instead we have to set the flow tag explictily. We add an API so callers can add all the used reg_c0 values (tags) and for each of those we add a restore rule. This will be used in a following patch to save the miss chain mapping tag on reg_c0 and from it restore the tc chain on the skb. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 14 ++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 147 +++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 4 +- include/linux/mlx5/eswitch.h | 2 + 4 files changed, 156 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4472710ccc9c..a94d91cdc758 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -189,6 +189,9 @@ struct mlx5_eswitch_fdb { }; struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads_restore; + struct mlx5_flow_group *restore_group; + struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; struct mlx5_eswitch_rep *vport_reps; @@ -623,6 +626,11 @@ void esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag); +u32 +esw_get_max_restore_tag(struct mlx5_eswitch *esw); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } @@ -638,6 +646,12 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {} +static struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + return ERR_PTR(-EOPNOTSUPP); +} + #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 788bb83b18e5..81c2cbf0c308 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -838,6 +838,54 @@ out: return err; } +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_flow_table *ft = esw->offloads.ft_offloads_restore; + struct mlx5_flow_context *flow_context; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_destination dest; + struct mlx5_flow_spec *spec; + void *misc; + + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_CHAIN_TAG_METADATA_MASK); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + flow_context = &spec->flow_context; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = tag; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = esw->offloads.ft_offloads; + + flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kfree(spec); + + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, + "Failed to create restore rule for tag: %d, err(%d)\n", + tag, (int)PTR_ERR(flow_rule)); + + return flow_rule; +} + +u32 +esw_get_max_restore_tag(struct mlx5_eswitch *esw) +{ + return ESW_CHAIN_TAG_METADATA_MASK; +} + #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 @@ -1060,6 +1108,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) } ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS; + ft_attr.prio = 1; ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { @@ -1164,6 +1213,81 @@ out: return flow_rule; } +static void esw_destroy_restore_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + mlx5_destroy_flow_group(offloads->restore_group); + mlx5_destroy_flow_table(offloads->ft_offloads_restore); +} + +static int esw_create_restore_table(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + void *match_criteria, *misc; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + u32 *flow_group_in; + int err = 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto out_free; + } + + ft_attr.max_fte = 1 << ESW_CHAIN_TAG_METADATA_BITS; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + esw_warn(esw->dev, "Failed to create restore table, err %d\n", + err); + goto out_free; + } + + memset(flow_group_in, 0, inlen); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_CHAIN_TAG_METADATA_MASK); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft_attr.max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create restore flow group, err: %d\n", + err); + goto err_group; + } + + esw->offloads.ft_offloads_restore = ft; + esw->offloads.restore_group = g; + + return 0; + +err_group: + mlx5_destroy_flow_table(ft); +out_free: + kvfree(flow_group_in); + + return err; +} + static int esw_offloads_start(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { @@ -1923,13 +2047,17 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) if (err) return err; - err = esw_create_offloads_fdb_tables(esw, total_vports); + err = esw_create_offloads_table(esw, total_vports); if (err) - goto create_fdb_err; + goto create_offloads_err; - err = esw_create_offloads_table(esw, total_vports); + err = esw_create_restore_table(esw); if (err) - goto create_ft_err; + goto create_restore_err; + + err = esw_create_offloads_fdb_tables(esw, total_vports); + if (err) + goto create_fdb_err; err = esw_create_vport_rx_group(esw, total_vports); if (err) @@ -1938,12 +2066,12 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) return 0; create_fg_err: - esw_destroy_offloads_table(esw); - -create_ft_err: esw_destroy_offloads_fdb_tables(esw); - create_fdb_err: + esw_destroy_restore_table(esw); +create_restore_err: + esw_destroy_offloads_table(esw); +create_offloads_err: esw_destroy_uplink_offloads_acl_tables(esw); return err; @@ -1952,8 +2080,9 @@ create_fdb_err: static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) { esw_destroy_vport_rx_group(esw); - esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); + esw_destroy_restore_table(esw); + esw_destroy_offloads_table(esw); esw_destroy_uplink_offloads_acl_tables(esw); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9dc24241dc91..2660ffabb09f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -111,8 +111,8 @@ #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) #define OFFLOADS_MAX_FT 1 -#define OFFLOADS_NUM_PRIOS 1 -#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) +#define OFFLOADS_NUM_PRIOS 2 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + OFFLOADS_NUM_PRIOS) #define LAG_PRIO_NUM_LEVELS 1 #define LAG_NUM_PRIOS 1 diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index dd1333f29f6e..61705e74a5bb 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -84,6 +84,8 @@ bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); #define ESW_SOURCE_PORT_METADATA_BITS (ESW_VHCA_ID_BITS + ESW_VPORT_BITS) #define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS) #define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS) +#define ESW_CHAIN_TAG_METADATA_MASK GENMASK(ESW_CHAIN_TAG_METADATA_BITS - 1,\ + 0) static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) { -- cgit v1.2.3