[][MAC80211][misc][Refactor wed/flowblock/mtk_ppe patches]
[Description]
Refactor wed/flowblock/mtk_ppe patches
[Release-log]
Change-Id: I4353142dea601460dcae92b8b70e71dd79cc38fb
Reviewed-on: https://gerrit.mediatek.inc/c/openwrt/feeds/mtk_openwrt_feeds/+/8021862
diff --git a/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/999-3001-mt7622-backport-nf-hw-offload-framework-and-upstream.patch b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/999-3001-mt7622-backport-nf-hw-offload-framework-and-upstream.patch
new file mode 100644
index 0000000..ec6efb9
--- /dev/null
+++ b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/999-3001-mt7622-backport-nf-hw-offload-framework-and-upstream.patch
@@ -0,0 +1,6932 @@
+From b80c745d2b90b30558e4f5b12060af956ae8e76d Mon Sep 17 00:00:00 2001
+From: Bo Jiao <Bo.Jiao@mediatek.com>
+Date: Mon, 18 Sep 2023 10:52:27 +0800
+Subject: [PATCH 02/22] mt7622 backport nf hw offload framework and upstream
+ hnat plus xt-FLOWOFFLOAD update v2
+
+---
+ drivers/net/ethernet/mediatek/Makefile | 3 +-
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c | 25 +-
+ drivers/net/ethernet/mediatek/mtk_eth_soc.h | 19 +-
+ drivers/net/ethernet/mediatek/mtk_ppe.c | 510 +++++++
+ drivers/net/ethernet/mediatek/mtk_ppe.h | 288 ++++
+ .../net/ethernet/mediatek/mtk_ppe_debugfs.c | 214 +++
+ .../net/ethernet/mediatek/mtk_ppe_offload.c | 535 ++++++++
+ drivers/net/ethernet/mediatek/mtk_ppe_regs.h | 144 ++
+ drivers/net/ppp/ppp_generic.c | 22 +
+ drivers/net/ppp/pppoe.c | 24 +
+ include/linux/netdevice.h | 60 +
+ include/linux/ppp_channel.h | 3 +
+ include/net/dsa.h | 10 +
+ include/net/flow_offload.h | 4 +
+ include/net/ip6_route.h | 5 +-
+ .../net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 -
+ include/net/netfilter/nf_conntrack.h | 12 +
+ include/net/netfilter/nf_conntrack_acct.h | 11 +
+ include/net/netfilter/nf_flow_table.h | 266 +++-
+ include/net/netns/conntrack.h | 6 +
+ .../linux/netfilter/nf_conntrack_common.h | 9 +-
+ include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h | 17 +
+ net/8021q/vlan_dev.c | 21 +
+ net/bridge/br_device.c | 49 +
+ net/bridge/br_private.h | 20 +
+ net/bridge/br_vlan.c | 55 +
+ net/core/dev.c | 46 +
+ net/dsa/dsa.c | 9 +
+ net/dsa/slave.c | 37 +-
+ net/ipv4/netfilter/Kconfig | 4 +-
+ net/ipv6/ip6_output.c | 2 +-
+ net/ipv6/netfilter/Kconfig | 3 +-
+ net/ipv6/route.c | 22 +-
+ net/netfilter/Kconfig | 14 +-
+ net/netfilter/Makefile | 4 +-
+ net/netfilter/nf_conntrack_core.c | 20 +-
+ net/netfilter/nf_conntrack_proto_tcp.c | 4 +
+ net/netfilter/nf_conntrack_proto_udp.c | 4 +
+ net/netfilter/nf_conntrack_standalone.c | 34 +-
+ net/netfilter/nf_flow_table_core.c | 462 ++++---
+ net/netfilter/nf_flow_table_ip.c | 447 +++---
+ net/netfilter/nf_flow_table_offload.c | 1199 +++++++++++++++++
+ net/netfilter/xt_FLOWOFFLOAD.c | 794 +++++++++++
+ 43 files changed, 5005 insertions(+), 435 deletions(-)
+ mode change 100644 => 100755 drivers/net/ethernet/mediatek/Makefile
+ mode change 100644 => 100755 drivers/net/ethernet/mediatek/mtk_eth_soc.c
+ mode change 100644 => 100755 drivers/net/ethernet/mediatek/mtk_eth_soc.h
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h
+ create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
+ create mode 100644 net/netfilter/nf_flow_table_offload.c
+ create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c
+
+diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
+old mode 100644
+new mode 100755
+index 634640d..5f342f4
+--- a/drivers/net/ethernet/mediatek/Makefile
++++ b/drivers/net/ethernet/mediatek/Makefile
+@@ -4,5 +4,6 @@
+ #
+
+ obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
+-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o
++mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o \
++ mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
+ obj-$(CONFIG_NET_MEDIATEK_HNAT) += mtk_hnat/
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+old mode 100644
+new mode 100755
+index c4bea4d..9c85e16
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -3573,6 +3573,7 @@ static int mtk_open(struct net_device *dev)
+ u32 id = mtk_mac2xgmii_id(eth, mac->id);
+ int err, i;
+ struct device_node *phy_node;
++ u32 gdm_config = MTK_GDMA_TO_PDMA;
+
+ err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0);
+ if (err) {
+@@ -3650,7 +3651,10 @@ static int mtk_open(struct net_device *dev)
+ regmap_write(eth->sgmii->pcs[id].regmap,
+ SGMSYS_QPHY_PWR_STATE_CTRL, 0);
+
+- mtk_gdm_config(eth, mac->id, MTK_GDMA_TO_PDMA);
++ if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
++ gdm_config = MTK_GDMA_TO_PPE;
++
++ mtk_gdm_config(eth, mac->id, gdm_config);
+
+ return 0;
+ }
+@@ -3730,6 +3734,9 @@ static int mtk_stop(struct net_device *dev)
+
+ mtk_dma_free(eth);
+
++ if (eth->soc->offload_version)
++ mtk_ppe_stop(&eth->ppe);
++
+ return 0;
+ }
+
+@@ -4576,6 +4583,7 @@ static const struct net_device_ops mtk_netdev_ops = {
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = mtk_poll_controller,
+ #endif
++ .ndo_setup_tc = mtk_eth_setup_tc,
+ };
+
+ static void mux_poll(struct work_struct *work)
+@@ -5161,6 +5169,17 @@ static int mtk_probe(struct platform_device *pdev)
+ goto err_free_dev;
+ }
+
++ if (eth->soc->offload_version) {
++ err = mtk_ppe_init(&eth->ppe, eth->dev,
++ eth->base + MTK_ETH_PPE_BASE, 2);
++ if (err)
++ goto err_free_dev;
++
++ err = mtk_eth_offload_init(eth);
++ if (err)
++ goto err_free_dev;
++ }
++
+ for (i = 0; i < MTK_MAX_DEVS; i++) {
+ if (!eth->netdev[i])
+ continue;
+@@ -5254,6 +5273,7 @@ static const struct mtk_soc_data mt2701_data = {
+ .required_clks = MT7623_CLKS_BITMAP,
+ .required_pctl = true,
+ .has_sram = false,
++ .offload_version = 2,
+ .rss_num = 0,
+ .txrx = {
+ .txd_size = sizeof(struct mtk_tx_dma),
+@@ -5271,6 +5291,7 @@ static const struct mtk_soc_data mt7621_data = {
+ .required_clks = MT7621_CLKS_BITMAP,
+ .required_pctl = false,
+ .has_sram = false,
++ .offload_version = 2,
+ .rss_num = 0,
+ .txrx = {
+ .txd_size = sizeof(struct mtk_tx_dma),
+@@ -5289,6 +5310,7 @@ static const struct mtk_soc_data mt7622_data = {
+ .required_clks = MT7622_CLKS_BITMAP,
+ .required_pctl = false,
+ .has_sram = false,
++ .offload_version = 2,
+ .rss_num = 0,
+ .txrx = {
+ .txd_size = sizeof(struct mtk_tx_dma),
+@@ -5306,6 +5328,7 @@ static const struct mtk_soc_data mt7623_data = {
+ .required_clks = MT7623_CLKS_BITMAP,
+ .required_pctl = true,
+ .has_sram = false,
++ .offload_version = 2,
+ .rss_num = 0,
+ .txrx = {
+ .txd_size = sizeof(struct mtk_tx_dma),
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+old mode 100644
+new mode 100755
+index 8a9b615..a87e46d
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -15,6 +15,8 @@
+ #include <linux/u64_stats_sync.h>
+ #include <linux/refcount.h>
+ #include <linux/phylink.h>
++#include <linux/rhashtable.h>
++#include "mtk_ppe.h"
+
+ #define MTK_QDMA_PAGE_SIZE 2048
+ #define MTK_MAX_RX_LENGTH 1536
+@@ -44,7 +46,8 @@
+ NETIF_F_HW_VLAN_CTAG_TX | \
+ NETIF_F_SG | NETIF_F_TSO | \
+ NETIF_F_TSO6 | \
+- NETIF_F_IPV6_CSUM)
++ NETIF_F_IPV6_CSUM |\
++ NETIF_F_HW_TC)
+ #define MTK_SET_FEATURES (NETIF_F_LRO | \
+ NETIF_F_HW_VLAN_CTAG_RX)
+ #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
+@@ -127,6 +130,7 @@
+ #define MTK_GDMA_UCS_EN BIT(20)
+ #define MTK_GDMA_STRP_CRC BIT(16)
+ #define MTK_GDMA_TO_PDMA 0x0
++#define MTK_GDMA_TO_PPE 0x4444
+ #define MTK_GDMA_DROP_ALL 0x7777
+
+ /* GDM Egress Control Register */
+@@ -617,6 +621,12 @@
+ #define RX_DMA_TCI(_x) ((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK))
+ #define RX_DMA_VPID(_x) (((_x) >> 16) & 0xffff)
+
++/* QDMA descriptor rxd4 */
++#define MTK_RXD4_FOE_ENTRY GENMASK(13, 0)
++#define MTK_RXD4_PPE_CPU_REASON GENMASK(18, 14)
++#define MTK_RXD4_SRC_PORT GENMASK(21, 19)
++#define MTK_RXD4_ALG GENMASK(31, 22)
++
+ /* QDMA descriptor rxd4 */
+ #define RX_DMA_L4_VALID BIT(24)
+ #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
+@@ -1651,6 +1661,7 @@ struct mtk_soc_data {
+ u64 caps;
+ u64 required_clks;
+ bool required_pctl;
++ u8 offload_version;
+ netdev_features_t hw_features;
+ bool has_sram;
+ struct {
+@@ -1847,6 +1858,9 @@ struct mtk_eth {
+ int ip_align;
+ spinlock_t syscfg0_lock;
+ struct timer_list mtk_dma_monitor_timer;
++
++ struct mtk_ppe ppe;
++ struct rhashtable flow_table;
+ };
+
+ /* struct mtk_mac - the structure that holds the info about the MACs of the
+@@ -1927,6 +1941,9 @@ int mtk_toprgu_init(struct mtk_eth *eth, struct device_node *r);
+ int mtk_dump_usxgmii(struct regmap *pmap, char *name, u32 offset, u32 range);
+ void mtk_usxgmii_link_poll(struct work_struct *work);
+
++int mtk_eth_offload_init(struct mtk_eth *eth);
++int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
++ void *type_data);
+ void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev);
+ u32 mtk_rss_indr_table(struct mtk_rss_params *rss_params, int index);
+ #endif /* MTK_ETH_H */
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
+new file mode 100644
+index 0000000..27b5be5
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
+@@ -0,0 +1,510 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
++
++#include <linux/kernel.h>
++#include <linux/io.h>
++#include <linux/iopoll.h>
++#include <linux/etherdevice.h>
++#include <linux/platform_device.h>
++#include "mtk_ppe.h"
++#include "mtk_ppe_regs.h"
++
++static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
++{
++ writel(val, ppe->base + reg);
++}
++
++static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
++{
++ return readl(ppe->base + reg);
++}
++
++static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
++{
++ u32 val;
++
++ val = ppe_r32(ppe, reg);
++ val &= ~mask;
++ val |= set;
++ ppe_w32(ppe, reg, val);
++
++ return val;
++}
++
++static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
++{
++ return ppe_m32(ppe, reg, 0, val);
++}
++
++static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
++{
++ return ppe_m32(ppe, reg, val, 0);
++}
++
++static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
++{
++ int ret;
++ u32 val;
++
++ ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
++ !(val & MTK_PPE_GLO_CFG_BUSY),
++ 20, MTK_PPE_WAIT_TIMEOUT_US);
++
++ if (ret)
++ dev_err(ppe->dev, "PPE table busy");
++
++ return ret;
++}
++
++static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
++{
++ ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
++ ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
++}
++
++static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
++{
++ mtk_ppe_cache_clear(ppe);
++
++ ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
++ enable * MTK_PPE_CACHE_CTL_EN);
++}
++
++static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
++{
++ u32 hv1, hv2, hv3;
++ u32 hash;
++
++ switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
++ case MTK_PPE_PKT_TYPE_BRIDGE:
++ hv1 = e->bridge.src_mac_lo;
++ hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
++ hv2 = e->bridge.src_mac_hi >> 16;
++ hv2 ^= e->bridge.dest_mac_lo;
++ hv3 = e->bridge.dest_mac_hi;
++ break;
++ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
++ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
++ hv1 = e->ipv4.orig.ports;
++ hv2 = e->ipv4.orig.dest_ip;
++ hv3 = e->ipv4.orig.src_ip;
++ break;
++ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
++ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
++ hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
++ hv1 ^= e->ipv6.ports;
++
++ hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
++ hv2 ^= e->ipv6.dest_ip[0];
++
++ hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
++ hv3 ^= e->ipv6.src_ip[0];
++ break;
++ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
++ case MTK_PPE_PKT_TYPE_IPV6_6RD:
++ default:
++ WARN_ON_ONCE(1);
++ return MTK_PPE_HASH_MASK;
++ }
++
++ hash = (hv1 & hv2) | ((~hv1) & hv3);
++ hash = (hash >> 24) | ((hash & 0xffffff) << 8);
++ hash ^= hv1 ^ hv2 ^ hv3;
++ hash ^= hash >> 16;
++ hash <<= 1;
++ hash &= MTK_PPE_ENTRIES - 1;
++
++ return hash;
++}
++
++static inline struct mtk_foe_mac_info *
++mtk_foe_entry_l2(struct mtk_foe_entry *entry)
++{
++ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
++
++ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
++ return &entry->ipv6.l2;
++
++ return &entry->ipv4.l2;
++}
++
++static inline u32 *
++mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
++{
++ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
++
++ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
++ return &entry->ipv6.ib2;
++
++ return &entry->ipv4.ib2;
++}
++
++int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
++ u8 pse_port, u8 *src_mac, u8 *dest_mac)
++{
++ struct mtk_foe_mac_info *l2;
++ u32 ports_pad, val;
++
++ memset(entry, 0, sizeof(*entry));
++
++ val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
++ FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
++ FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
++ MTK_FOE_IB1_BIND_TTL |
++ MTK_FOE_IB1_BIND_CACHE;
++ entry->ib1 = val;
++
++ val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
++ FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
++ FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
++
++ if (is_multicast_ether_addr(dest_mac))
++ val |= MTK_FOE_IB2_MULTICAST;
++
++ ports_pad = 0xa5a5a500 | (l4proto & 0xff);
++ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
++ entry->ipv4.orig.ports = ports_pad;
++ if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
++ entry->ipv6.ports = ports_pad;
++
++ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
++ entry->ipv6.ib2 = val;
++ l2 = &entry->ipv6.l2;
++ } else {
++ entry->ipv4.ib2 = val;
++ l2 = &entry->ipv4.l2;
++ }
++
++ l2->dest_mac_hi = get_unaligned_be32(dest_mac);
++ l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
++ l2->src_mac_hi = get_unaligned_be32(src_mac);
++ l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
++
++ if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
++ l2->etype = ETH_P_IPV6;
++ else
++ l2->etype = ETH_P_IP;
++
++ return 0;
++}
++
++int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
++{
++ u32 *ib2 = mtk_foe_entry_ib2(entry);
++ u32 val;
++
++ val = *ib2;
++ val &= ~MTK_FOE_IB2_DEST_PORT;
++ val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
++ *ib2 = val;
++
++ return 0;
++}
++
++int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
++ __be32 src_addr, __be16 src_port,
++ __be32 dest_addr, __be16 dest_port)
++{
++ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
++ struct mtk_ipv4_tuple *t;
++
++ switch (type) {
++ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
++ if (egress) {
++ t = &entry->ipv4.new;
++ break;
++ }
++ fallthrough;
++ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
++ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
++ t = &entry->ipv4.orig;
++ break;
++ case MTK_PPE_PKT_TYPE_IPV6_6RD:
++ entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
++ entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
++ return 0;
++ default:
++ WARN_ON_ONCE(1);
++ return -EINVAL;
++ }
++
++ t->src_ip = be32_to_cpu(src_addr);
++ t->dest_ip = be32_to_cpu(dest_addr);
++
++ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
++ return 0;
++
++ t->src_port = be16_to_cpu(src_port);
++ t->dest_port = be16_to_cpu(dest_port);
++
++ return 0;
++}
++
++int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
++ __be32 *src_addr, __be16 src_port,
++ __be32 *dest_addr, __be16 dest_port)
++{
++ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
++ u32 *src, *dest;
++ int i;
++
++ switch (type) {
++ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
++ src = entry->dslite.tunnel_src_ip;
++ dest = entry->dslite.tunnel_dest_ip;
++ break;
++ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
++ case MTK_PPE_PKT_TYPE_IPV6_6RD:
++ entry->ipv6.src_port = be16_to_cpu(src_port);
++ entry->ipv6.dest_port = be16_to_cpu(dest_port);
++ fallthrough;
++ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
++ src = entry->ipv6.src_ip;
++ dest = entry->ipv6.dest_ip;
++ break;
++ default:
++ WARN_ON_ONCE(1);
++ return -EINVAL;
++ }
++
++ for (i = 0; i < 4; i++)
++ src[i] = be32_to_cpu(src_addr[i]);
++ for (i = 0; i < 4; i++)
++ dest[i] = be32_to_cpu(dest_addr[i]);
++
++ return 0;
++}
++
++int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
++{
++ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
++
++ l2->etype = BIT(port);
++
++ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
++ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
++ else
++ l2->etype |= BIT(8);
++
++ entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
++
++ return 0;
++}
++
++int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
++{
++ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
++
++ switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
++ case 0:
++ entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
++ FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
++ l2->vlan1 = vid;
++ return 0;
++ case 1:
++ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
++ l2->vlan1 = vid;
++ l2->etype |= BIT(8);
++ } else {
++ l2->vlan2 = vid;
++ entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
++ }
++ return 0;
++ default:
++ return -ENOSPC;
++ }
++}
++
++int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
++{
++ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
++
++ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
++ (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
++ l2->etype = ETH_P_PPP_SES;
++
++ entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
++ l2->pppoe_id = sid;
++
++ return 0;
++}
++
++static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
++{
++ return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
++ FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
++}
++
++int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
++ u16 timestamp)
++{
++ struct mtk_foe_entry *hwe;
++ u32 hash;
++
++ timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
++ entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
++ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
++
++ hash = mtk_ppe_hash_entry(entry);
++ hwe = &ppe->foe_table[hash];
++ if (!mtk_foe_entry_usable(hwe)) {
++ hwe++;
++ hash++;
++
++ if (!mtk_foe_entry_usable(hwe))
++ return -ENOSPC;
++ }
++
++ memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
++ wmb();
++ hwe->ib1 = entry->ib1;
++
++ dma_wmb();
++
++ mtk_ppe_cache_clear(ppe);
++
++ return hash;
++}
++
++int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
++ int version)
++{
++ struct mtk_foe_entry *foe;
++
++ /* need to allocate a separate device, since the PPE's DMA access is
++ * not coherent.
++ */
++ ppe->base = base;
++ ppe->dev = dev;
++ ppe->version = version;
++
++ foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
++ &ppe->foe_phys, GFP_KERNEL);
++ if (!foe)
++ return -ENOMEM;
++
++ ppe->foe_table = foe;
++
++ mtk_ppe_debugfs_init(ppe);
++
++ return 0;
++}
++
++static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
++{
++ static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
++ int i, k;
++
++ memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
++
++ if (!IS_ENABLED(CONFIG_SOC_MT7621))
++ return;
++
++ /* skip all entries that cross the 1024 byte boundary */
++ for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
++ for (k = 0; k < ARRAY_SIZE(skip); k++)
++ ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
++}
++
++int mtk_ppe_start(struct mtk_ppe *ppe)
++{
++ u32 val;
++
++ mtk_ppe_init_foe_table(ppe);
++ ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
++
++ val = MTK_PPE_TB_CFG_ENTRY_80B |
++ MTK_PPE_TB_CFG_AGE_NON_L4 |
++ MTK_PPE_TB_CFG_AGE_UNBIND |
++ MTK_PPE_TB_CFG_AGE_TCP |
++ MTK_PPE_TB_CFG_AGE_UDP |
++ MTK_PPE_TB_CFG_AGE_TCP_FIN |
++ FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
++ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
++ FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
++ MTK_PPE_KEEPALIVE_DISABLE) |
++ FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
++ FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
++ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
++ FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
++ MTK_PPE_ENTRIES_SHIFT);
++ ppe_w32(ppe, MTK_PPE_TB_CFG, val);
++
++ ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
++ MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
++
++ mtk_ppe_cache_enable(ppe, true);
++
++ val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
++ MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
++ MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
++ MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
++ MTK_PPE_FLOW_CFG_IP6_6RD |
++ MTK_PPE_FLOW_CFG_IP4_NAT |
++ MTK_PPE_FLOW_CFG_IP4_NAPT |
++ MTK_PPE_FLOW_CFG_IP4_DSLITE |
++ MTK_PPE_FLOW_CFG_L2_BRIDGE |
++ MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
++ ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
++
++ val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
++ FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
++ ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
++
++ val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 30) |
++ FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
++ ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
++
++ val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
++ FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 30);
++ ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
++
++ val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
++ ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
++
++ val = MTK_PPE_BIND_LIMIT1_FULL |
++ FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
++ ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
++
++ val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
++ FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
++ ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
++
++ /* enable PPE */
++ val = MTK_PPE_GLO_CFG_EN |
++ MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
++ MTK_PPE_GLO_CFG_IP4_CS_DROP |
++ MTK_PPE_GLO_CFG_MCAST_TB_EN |
++ MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
++ ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
++
++ ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
++
++ return 0;
++}
++
++int mtk_ppe_stop(struct mtk_ppe *ppe)
++{
++ u32 val;
++ int i;
++
++ for (i = 0; i < MTK_PPE_ENTRIES; i++)
++ ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
++ MTK_FOE_STATE_INVALID);
++
++ mtk_ppe_cache_enable(ppe, false);
++
++ /* disable offload engine */
++ ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
++ ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
++
++ /* disable aging */
++ val = MTK_PPE_TB_CFG_AGE_NON_L4 |
++ MTK_PPE_TB_CFG_AGE_UNBIND |
++ MTK_PPE_TB_CFG_AGE_TCP |
++ MTK_PPE_TB_CFG_AGE_UDP |
++ MTK_PPE_TB_CFG_AGE_TCP_FIN;
++ ppe_clear(ppe, MTK_PPE_TB_CFG, val);
++
++ return mtk_ppe_wait_busy(ppe);
++}
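
[Note: a minimal sketch, not part of the patch, of how the helpers above are meant to be combined, mirroring what the mtk_ppe_offload.c added below does for an IPv4 NAT flow; the function name and all values are placeholders:

    /* Sketch: install a bound IPv4 HNAPT entry. pse_port 1 = GDM1. */
    static int example_install_ipv4_flow(struct mtk_ppe *ppe,
                                         u8 *src_mac, u8 *dest_mac,
                                         __be32 src_ip, __be16 src_port,
                                         __be32 dest_ip, __be16 dest_port,
                                         __be32 nat_ip, __be16 nat_port,
                                         u16 timestamp)
    {
            struct mtk_foe_entry foe;
            int err;

            err = mtk_foe_entry_prepare(&foe, MTK_PPE_PKT_TYPE_IPV4_HNAPT,
                                        IPPROTO_TCP, 1, src_mac, dest_mac);
            if (err)
                    return err;

            /* ingress (pre-NAT) tuple ... */
            mtk_foe_entry_set_ipv4_tuple(&foe, false, src_ip, src_port,
                                         dest_ip, dest_port);
            /* ... and egress (post-NAT) tuple */
            mtk_foe_entry_set_ipv4_tuple(&foe, true, nat_ip, nat_port,
                                         dest_ip, dest_port);

            /* returns the FOE table hash, or -ENOSPC if both candidate
             * slots for this hash are already bound */
            return mtk_foe_entry_commit(ppe, &foe, timestamp);
    }
]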
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
+new file mode 100644
+index 0000000..242fb8f
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
+@@ -0,0 +1,288 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
++
++#ifndef __MTK_PPE_H
++#define __MTK_PPE_H
++
++#include <linux/kernel.h>
++#include <linux/bitfield.h>
++
++#define MTK_ETH_PPE_BASE 0xc00
++
++#define MTK_PPE_ENTRIES_SHIFT 3
++#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
++#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
++#define MTK_PPE_WAIT_TIMEOUT_US 1000000
++
++#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0)
++#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8)
++#define MTK_FOE_IB1_UNBIND_PREBIND BIT(24)
++
++#define MTK_FOE_IB1_BIND_TIMESTAMP GENMASK(14, 0)
++#define MTK_FOE_IB1_BIND_KEEPALIVE BIT(15)
++#define MTK_FOE_IB1_BIND_VLAN_LAYER GENMASK(18, 16)
++#define MTK_FOE_IB1_BIND_PPPOE BIT(19)
++#define MTK_FOE_IB1_BIND_VLAN_TAG BIT(20)
++#define MTK_FOE_IB1_BIND_PKT_SAMPLE BIT(21)
++#define MTK_FOE_IB1_BIND_CACHE BIT(22)
++#define MTK_FOE_IB1_BIND_TUNNEL_DECAP BIT(23)
++#define MTK_FOE_IB1_BIND_TTL BIT(24)
++
++#define MTK_FOE_IB1_PACKET_TYPE GENMASK(27, 25)
++#define MTK_FOE_IB1_STATE GENMASK(29, 28)
++#define MTK_FOE_IB1_UDP BIT(30)
++#define MTK_FOE_IB1_STATIC BIT(31)
++
++enum {
++ MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
++ MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
++ MTK_PPE_PKT_TYPE_BRIDGE = 2,
++ MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
++ MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
++ MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
++ MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
++};
++
++#define MTK_FOE_IB2_QID GENMASK(3, 0)
++#define MTK_FOE_IB2_PSE_QOS BIT(4)
++#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5)
++#define MTK_FOE_IB2_MULTICAST BIT(8)
++
++#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12)
++#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16)
++#define MTK_FOE_IB2_WHNAT_NAT BIT(17)
++
++#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12)
++
++#define MTK_FOE_IB2_PORT_AG GENMASK(23, 18)
++
++#define MTK_FOE_IB2_DSCP GENMASK(31, 24)
++
++#define MTK_FOE_VLAN2_WHNAT_BSS GENMASK(5, 0)
++#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6)
++#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14)
++
++enum {
++ MTK_FOE_STATE_INVALID,
++ MTK_FOE_STATE_UNBIND,
++ MTK_FOE_STATE_BIND,
++ MTK_FOE_STATE_FIN
++};
++
++struct mtk_foe_mac_info {
++ u16 vlan1;
++ u16 etype;
++
++ u32 dest_mac_hi;
++
++ u16 vlan2;
++ u16 dest_mac_lo;
++
++ u32 src_mac_hi;
++
++ u16 pppoe_id;
++ u16 src_mac_lo;
++};
++
++struct mtk_foe_bridge {
++ u32 dest_mac_hi;
++
++ u16 src_mac_lo;
++ u16 dest_mac_lo;
++
++ u32 src_mac_hi;
++
++ u32 ib2;
++
++ u32 _rsv[5];
++
++ u32 udf_tsid;
++ struct mtk_foe_mac_info l2;
++};
++
++struct mtk_ipv4_tuple {
++ u32 src_ip;
++ u32 dest_ip;
++ union {
++ struct {
++ u16 dest_port;
++ u16 src_port;
++ };
++ struct {
++ u8 protocol;
++ u8 _pad[3]; /* fill with 0xa5a5a5 */
++ };
++ u32 ports;
++ };
++};
++
++struct mtk_foe_ipv4 {
++ struct mtk_ipv4_tuple orig;
++
++ u32 ib2;
++
++ struct mtk_ipv4_tuple new;
++
++ u16 timestamp;
++ u16 _rsv0[3];
++
++ u32 udf_tsid;
++
++ struct mtk_foe_mac_info l2;
++};
++
++struct mtk_foe_ipv4_dslite {
++ struct mtk_ipv4_tuple ip4;
++
++ u32 tunnel_src_ip[4];
++ u32 tunnel_dest_ip[4];
++
++ u8 flow_label[3];
++ u8 priority;
++
++ u32 udf_tsid;
++
++ u32 ib2;
++
++ struct mtk_foe_mac_info l2;
++};
++
++struct mtk_foe_ipv6 {
++ u32 src_ip[4];
++ u32 dest_ip[4];
++
++ union {
++ struct {
++ u8 protocol;
++ u8 _pad[3]; /* fill with 0xa5a5a5 */
++ }; /* 3-tuple */
++ struct {
++ u16 dest_port;
++ u16 src_port;
++ }; /* 5-tuple */
++ u32 ports;
++ };
++
++ u32 _rsv[3];
++
++ u32 udf;
++
++ u32 ib2;
++ struct mtk_foe_mac_info l2;
++};
++
++struct mtk_foe_ipv6_6rd {
++ u32 src_ip[4];
++ u32 dest_ip[4];
++ u16 dest_port;
++ u16 src_port;
++
++ u32 tunnel_src_ip;
++ u32 tunnel_dest_ip;
++
++ u16 hdr_csum;
++ u8 dscp;
++ u8 ttl;
++
++ u8 flag;
++ u8 pad;
++ u8 per_flow_6rd_id;
++ u8 pad2;
++
++ u32 ib2;
++ struct mtk_foe_mac_info l2;
++};
++
++struct mtk_foe_entry {
++ u32 ib1;
++
++ union {
++ struct mtk_foe_bridge bridge;
++ struct mtk_foe_ipv4 ipv4;
++ struct mtk_foe_ipv4_dslite dslite;
++ struct mtk_foe_ipv6 ipv6;
++ struct mtk_foe_ipv6_6rd ipv6_6rd;
++ u32 data[19];
++ };
++};
++
++enum {
++ MTK_PPE_CPU_REASON_TTL_EXCEEDED = 0x02,
++ MTK_PPE_CPU_REASON_OPTION_HEADER = 0x03,
++ MTK_PPE_CPU_REASON_NO_FLOW = 0x07,
++ MTK_PPE_CPU_REASON_IPV4_FRAG = 0x08,
++ MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG = 0x09,
++ MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP = 0x0a,
++ MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP = 0x0b,
++ MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST = 0x0c,
++ MTK_PPE_CPU_REASON_UN_HIT = 0x0d,
++ MTK_PPE_CPU_REASON_HIT_UNBIND = 0x0e,
++ MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f,
++ MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN = 0x10,
++ MTK_PPE_CPU_REASON_HIT_TTL_1 = 0x11,
++ MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION = 0x12,
++ MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR = 0x13,
++ MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR = 0x14,
++ MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR = 0x15,
++ MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU = 0x16,
++ MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER = 0x17,
++ MTK_PPE_CPU_REASON_MULTICAST_TO_CPU = 0x18,
++ MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU = 0x19,
++ MTK_PPE_CPU_REASON_HIT_PRE_BIND = 0x1a,
++ MTK_PPE_CPU_REASON_PACKET_SAMPLING = 0x1b,
++ MTK_PPE_CPU_REASON_EXCEED_MTU = 0x1c,
++ MTK_PPE_CPU_REASON_PPE_BYPASS = 0x1e,
++ MTK_PPE_CPU_REASON_INVALID = 0x1f,
++};
++
++struct mtk_ppe {
++ struct device *dev;
++ void __iomem *base;
++ int version;
++
++ struct mtk_foe_entry *foe_table;
++ dma_addr_t foe_phys;
++
++ void *acct_table;
++};
++
++int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
++ int version);
++int mtk_ppe_start(struct mtk_ppe *ppe);
++int mtk_ppe_stop(struct mtk_ppe *ppe);
++
++static inline void
++mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
++{
++ ppe->foe_table[hash].ib1 = 0;
++ dma_wmb();
++}
++
++static inline int
++mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
++{
++ u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
++
++ if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
++ return -1;
++
++ return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
++}
++
++int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
++ u8 pse_port, u8 *src_mac, u8 *dest_mac);
++int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
++int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool orig,
++ __be32 src_addr, __be16 src_port,
++ __be32 dest_addr, __be16 dest_port);
++int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
++ __be32 *src_addr, __be16 src_port,
++ __be32 *dest_addr, __be16 dest_port);
++int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
++int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
++int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
++int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
++ u16 timestamp);
++int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
++
++#endif
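
[Note: a quick sanity check on the layout declared above (a sketch, assuming the static_assert() helper from <linux/build_bug.h> available on 5.4): ib1 plus the 19-word data union must add up to the 80-byte entries selected by MTK_PPE_TB_CFG_ENTRY_80B in mtk_ppe_start():

    static_assert(sizeof(struct mtk_foe_entry) == 80,
                  "FOE entry must match the 80-byte hardware layout");
]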
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+new file mode 100644
+index 0000000..d4b4823
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+@@ -0,0 +1,214 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
++
++#include <linux/kernel.h>
++#include <linux/debugfs.h>
++#include "mtk_eth_soc.h"
++
++struct mtk_flow_addr_info
++{
++ void *src, *dest;
++ u16 *src_port, *dest_port;
++ bool ipv6;
++};
++
++static const char *mtk_foe_entry_state_str(int state)
++{
++ static const char * const state_str[] = {
++ [MTK_FOE_STATE_INVALID] = "INV",
++ [MTK_FOE_STATE_UNBIND] = "UNB",
++ [MTK_FOE_STATE_BIND] = "BND",
++ [MTK_FOE_STATE_FIN] = "FIN",
++ };
++
++ if (state >= ARRAY_SIZE(state_str) || !state_str[state])
++ return "UNK";
++
++ return state_str[state];
++}
++
++static const char *mtk_foe_pkt_type_str(int type)
++{
++ static const char * const type_str[] = {
++ [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
++ [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
++ [MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
++ [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
++ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
++ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
++ [MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
++ };
++
++ if (type >= ARRAY_SIZE(type_str) || !type_str[type])
++ return "UNKNOWN";
++
++ return type_str[type];
++}
++
++static void
++mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
++{
++ u32 n_addr[4];
++ int i;
++
++ if (!ipv6) {
++ seq_printf(m, "%pI4h", addr);
++ return;
++ }
++
++ for (i = 0; i < ARRAY_SIZE(n_addr); i++)
++ n_addr[i] = htonl(addr[i]);
++ seq_printf(m, "%pI6", n_addr);
++}
++
++static void
++mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
++{
++ mtk_print_addr(m, ai->src, ai->ipv6);
++ if (ai->src_port)
++ seq_printf(m, ":%d", *ai->src_port);
++ seq_printf(m, "->");
++ mtk_print_addr(m, ai->dest, ai->ipv6);
++ if (ai->dest_port)
++ seq_printf(m, ":%d", *ai->dest_port);
++}
++
++static int
++mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
++{
++ struct mtk_ppe *ppe = m->private;
++ int i;
++
++ for (i = 0; i < MTK_PPE_ENTRIES; i++) {
++ struct mtk_foe_entry *entry = &ppe->foe_table[i];
++ struct mtk_foe_mac_info *l2;
++ struct mtk_flow_addr_info ai = {};
++ unsigned char h_source[ETH_ALEN];
++ unsigned char h_dest[ETH_ALEN];
++ int type, state;
++ u32 ib2;
++
++
++ state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
++ if (!state)
++ continue;
++
++ if (bind && state != MTK_FOE_STATE_BIND)
++ continue;
++
++ type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
++ seq_printf(m, "%05x %s %7s", i,
++ mtk_foe_entry_state_str(state),
++ mtk_foe_pkt_type_str(type));
++
++ switch (type) {
++ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
++ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
++ ai.src_port = &entry->ipv4.orig.src_port;
++ ai.dest_port = &entry->ipv4.orig.dest_port;
++ fallthrough;
++ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
++ ai.src = &entry->ipv4.orig.src_ip;
++ ai.dest = &entry->ipv4.orig.dest_ip;
++ break;
++ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
++ ai.src_port = &entry->ipv6.src_port;
++ ai.dest_port = &entry->ipv6.dest_port;
++ fallthrough;
++ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
++ case MTK_PPE_PKT_TYPE_IPV6_6RD:
++ ai.src = &entry->ipv6.src_ip;
++ ai.dest = &entry->ipv6.dest_ip;
++ ai.ipv6 = true;
++ break;
++ }
++
++ seq_printf(m, " orig=");
++ mtk_print_addr_info(m, &ai);
++
++ switch (type) {
++ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
++ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
++ ai.src_port = &entry->ipv4.new.src_port;
++ ai.dest_port = &entry->ipv4.new.dest_port;
++ fallthrough;
++ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
++ ai.src = &entry->ipv4.new.src_ip;
++ ai.dest = &entry->ipv4.new.dest_ip;
++ seq_printf(m, " new=");
++ mtk_print_addr_info(m, &ai);
++ break;
++ }
++
++ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
++ l2 = &entry->ipv6.l2;
++ ib2 = entry->ipv6.ib2;
++ } else {
++ l2 = &entry->ipv4.l2;
++ ib2 = entry->ipv4.ib2;
++ }
++
++ *((__be32 *)h_source) = htonl(l2->src_mac_hi);
++ *((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
++ *((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
++ *((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
++
++ seq_printf(m, " eth=%pM->%pM etype=%04x"
++ " vlan=%d,%d ib1=%08x ib2=%08x\n",
++ h_source, h_dest, ntohs(l2->etype),
++ l2->vlan1, l2->vlan2, entry->ib1, ib2);
++ }
++
++ return 0;
++}
++
++static int
++mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
++{
++ return mtk_ppe_debugfs_foe_show(m, private, false);
++}
++
++static int
++mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
++{
++ return mtk_ppe_debugfs_foe_show(m, private, true);
++}
++
++static int
++mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
++{
++ return single_open(file, mtk_ppe_debugfs_foe_show_all,
++ inode->i_private);
++}
++
++static int
++mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
++{
++ return single_open(file, mtk_ppe_debugfs_foe_show_bind,
++ inode->i_private);
++}
++
++int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
++{
++ static const struct file_operations fops_all = {
++ .open = mtk_ppe_debugfs_foe_open_all,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++ };
++
++ static const struct file_operations fops_bind = {
++ .open = mtk_ppe_debugfs_foe_open_bind,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++ };
++
++ struct dentry *root;
++
++ root = debugfs_create_dir("mtk_ppe", NULL);
++ debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
++ debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
++
++ return 0;
++}
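
[Note: with this file in place the FOE table becomes inspectable at runtime under the mtk_ppe debugfs directory (typically /sys/kernel/debug/mtk_ppe/): "entries" dumps every non-INV entry, while "bind" restricts the dump to flows in the BND state; each line shows the index, state, packet type, original/new tuples, MAC addresses, etype, VLAN tags and the raw ib1/ib2 words.]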
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+new file mode 100644
+index 0000000..1380ef0
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+@@ -0,0 +1,535 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
++ */
++
++#include <linux/if_ether.h>
++#include <linux/rhashtable.h>
++#include <linux/ip.h>
++#include <linux/ipv6.h>
++#include <net/flow_offload.h>
++#include <net/pkt_cls.h>
++#include <net/dsa.h>
++#include "mtk_eth_soc.h"
++
++struct mtk_flow_data {
++ struct ethhdr eth;
++
++ union {
++ struct {
++ __be32 src_addr;
++ __be32 dst_addr;
++ } v4;
++
++ struct {
++ struct in6_addr src_addr;
++ struct in6_addr dst_addr;
++ } v6;
++ };
++
++ __be16 src_port;
++ __be16 dst_port;
++
++ struct {
++ u16 id;
++ __be16 proto;
++ u8 num;
++ } vlan;
++ struct {
++ u16 sid;
++ u8 num;
++ } pppoe;
++};
++
++struct mtk_flow_entry {
++ struct rhash_head node;
++ unsigned long cookie;
++ u16 hash;
++};
++
++static const struct rhashtable_params mtk_flow_ht_params = {
++ .head_offset = offsetof(struct mtk_flow_entry, node),
++ .key_offset = offsetof(struct mtk_flow_entry, cookie),
++ .key_len = sizeof(unsigned long),
++ .automatic_shrinking = true,
++};
++
++static u32
++mtk_eth_timestamp(struct mtk_eth *eth)
++{
++ return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
++}
++
++static int
++mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
++ bool egress)
++{
++ return mtk_foe_entry_set_ipv4_tuple(foe, egress,
++ data->v4.src_addr, data->src_port,
++ data->v4.dst_addr, data->dst_port);
++}
++
++static int
++mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data)
++{
++ return mtk_foe_entry_set_ipv6_tuple(foe,
++ data->v6.src_addr.s6_addr32, data->src_port,
++ data->v6.dst_addr.s6_addr32, data->dst_port);
++}
++
++static void
++mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
++{
++ void *dest = eth + act->mangle.offset;
++ const void *src = &act->mangle.val;
++
++ if (act->mangle.offset > 8)
++ return;
++
++ if (act->mangle.mask == 0xffff) {
++ src += 2;
++ dest += 2;
++ }
++
++ memcpy(dest, src, act->mangle.mask ? 2 : 4);
++}
++
++
++static int
++mtk_flow_mangle_ports(const struct flow_action_entry *act,
++ struct mtk_flow_data *data)
++{
++ u32 val = ntohl(act->mangle.val);
++
++ switch (act->mangle.offset) {
++ case 0:
++ if (act->mangle.mask == ~htonl(0xffff))
++ data->dst_port = cpu_to_be16(val);
++ else
++ data->src_port = cpu_to_be16(val >> 16);
++ break;
++ case 2:
++ data->dst_port = cpu_to_be16(val);
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
++static int
++mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
++ struct mtk_flow_data *data)
++{
++ __be32 *dest;
++
++ switch (act->mangle.offset) {
++ case offsetof(struct iphdr, saddr):
++ dest = &data->v4.src_addr;
++ break;
++ case offsetof(struct iphdr, daddr):
++ dest = &data->v4.dst_addr;
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ memcpy(dest, &act->mangle.val, sizeof(u32));
++
++ return 0;
++}
++
++static int
++mtk_flow_get_dsa_port(struct net_device **dev)
++{
++#if IS_ENABLED(CONFIG_NET_DSA)
++ struct dsa_port *dp;
++
++ dp = dsa_port_from_netdev(*dev);
++ if (IS_ERR(dp))
++ return -ENODEV;
++
++ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
++ return -ENODEV;
++
++ *dev = dp->cpu_dp->master;
++
++ return dp->index;
++#else
++ return -ENODEV;
++#endif
++}
++
++static int
++mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
++ struct net_device *dev)
++{
++ int pse_port, dsa_port;
++
++ dsa_port = mtk_flow_get_dsa_port(&dev);
++ if (dsa_port >= 0)
++ mtk_foe_entry_set_dsa(foe, dsa_port);
++
++ if (dev == eth->netdev[0])
++ pse_port = PSE_GDM1_PORT;
++ else if (dev == eth->netdev[1])
++ pse_port = PSE_GDM2_PORT;
++ else
++ return -EOPNOTSUPP;
++
++ mtk_foe_entry_set_pse_port(foe, pse_port);
++
++ return 0;
++}
++
++static int
++mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
++{
++ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
++ struct flow_action_entry *act;
++ struct mtk_flow_data data = {};
++ struct mtk_foe_entry foe;
++ struct net_device *odev = NULL;
++ struct mtk_flow_entry *entry;
++ int offload_type = 0;
++ u16 addr_type = 0;
++ u32 timestamp;
++ u8 l4proto = 0;
++ int err = 0;
++ int hash;
++ int i;
++
++ if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
++ return -EEXIST;
++
++ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
++ struct flow_match_meta match;
++
++ flow_rule_match_meta(rule, &match);
++ } else {
++ return -EOPNOTSUPP;
++ }
++
++ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
++ struct flow_match_control match;
++
++ flow_rule_match_control(rule, &match);
++ addr_type = match.key->addr_type;
++ } else {
++ return -EOPNOTSUPP;
++ }
++
++ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
++ struct flow_match_basic match;
++
++ flow_rule_match_basic(rule, &match);
++ l4proto = match.key->ip_proto;
++ } else {
++ return -EOPNOTSUPP;
++ }
++
++ flow_action_for_each(i, act, &rule->action) {
++ switch (act->id) {
++ case FLOW_ACTION_MANGLE:
++ if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
++ mtk_flow_offload_mangle_eth(act, &data.eth);
++ break;
++ case FLOW_ACTION_REDIRECT:
++ odev = act->dev;
++ break;
++ case FLOW_ACTION_CSUM:
++ break;
++ case FLOW_ACTION_VLAN_PUSH:
++ if (data.vlan.num == 1 ||
++ act->vlan.proto != htons(ETH_P_8021Q))
++ return -EOPNOTSUPP;
++
++ data.vlan.id = act->vlan.vid;
++ data.vlan.proto = act->vlan.proto;
++ data.vlan.num++;
++ break;
++ case FLOW_ACTION_VLAN_POP:
++ break;
++ case FLOW_ACTION_PPPOE_PUSH:
++ if (data.pppoe.num == 1)
++ return -EOPNOTSUPP;
++
++ data.pppoe.sid = act->pppoe.sid;
++ data.pppoe.num++;
++ break;
++ default:
++ return -EOPNOTSUPP;
++ }
++ }
++
++ switch (addr_type) {
++ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
++ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
++ break;
++ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
++ offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
++ break;
++ default:
++ return -EOPNOTSUPP;
++ }
++
++ if (!is_valid_ether_addr(data.eth.h_source) ||
++ !is_valid_ether_addr(data.eth.h_dest))
++ return -EINVAL;
++
++ err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
++ data.eth.h_source,
++ data.eth.h_dest);
++ if (err)
++ return err;
++
++ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
++ struct flow_match_ports ports;
++
++ flow_rule_match_ports(rule, &ports);
++ data.src_port = ports.key->src;
++ data.dst_port = ports.key->dst;
++ } else {
++ return -EOPNOTSUPP;
++ }
++
++ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
++ struct flow_match_ipv4_addrs addrs;
++
++ flow_rule_match_ipv4_addrs(rule, &addrs);
++
++ data.v4.src_addr = addrs.key->src;
++ data.v4.dst_addr = addrs.key->dst;
++
++ mtk_flow_set_ipv4_addr(&foe, &data, false);
++ }
++
++ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
++ struct flow_match_ipv6_addrs addrs;
++
++ flow_rule_match_ipv6_addrs(rule, &addrs);
++
++ data.v6.src_addr = addrs.key->src;
++ data.v6.dst_addr = addrs.key->dst;
++
++ mtk_flow_set_ipv6_addr(&foe, &data);
++ }
++
++ flow_action_for_each(i, act, &rule->action) {
++ if (act->id != FLOW_ACTION_MANGLE)
++ continue;
++
++ switch (act->mangle.htype) {
++ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
++ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
++ err = mtk_flow_mangle_ports(act, &data);
++ break;
++ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
++ err = mtk_flow_mangle_ipv4(act, &data);
++ break;
++ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
++ /* handled earlier */
++ break;
++ default:
++ return -EOPNOTSUPP;
++ }
++
++ if (err)
++ return err;
++ }
++
++ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
++ err = mtk_flow_set_ipv4_addr(&foe, &data, true);
++ if (err)
++ return err;
++ }
++
++ if (data.vlan.num == 1) {
++ if (data.vlan.proto != htons(ETH_P_8021Q))
++ return -EOPNOTSUPP;
++
++ mtk_foe_entry_set_vlan(&foe, data.vlan.id);
++ }
++ if (data.pppoe.num == 1)
++ mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
++
++ err = mtk_flow_set_output_device(eth, &foe, odev);
++ if (err)
++ return err;
++
++ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
++ if (!entry)
++ return -ENOMEM;
++
++ entry->cookie = f->cookie;
++ timestamp = mtk_eth_timestamp(eth);
++ hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
++ if (hash < 0) {
++ err = hash;
++ goto free;
++ }
++
++ entry->hash = hash;
++ err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
++ mtk_flow_ht_params);
++ if (err < 0)
++ goto clear_flow;
++
++ return 0;
++clear_flow:
++ mtk_foe_entry_clear(&eth->ppe, hash);
++free:
++ kfree(entry);
++ return err;
++}
++
++static int
++mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
++{
++ struct mtk_flow_entry *entry;
++
++ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
++ mtk_flow_ht_params);
++ if (!entry)
++ return -ENOENT;
++
++ mtk_foe_entry_clear(&eth->ppe, entry->hash);
++ rhashtable_remove_fast(&eth->flow_table, &entry->node,
++ mtk_flow_ht_params);
++ kfree(entry);
++
++ return 0;
++}
++
++static int
++mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
++{
++ struct mtk_flow_entry *entry;
++ int timestamp;
++ u32 idle;
++
++ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
++ mtk_flow_ht_params);
++ if (!entry)
++ return -ENOENT;
++
++ timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
++ if (timestamp < 0)
++ return -ETIMEDOUT;
++
++ idle = mtk_eth_timestamp(eth) - timestamp;
++ f->stats.lastused = jiffies - idle * HZ;
++
++ return 0;
++}
++
++static DEFINE_MUTEX(mtk_flow_offload_mutex);
++
++static int
++mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
++{
++ struct flow_cls_offload *cls = type_data;
++ struct net_device *dev = cb_priv;
++ struct mtk_mac *mac = netdev_priv(dev);
++ struct mtk_eth *eth = mac->hw;
++ int err;
++
++ if (!tc_can_offload(dev))
++ return -EOPNOTSUPP;
++
++ if (type != TC_SETUP_CLSFLOWER)
++ return -EOPNOTSUPP;
++
++ mutex_lock(&mtk_flow_offload_mutex);
++ switch (cls->command) {
++ case FLOW_CLS_REPLACE:
++ err = mtk_flow_offload_replace(eth, cls);
++ break;
++ case FLOW_CLS_DESTROY:
++ err = mtk_flow_offload_destroy(eth, cls);
++ break;
++ case FLOW_CLS_STATS:
++ err = mtk_flow_offload_stats(eth, cls);
++ break;
++ default:
++ err = -EOPNOTSUPP;
++ break;
++ }
++ mutex_unlock(&mtk_flow_offload_mutex);
++
++ return err;
++}
++
++static int
++mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
++{
++ struct mtk_mac *mac = netdev_priv(dev);
++ struct mtk_eth *eth = mac->hw;
++ static LIST_HEAD(block_cb_list);
++ struct flow_block_cb *block_cb;
++ flow_setup_cb_t *cb;
++ int err = 0;
++
++ if (!eth->ppe.foe_table)
++ return -EOPNOTSUPP;
++
++ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
++ return -EOPNOTSUPP;
++
++ cb = mtk_eth_setup_tc_block_cb;
++ f->driver_block_list = &block_cb_list;
++
++ switch (f->command) {
++ case FLOW_BLOCK_BIND:
++ block_cb = flow_block_cb_lookup(f->block, cb, dev);
++ if (block_cb) {
++ flow_block_cb_incref(block_cb);
++ goto unlock;
++ }
++ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
++ if (IS_ERR(block_cb)) {
++ err = PTR_ERR(block_cb);
++ goto unlock;
++ }
++
++ flow_block_cb_add(block_cb, f);
++ list_add_tail(&block_cb->driver_list, &block_cb_list);
++ break;
++ case FLOW_BLOCK_UNBIND:
++ block_cb = flow_block_cb_lookup(f->block, cb, dev);
++ if (!block_cb) {
++ err = -ENOENT;
++ goto unlock;
++ }
++
++ if (flow_block_cb_decref(block_cb)) {
++ flow_block_cb_remove(block_cb, f);
++ list_del(&block_cb->driver_list);
++ }
++ break;
++ default:
++ err = -EOPNOTSUPP;
++ break;
++ }
++
++unlock:
++ return err;
++}
++
++int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
++ void *type_data)
++{
++ if (type == TC_SETUP_FT)
++ return mtk_eth_setup_tc_block(dev, type_data);
++
++ return -EOPNOTSUPP;
++}
++
++int mtk_eth_offload_init(struct mtk_eth *eth)
++{
++ if (!eth->ppe.foe_table)
++ return 0;
++
++ return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
++}
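
[Note: for context, a hedged sketch of the caller side (simplified; the real binding logic lives in the nf_flow_table_offload.c added later in this patch): the flowtable core offers its flow block to the driver through ndo_setup_tc with the new TC_SETUP_FT type, which lands in mtk_eth_setup_tc() above:

    /* Sketch: how the flowtable offload core is expected to bind or
     * unbind its flow block (bo->command) on a capable device. */
    static int flowtable_block_setup_sketch(struct net_device *dev,
                                            struct flow_block_offload *bo)
    {
            if (!dev->netdev_ops->ndo_setup_tc)
                    return -EOPNOTSUPP;

            return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
    }
]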
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
+new file mode 100644
+index 0000000..0c45ea0
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
+@@ -0,0 +1,144 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
++
++#ifndef __MTK_PPE_REGS_H
++#define __MTK_PPE_REGS_H
++
++#define MTK_PPE_GLO_CFG 0x200
++#define MTK_PPE_GLO_CFG_EN BIT(0)
++#define MTK_PPE_GLO_CFG_TSID_EN BIT(1)
++#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP BIT(2)
++#define MTK_PPE_GLO_CFG_IP4_CS_DROP BIT(3)
++#define MTK_PPE_GLO_CFG_TTL0_DROP BIT(4)
++#define MTK_PPE_GLO_CFG_PPE_BSWAP BIT(5)
++#define MTK_PPE_GLO_CFG_PSE_HASH_OFS BIT(6)
++#define MTK_PPE_GLO_CFG_MCAST_TB_EN BIT(7)
++#define MTK_PPE_GLO_CFG_FLOW_DROP_KA BIT(8)
++#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE BIT(9)
++#define MTK_PPE_GLO_CFG_UDP_LITE_EN BIT(10)
++#define MTK_PPE_GLO_CFG_UDP_LEN_DROP BIT(11)
++#define MTK_PPE_GLO_CFG_MCAST_ENTRIES GENMASK(13, 12)
++#define MTK_PPE_GLO_CFG_BUSY BIT(31)
++
++#define MTK_PPE_FLOW_CFG 0x204
++#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG BIT(6)
++#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG BIT(7)
++#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE BIT(8)
++#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE BIT(9)
++#define MTK_PPE_FLOW_CFG_IP6_6RD BIT(10)
++#define MTK_PPE_FLOW_CFG_IP4_NAT BIT(12)
++#define MTK_PPE_FLOW_CFG_IP4_NAPT BIT(13)
++#define MTK_PPE_FLOW_CFG_IP4_DSLITE BIT(14)
++#define MTK_PPE_FLOW_CFG_L2_BRIDGE BIT(15)
++#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST BIT(16)
++#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG BIT(17)
++#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL BIT(18)
++#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY BIT(19)
++#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY BIT(20)
++
++#define MTK_PPE_IP_PROTO_CHK 0x208
++#define MTK_PPE_IP_PROTO_CHK_IPV4 GENMASK(15, 0)
++#define MTK_PPE_IP_PROTO_CHK_IPV6 GENMASK(31, 16)
++
++#define MTK_PPE_TB_CFG 0x21c
++#define MTK_PPE_TB_CFG_ENTRY_NUM GENMASK(2, 0)
++#define MTK_PPE_TB_CFG_ENTRY_80B BIT(3)
++#define MTK_PPE_TB_CFG_SEARCH_MISS GENMASK(5, 4)
++#define MTK_PPE_TB_CFG_AGE_PREBIND BIT(6)
++#define MTK_PPE_TB_CFG_AGE_NON_L4 BIT(7)
++#define MTK_PPE_TB_CFG_AGE_UNBIND BIT(8)
++#define MTK_PPE_TB_CFG_AGE_TCP BIT(9)
++#define MTK_PPE_TB_CFG_AGE_UDP BIT(10)
++#define MTK_PPE_TB_CFG_AGE_TCP_FIN BIT(11)
++#define MTK_PPE_TB_CFG_KEEPALIVE GENMASK(13, 12)
++#define MTK_PPE_TB_CFG_HASH_MODE GENMASK(15, 14)
++#define MTK_PPE_TB_CFG_SCAN_MODE GENMASK(17, 16)
++#define MTK_PPE_TB_CFG_HASH_DEBUG GENMASK(19, 18)
++
++enum {
++ MTK_PPE_SCAN_MODE_DISABLED,
++ MTK_PPE_SCAN_MODE_CHECK_AGE,
++ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
++};
++
++enum {
++ MTK_PPE_KEEPALIVE_DISABLE,
++ MTK_PPE_KEEPALIVE_UNICAST_CPU,
++ MTK_PPE_KEEPALIVE_DUP_CPU = 3,
++};
++
++enum {
++ MTK_PPE_SEARCH_MISS_ACTION_DROP,
++ MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
++ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
++};
++
++#define MTK_PPE_TB_BASE 0x220
++
++#define MTK_PPE_TB_USED 0x224
++#define MTK_PPE_TB_USED_NUM GENMASK(13, 0)
++
++#define MTK_PPE_BIND_RATE 0x228
++#define MTK_PPE_BIND_RATE_BIND GENMASK(15, 0)
++#define MTK_PPE_BIND_RATE_PREBIND GENMASK(31, 16)
++
++#define MTK_PPE_BIND_LIMIT0 0x22c
++#define MTK_PPE_BIND_LIMIT0_QUARTER GENMASK(13, 0)
++#define MTK_PPE_BIND_LIMIT0_HALF GENMASK(29, 16)
++
++#define MTK_PPE_BIND_LIMIT1 0x230
++#define MTK_PPE_BIND_LIMIT1_FULL GENMASK(13, 0)
++#define MTK_PPE_BIND_LIMIT1_NON_L4 GENMASK(23, 16)
++
++#define MTK_PPE_KEEPALIVE 0x234
++#define MTK_PPE_KEEPALIVE_TIME GENMASK(15, 0)
++#define MTK_PPE_KEEPALIVE_TIME_TCP GENMASK(23, 16)
++#define MTK_PPE_KEEPALIVE_TIME_UDP GENMASK(31, 24)
++
++#define MTK_PPE_UNBIND_AGE 0x238
++#define MTK_PPE_UNBIND_AGE_MIN_PACKETS GENMASK(31, 16)
++#define MTK_PPE_UNBIND_AGE_DELTA GENMASK(7, 0)
++
++#define MTK_PPE_BIND_AGE0 0x23c
++#define MTK_PPE_BIND_AGE0_DELTA_NON_L4 GENMASK(30, 16)
++#define MTK_PPE_BIND_AGE0_DELTA_UDP GENMASK(14, 0)
++
++#define MTK_PPE_BIND_AGE1 0x240
++#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN GENMASK(30, 16)
++#define MTK_PPE_BIND_AGE1_DELTA_TCP GENMASK(14, 0)
++
++#define MTK_PPE_HASH_SEED 0x244
++
++#define MTK_PPE_DEFAULT_CPU_PORT 0x248
++#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n) (GENMASK(2, 0) << ((_n) * 4))
++
++#define MTK_PPE_MTU_DROP 0x308
++
++#define MTK_PPE_VLAN_MTU0 0x30c
++#define MTK_PPE_VLAN_MTU0_NONE GENMASK(13, 0)
++#define MTK_PPE_VLAN_MTU0_1TAG GENMASK(29, 16)
++
++#define MTK_PPE_VLAN_MTU1 0x310
++#define MTK_PPE_VLAN_MTU1_2TAG GENMASK(13, 0)
++#define MTK_PPE_VLAN_MTU1_3TAG GENMASK(29, 16)
++
++#define MTK_PPE_VPM_TPID 0x318
++
++#define MTK_PPE_CACHE_CTL 0x320
++#define MTK_PPE_CACHE_CTL_EN BIT(0)
++#define MTK_PPE_CACHE_CTL_LOCK_CLR BIT(4)
++#define MTK_PPE_CACHE_CTL_REQ BIT(8)
++#define MTK_PPE_CACHE_CTL_CLEAR BIT(9)
++#define MTK_PPE_CACHE_CTL_CMD GENMASK(13, 12)
++
++#define MTK_PPE_MIB_CFG 0x334
++#define MTK_PPE_MIB_CFG_EN BIT(0)
++#define MTK_PPE_MIB_CFG_RD_CLR BIT(1)
++
++#define MTK_PPE_MIB_TB_BASE 0x338
++
++#define MTK_PPE_MIB_CACHE_CTL 0x350
++#define MTK_PPE_MIB_CACHE_CTL_EN BIT(0)
++#define MTK_PPE_MIB_CACHE_CTL_FLUSH BIT(2)
++
++#endif
+diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
+index 078c0f4..f8a98d8 100644
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
+ ppp_destroy_interface(ppp);
+ }
+
++static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
++ struct net_device_path *path)
++{
++ struct ppp *ppp = netdev_priv(ctx->dev);
++ struct ppp_channel *chan;
++ struct channel *pch;
++
++ if (ppp->flags & SC_MULTILINK)
++ return -EOPNOTSUPP;
++
++ if (list_empty(&ppp->channels))
++ return -ENODEV;
++
++ pch = list_first_entry(&ppp->channels, struct channel, clist);
++ chan = pch->chan;
++ if (!chan->ops->fill_forward_path)
++ return -EOPNOTSUPP;
++
++ return chan->ops->fill_forward_path(ctx, path, chan);
++}
++
+ static const struct net_device_ops ppp_netdev_ops = {
+ .ndo_init = ppp_dev_init,
+ .ndo_uninit = ppp_dev_uninit,
+ .ndo_start_xmit = ppp_start_xmit,
+ .ndo_do_ioctl = ppp_net_ioctl,
+ .ndo_get_stats64 = ppp_get_stats64,
++ .ndo_fill_forward_path = ppp_fill_forward_path,
+ };
+
+ static struct device_type ppp_type = {
+diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
+index 087b016..7a8c246 100644
+--- a/drivers/net/ppp/pppoe.c
++++ b/drivers/net/ppp/pppoe.c
+@@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+ return __pppoe_xmit(sk, skb);
+ }
+
++static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
++ struct net_device_path *path,
++ const struct ppp_channel *chan)
++{
++ struct sock *sk = (struct sock *)chan->private;
++ struct pppox_sock *po = pppox_sk(sk);
++ struct net_device *dev = po->pppoe_dev;
++
++ if (sock_flag(sk, SOCK_DEAD) ||
++ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
++ return -1;
++
++ path->type = DEV_PATH_PPPOE;
++ path->encap.proto = htons(ETH_P_PPP_SES);
++ path->encap.id = be16_to_cpu(po->num);
++ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
++ memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
++ path->dev = ctx->dev;
++ ctx->dev = dev;
++
++ return 0;
++}
++
+ static const struct ppp_channel_ops pppoe_chan_ops = {
+ .start_xmit = pppoe_xmit,
++ .fill_forward_path = pppoe_fill_forward_path,
+ };
+
+ static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 631d158..ef44d9a 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -838,6 +838,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
+ struct sk_buff *skb,
+ struct net_device *sb_dev);
+
++enum net_device_path_type {
++ DEV_PATH_ETHERNET = 0,
++ DEV_PATH_VLAN,
++ DEV_PATH_BRIDGE,
++ DEV_PATH_PPPOE,
++ DEV_PATH_DSA,
++};
++
++struct net_device_path {
++ enum net_device_path_type type;
++ const struct net_device *dev;
++ union {
++ struct {
++ u16 id;
++ __be16 proto;
++ u8 h_dest[ETH_ALEN];
++ } encap;
++ struct {
++ enum {
++ DEV_PATH_BR_VLAN_KEEP,
++ DEV_PATH_BR_VLAN_TAG,
++ DEV_PATH_BR_VLAN_UNTAG,
++ DEV_PATH_BR_VLAN_UNTAG_HW,
++ } vlan_mode;
++ u16 vlan_id;
++ __be16 vlan_proto;
++ } bridge;
++ struct {
++ int port;
++ u16 proto;
++ } dsa;
++ };
++};
++
++#define NET_DEVICE_PATH_STACK_MAX 5
++#define NET_DEVICE_PATH_VLAN_MAX 2
++
++struct net_device_path_stack {
++ int num_paths;
++ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
++};
++
++struct net_device_path_ctx {
++ const struct net_device *dev;
++ u8 daddr[ETH_ALEN];
++
++ int num_vlans;
++ struct {
++ u16 id;
++ __be16 proto;
++ } vlan[NET_DEVICE_PATH_VLAN_MAX];
++};
++
+ enum tc_setup_type {
+ TC_SETUP_QDISC_MQPRIO,
+ TC_SETUP_CLSU32,
+@@ -853,6 +906,7 @@ enum tc_setup_type {
+ TC_SETUP_ROOT_QDISC,
+ TC_SETUP_QDISC_GRED,
+ TC_SETUP_QDISC_TAPRIO,
++ TC_SETUP_FT,
+ };
+
+ /* These structures hold the attributes of bpf state that are being passed
+@@ -1248,6 +1302,8 @@ struct tlsdev_ops;
+ * Get devlink port instance associated with a given netdev.
+ * Called with a reference on the netdevice and devlink locks only,
+ * rtnl_lock is not held.
++ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
++ *	Get the forwarding path to reach the real device from the HW destination address.
+ */
+ struct net_device_ops {
+ int (*ndo_init)(struct net_device *dev);
+@@ -1445,6 +1501,8 @@ struct net_device_ops {
+ int (*ndo_xsk_wakeup)(struct net_device *dev,
+ u32 queue_id, u32 flags);
+ struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
++ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
++ struct net_device_path *path);
+ };
+
+ /**
+@@ -2670,6 +2728,8 @@ void dev_remove_offload(struct packet_offload *po);
+
+ int dev_get_iflink(const struct net_device *dev);
+ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
++int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
++ struct net_device_path_stack *stack);
+ struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
+ unsigned short mask);
+ struct net_device *dev_get_by_name(struct net *net, const char *name);
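Editor's note: the structures above describe one hop of a virtual-device stack; dev_fill_forward_path() (implemented in net/core/dev.c below) asks each device in turn to fill in one entry and redirect ctx->dev to its lower device. A minimal sketch of the driver-side hook for a hypothetical pass-through device — struct foo_priv and its lower_dev member are assumptions for the example, not part of the patch:

    #include <linux/netdevice.h>

    struct foo_priv {
    	struct net_device *lower_dev;	/* device this one transmits through */
    };

    /* Hypothetical pass-through device: record this hop, then continue
     * the walk on the lower device.
     */
    static int foo_fill_forward_path(struct net_device_path_ctx *ctx,
    				 struct net_device_path *path)
    {
    	struct foo_priv *priv = netdev_priv(ctx->dev);

    	path->type = DEV_PATH_ETHERNET;
    	path->dev = ctx->dev;
    	ctx->dev = priv->lower_dev;

    	return 0;
    }

The walk stops at the first device without ndo_fill_forward_path, typically the physical NIC, which becomes the terminal DEV_PATH_ETHERNET entry.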
+diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
+index 9896606..91f9a92 100644
+--- a/include/linux/ppp_channel.h
++++ b/include/linux/ppp_channel.h
+@@ -28,6 +28,9 @@ struct ppp_channel_ops {
+ int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
+ /* Handle an ioctl call that has come in via /dev/ppp. */
+ int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
++ int (*fill_forward_path)(struct net_device_path_ctx *,
++ struct net_device_path *,
++ const struct ppp_channel *);
+ };
+
+ struct ppp_channel {
+diff --git a/include/net/dsa.h b/include/net/dsa.h
+index d29ee9e..43f65cb 100644
+--- a/include/net/dsa.h
++++ b/include/net/dsa.h
+@@ -562,6 +562,8 @@ struct dsa_switch_ops {
+ struct sk_buff *skb);
+ };
+
++struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
++
+ struct dsa_switch_driver {
+ struct list_head list;
+ const struct dsa_switch_ops *ops;
+@@ -654,6 +656,14 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
+ #define BRCM_TAG_GET_PORT(v) ((v) >> 8)
+ #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
+
++#if IS_ENABLED(CONFIG_NET_DSA)
++bool dsa_slave_dev_check(const struct net_device *dev);
++#else
++static inline bool dsa_slave_dev_check(const struct net_device *dev)
++{
++ return false;
++}
++#endif
+
+ netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
+ int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
+diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
+index c6f7bd2..59b8736 100644
+--- a/include/net/flow_offload.h
++++ b/include/net/flow_offload.h
+@@ -138,6 +138,7 @@ enum flow_action_id {
+ FLOW_ACTION_MPLS_PUSH,
+ FLOW_ACTION_MPLS_POP,
+ FLOW_ACTION_MPLS_MANGLE,
++ FLOW_ACTION_PPPOE_PUSH,
+ NUM_FLOW_ACTIONS,
+ };
+
+@@ -213,6 +214,9 @@ struct flow_action_entry {
+ u8 bos;
+ u8 ttl;
+ } mpls_mangle;
++ struct { /* FLOW_ACTION_PPPOE_PUSH */
++ u16 sid;
++ } pppoe;
+ };
+ };
+
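Editor's note: FLOW_ACTION_PPPOE_PUSH carries only the PPPoE session id; the MAC-layer rewrite around it is expressed with separate actions. A hedged sketch of how a rule builder could append the action, assuming a struct flow_rule whose entries[] array still has a spare pre-allocated slot (the helper name is made up):

    #include <net/flow_offload.h>

    /* Hypothetical helper: append a PPPoE push action to a rule that is
     * assumed to have room for one more action entry.
     */
    static void add_pppoe_push(struct flow_rule *rule, u16 session_id)
    {
    	struct flow_action_entry *entry;

    	entry = &rule->action.entries[rule->action.num_entries++];
    	entry->id = FLOW_ACTION_PPPOE_PUSH;
    	entry->pppoe.sid = session_id;
    }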
+diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
+index 2c739fc..89ab8f1 100644
+--- a/include/net/ip6_route.h
++++ b/include/net/ip6_route.h
+@@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
+ !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
+ }
+
+-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
++static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
++ bool forwarding)
+ {
+ struct inet6_dev *idev;
+ unsigned int mtu;
+
+- if (dst_metric_locked(dst, RTAX_MTU)) {
++ if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
+ mtu = dst_metric_raw(dst, RTAX_MTU);
+ if (mtu)
+ goto out;
+diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+index 7b3c873..e954831 100644
+--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
++++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+@@ -4,7 +4,4 @@
+
+ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
+
+-#include <linux/sysctl.h>
+-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
+-
+ #endif /* _NF_CONNTRACK_IPV6_H*/
+diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
+index 90690e3..ce0bc3e 100644
+--- a/include/net/netfilter/nf_conntrack.h
++++ b/include/net/netfilter/nf_conntrack.h
+@@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
+ !nf_ct_is_dying(ct);
+ }
+
++#define NF_CT_DAY (86400 * HZ)
++
++/* Set an arbitrary timeout large enough not to ever expire; this saves
++ * us a check for the IPS_OFFLOAD_BIT from the packet path via
++ * nf_ct_is_expired().
++ */
++static inline void nf_ct_offload_timeout(struct nf_conn *ct)
++{
++ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
++ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
++}
++
+ struct kernel_param;
+
+ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
+diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
+index f7a060c..7f44a77 100644
+--- a/include/net/netfilter/nf_conntrack_acct.h
++++ b/include/net/netfilter/nf_conntrack_acct.h
+@@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
+ #endif
+ }
+
++void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
++ unsigned int bytes);
++
++static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
++ unsigned int bytes)
++{
++#if IS_ENABLED(CONFIG_NF_CONNTRACK)
++ nf_ct_acct_add(ct, dir, 1, bytes);
++#endif
++}
++
+ void nf_conntrack_acct_pernet_init(struct net *net);
+
+ int nf_conntrack_acct_init(void);
+diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
+index 68d7fc9..feac793 100644
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -8,31 +8,99 @@
+ #include <linux/rcupdate.h>
+ #include <linux/netfilter.h>
+ #include <linux/netfilter/nf_conntrack_tuple_common.h>
++#include <net/flow_offload.h>
+ #include <net/dst.h>
++#include <linux/if_pppox.h>
++#include <linux/ppp_defs.h>
+
+ struct nf_flowtable;
++struct nf_flow_rule;
++struct flow_offload;
++enum flow_offload_tuple_dir;
++
++struct nf_flow_key {
++ struct flow_dissector_key_meta meta;
++ struct flow_dissector_key_control control;
++ struct flow_dissector_key_control enc_control;
++ struct flow_dissector_key_basic basic;
++ struct flow_dissector_key_vlan vlan;
++ struct flow_dissector_key_vlan cvlan;
++ union {
++ struct flow_dissector_key_ipv4_addrs ipv4;
++ struct flow_dissector_key_ipv6_addrs ipv6;
++ };
++ struct flow_dissector_key_keyid enc_key_id;
++ union {
++ struct flow_dissector_key_ipv4_addrs enc_ipv4;
++ struct flow_dissector_key_ipv6_addrs enc_ipv6;
++ };
++ struct flow_dissector_key_tcp tcp;
++ struct flow_dissector_key_ports tp;
++} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
++
++struct nf_flow_match {
++ struct flow_dissector dissector;
++ struct nf_flow_key key;
++ struct nf_flow_key mask;
++};
++
++struct nf_flow_rule {
++ struct nf_flow_match match;
++ struct flow_rule *rule;
++};
+
+ struct nf_flowtable_type {
+ struct list_head list;
+ int family;
+ int (*init)(struct nf_flowtable *ft);
++ int (*setup)(struct nf_flowtable *ft,
++ struct net_device *dev,
++ enum flow_block_command cmd);
++ int (*action)(struct net *net,
++ const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule);
+ void (*free)(struct nf_flowtable *ft);
+ nf_hookfn *hook;
+ struct module *owner;
+ };
+
++enum nf_flowtable_flags {
++ NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */
++ NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */
++};
++
+ struct nf_flowtable {
+ struct list_head list;
+ struct rhashtable rhashtable;
++ int priority;
+ const struct nf_flowtable_type *type;
+ struct delayed_work gc_work;
++ unsigned int flags;
++ struct flow_block flow_block;
++ struct rw_semaphore flow_block_lock; /* Guards flow_block */
++ possible_net_t net;
+ };
+
++static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
++{
++ return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
++}
++
+ enum flow_offload_tuple_dir {
+ FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
+ FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
+- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
+ };
++#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
++
++enum flow_offload_xmit_type {
++ FLOW_OFFLOAD_XMIT_UNSPEC = 0,
++ FLOW_OFFLOAD_XMIT_NEIGH,
++ FLOW_OFFLOAD_XMIT_XFRM,
++ FLOW_OFFLOAD_XMIT_DIRECT,
++};
++
++#define NF_FLOW_TABLE_ENCAP_MAX 2
+
+ struct flow_offload_tuple {
+ union {
+@@ -52,13 +120,30 @@ struct flow_offload_tuple {
+
+ u8 l3proto;
+ u8 l4proto;
+- u8 dir;
++ struct {
++ u16 id;
++ __be16 proto;
++ } encap[NF_FLOW_TABLE_ENCAP_MAX];
+
+- u16 mtu;
++	/* All members above are keys for lookups; see flow_offload_hash(). */
++ struct { } __hash;
+
+- struct {
+- struct dst_entry *dst_cache;
+- u32 dst_cookie;
++ u8 dir:2,
++ xmit_type:2,
++ encap_num:2,
++ in_vlan_ingress:2;
++ u16 mtu;
++ union {
++ struct {
++ struct dst_entry *dst_cache;
++ u32 dst_cookie;
++ };
++ struct {
++ u32 ifidx;
++ u32 hw_ifidx;
++ u8 h_source[ETH_ALEN];
++ u8 h_dest[ETH_ALEN];
++ } out;
+ };
+ };
+
+@@ -67,52 +152,140 @@ struct flow_offload_tuple_rhash {
+ struct flow_offload_tuple tuple;
+ };
+
+-#define FLOW_OFFLOAD_SNAT 0x1
+-#define FLOW_OFFLOAD_DNAT 0x2
+-#define FLOW_OFFLOAD_DYING 0x4
+-#define FLOW_OFFLOAD_TEARDOWN 0x8
++enum nf_flow_flags {
++ NF_FLOW_SNAT,
++ NF_FLOW_DNAT,
++ NF_FLOW_TEARDOWN,
++ NF_FLOW_HW,
++ NF_FLOW_HW_ACCT_DYING,
++ NF_FLOW_HW_DYING,
++ NF_FLOW_HW_DEAD,
++ NF_FLOW_HW_PENDING,
++};
++
++enum flow_offload_type {
++ NF_FLOW_OFFLOAD_UNSPEC = 0,
++ NF_FLOW_OFFLOAD_ROUTE,
++};
+
+ struct flow_offload {
+ struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
+- u32 flags;
+- union {
+- /* Your private driver data here. */
+- u32 timeout;
+- };
++ struct nf_conn *ct;
++ unsigned long flags;
++ u16 type;
++ u32 timeout;
++ struct rcu_head rcu_head;
+ };
+
+ #define NF_FLOW_TIMEOUT (30 * HZ)
++#define nf_flowtable_time_stamp (u32)jiffies
++
++unsigned long flow_offload_get_timeout(struct flow_offload *flow);
++
++static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
++{
++ return (__s32)(timeout - nf_flowtable_time_stamp);
++}
+
+ struct nf_flow_route {
+ struct {
+- struct dst_entry *dst;
++ struct dst_entry *dst;
++ struct {
++ u32 ifindex;
++ struct {
++ u16 id;
++ __be16 proto;
++ } encap[NF_FLOW_TABLE_ENCAP_MAX];
++ u8 num_encaps:2,
++ ingress_vlans:2;
++ } in;
++ struct {
++ u32 ifindex;
++ u32 hw_ifindex;
++ u8 h_source[ETH_ALEN];
++ u8 h_dest[ETH_ALEN];
++ } out;
++ enum flow_offload_xmit_type xmit_type;
+ } tuple[FLOW_OFFLOAD_DIR_MAX];
+ };
+
+-struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
+- struct nf_flow_route *route);
++struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
+ void flow_offload_free(struct flow_offload *flow);
+
++static inline int
++nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
++ flow_setup_cb_t *cb, void *cb_priv)
++{
++ struct flow_block *block = &flow_table->flow_block;
++ struct flow_block_cb *block_cb;
++ int err = 0;
++
++ down_write(&flow_table->flow_block_lock);
++ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
++ if (block_cb) {
++ err = -EEXIST;
++ goto unlock;
++ }
++
++ block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
++ if (IS_ERR(block_cb)) {
++ err = PTR_ERR(block_cb);
++ goto unlock;
++ }
++
++ list_add_tail(&block_cb->list, &block->cb_list);
++
++unlock:
++ up_write(&flow_table->flow_block_lock);
++ return err;
++}
++
++static inline void
++nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
++ flow_setup_cb_t *cb, void *cb_priv)
++{
++ struct flow_block *block = &flow_table->flow_block;
++ struct flow_block_cb *block_cb;
++
++ down_write(&flow_table->flow_block_lock);
++ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
++ if (block_cb) {
++ list_del(&block_cb->list);
++ flow_block_cb_free(block_cb);
++ } else {
++ WARN_ON(true);
++ }
++ up_write(&flow_table->flow_block_lock);
++}
++
++int flow_offload_route_init(struct flow_offload *flow,
++ const struct nf_flow_route *route);
++
+ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
++void flow_offload_refresh(struct nf_flowtable *flow_table,
++ struct flow_offload *flow);
++
+ struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
+ struct flow_offload_tuple *tuple);
++void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
++ struct net_device *dev);
+ void nf_flow_table_cleanup(struct net_device *dev);
+
+ int nf_flow_table_init(struct nf_flowtable *flow_table);
+ void nf_flow_table_free(struct nf_flowtable *flow_table);
+
+ void flow_offload_teardown(struct flow_offload *flow);
+-static inline void flow_offload_dead(struct flow_offload *flow)
+-{
+- flow->flags |= FLOW_OFFLOAD_DYING;
+-}
+
+-int nf_flow_snat_port(const struct flow_offload *flow,
+- struct sk_buff *skb, unsigned int thoff,
+- u8 protocol, enum flow_offload_tuple_dir dir);
+-int nf_flow_dnat_port(const struct flow_offload *flow,
+- struct sk_buff *skb, unsigned int thoff,
+- u8 protocol, enum flow_offload_tuple_dir dir);
++int nf_flow_table_iterate(struct nf_flowtable *flow_table,
++ void (*iter)(struct flow_offload *flow, void *data),
++ void *data);
++
++void nf_flow_snat_port(const struct flow_offload *flow,
++ struct sk_buff *skb, unsigned int thoff,
++ u8 protocol, enum flow_offload_tuple_dir dir);
++void nf_flow_dnat_port(const struct flow_offload *flow,
++ struct sk_buff *skb, unsigned int thoff,
++ u8 protocol, enum flow_offload_tuple_dir dir);
+
+ struct flow_ports {
+ __be16 source, dest;
+@@ -126,4 +299,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ #define MODULE_ALIAS_NF_FLOWTABLE(family) \
+ MODULE_ALIAS("nf-flowtable-" __stringify(family))
+
++void nf_flow_offload_add(struct nf_flowtable *flowtable,
++ struct flow_offload *flow);
++void nf_flow_offload_del(struct nf_flowtable *flowtable,
++ struct flow_offload *flow);
++void nf_flow_offload_stats(struct nf_flowtable *flowtable,
++ struct flow_offload *flow, bool force);
++
++void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
++int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
++ struct net_device *dev,
++ enum flow_block_command cmd);
++int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule);
++int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule);
++
++int nf_flow_table_offload_init(void);
++void nf_flow_table_offload_exit(void);
++
++static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
++{
++ __be16 proto;
++
++ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
++ sizeof(struct pppoe_hdr)));
++ switch (proto) {
++ case htons(PPP_IP):
++ return htons(ETH_P_IP);
++ case htons(PPP_IPV6):
++ return htons(ETH_P_IPV6);
++ }
++
++ return 0;
++}
++
+ #endif /* _NF_FLOW_TABLE_H */
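Editor's note: the nf_flow_table_offload_add_cb()/nf_flow_table_offload_del_cb() helpers above attach a driver callback to a flowtable's flow_block; the offload engine then drives every registered flow_setup_cb_t with TC_SETUP_CLSFLOWER commands, one per flow. A sketch of the driver side — all foo_* names are hypothetical and the hardware hooks are stubbed out; only the callback shape and the FLOW_CLS_* commands come from the API above:

    #include <linux/netdevice.h>
    #include <net/flow_offload.h>

    /* Hypothetical hardware hooks. */
    static int foo_flow_install(void *priv, struct flow_cls_offload *cls)
    { return 0; }
    static int foo_flow_remove(void *priv, struct flow_cls_offload *cls)
    { return 0; }
    static int foo_flow_stats(void *priv, struct flow_cls_offload *cls)
    { return 0; }

    static int foo_flow_block_cb(enum tc_setup_type type, void *type_data,
    			     void *cb_priv)
    {
    	struct flow_cls_offload *cls = type_data;

    	if (type != TC_SETUP_CLSFLOWER)
    		return -EOPNOTSUPP;

    	switch (cls->command) {
    	case FLOW_CLS_REPLACE:
    		return foo_flow_install(cb_priv, cls);
    	case FLOW_CLS_DESTROY:
    		return foo_flow_remove(cb_priv, cls);
    	case FLOW_CLS_STATS:
    		return foo_flow_stats(cb_priv, cls);
    	default:
    		return -EOPNOTSUPP;
    	}
    }

A driver would register this with nf_flow_table_offload_add_cb(flow_table, foo_flow_block_cb, priv) and drop it again with the matching del_cb helper on unbind.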
+diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
+index 806454e..9e3963c 100644
+--- a/include/net/netns/conntrack.h
++++ b/include/net/netns/conntrack.h
+@@ -27,6 +27,9 @@ struct nf_tcp_net {
+ int tcp_loose;
+ int tcp_be_liberal;
+ int tcp_max_retrans;
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++ unsigned int offload_timeout;
++#endif
+ };
+
+ enum udp_conntrack {
+@@ -37,6 +40,9 @@ enum udp_conntrack {
+
+ struct nf_udp_net {
+ unsigned int timeouts[UDP_CT_MAX];
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++ unsigned int offload_timeout;
++#endif
+ };
+
+ struct nf_icmp_net {
+diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
+index 336014b..ae698d1 100644
+--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
++++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
+@@ -105,14 +105,19 @@ enum ip_conntrack_status {
+ IPS_OFFLOAD_BIT = 14,
+ IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
+
++ /* Conntrack has been offloaded to hardware. */
++ IPS_HW_OFFLOAD_BIT = 15,
++ IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
++
+ /* Be careful here, modifying these bits can make things messy,
+ * so don't let users modify them directly.
+ */
+ IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
+ IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
+- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
++ IPS_SEQ_ADJUST | IPS_TEMPLATE |
++ IPS_OFFLOAD | IPS_HW_OFFLOAD),
+
+- __IPS_MAX_BIT = 15,
++ __IPS_MAX_BIT = 16,
+ };
+
+ /* Connection tracking event types */
+diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
+new file mode 100644
+index 0000000..5841bbe
+--- /dev/null
++++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
+@@ -0,0 +1,17 @@
++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
++#ifndef _XT_FLOWOFFLOAD_H
++#define _XT_FLOWOFFLOAD_H
++
++#include <linux/types.h>
++
++enum {
++ XT_FLOWOFFLOAD_HW = 1 << 0,
++
++ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
++};
++
++struct xt_flowoffload_target_info {
++ __u32 flags;
++};
++
++#endif /* _XT_FLOWOFFLOAD_H */
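Editor's note: the UAPI above is intentionally tiny — a single flags word whose only defined bit requests hardware offload. A sketch of the kernel-side validation such a target would typically perform in its checkentry hook (the function name is hypothetical; xt_tgchk_param is the standard x_tables type):

    #include <linux/netfilter/x_tables.h>
    #include <linux/netfilter/xt_FLOWOFFLOAD.h>

    static int flowoffload_chk(const struct xt_tgchk_param *par)
    {
    	const struct xt_flowoffload_target_info *info = par->targinfo;

    	/* Reject flag bits this kernel does not understand. */
    	if (info->flags & ~XT_FLOWOFFLOAD_MASK)
    		return -EINVAL;

    	return 0;
    }

The XT_FLOWOFFLOAD_HW bit is what ultimately selects a flowtable created with NF_FLOWTABLE_HW_OFFLOAD set (see nf_flow_table.h above).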
+diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
+index 0a3a167..6112266 100644
+--- a/net/8021q/vlan_dev.c
++++ b/net/8021q/vlan_dev.c
+@@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
+ return real_dev->ifindex;
+ }
+
++static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
++ struct net_device_path *path)
++{
++ struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
++
++ path->type = DEV_PATH_VLAN;
++ path->encap.id = vlan->vlan_id;
++ path->encap.proto = vlan->vlan_proto;
++ path->dev = ctx->dev;
++ ctx->dev = vlan->real_dev;
++ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
++ return -ENOSPC;
++
++ ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
++ ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
++ ctx->num_vlans++;
++
++ return 0;
++}
++
+ static const struct ethtool_ops vlan_ethtool_ops = {
+ .get_link_ksettings = vlan_ethtool_get_link_ksettings,
+ .get_drvinfo = vlan_ethtool_get_drvinfo,
+@@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = {
+ #endif
+ .ndo_fix_features = vlan_dev_fix_features,
+ .ndo_get_iflink = vlan_dev_get_iflink,
++ .ndo_fill_forward_path = vlan_dev_fill_forward_path,
+ };
+
+ static void vlan_dev_free(struct net_device *dev)
+diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
+index 501f77f..0940b44 100644
+--- a/net/bridge/br_device.c
++++ b/net/bridge/br_device.c
+@@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
+ return br_del_if(br, slave_dev);
+ }
+
++static int br_fill_forward_path(struct net_device_path_ctx *ctx,
++ struct net_device_path *path)
++{
++ struct net_bridge_fdb_entry *f;
++ struct net_bridge_port *dst;
++ struct net_bridge *br;
++
++ if (netif_is_bridge_port(ctx->dev))
++ return -1;
++
++ br = netdev_priv(ctx->dev);
++
++ br_vlan_fill_forward_path_pvid(br, ctx, path);
++
++ f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
++ if (!f || !f->dst)
++ return -1;
++
++ dst = READ_ONCE(f->dst);
++ if (!dst)
++ return -1;
++
++ if (br_vlan_fill_forward_path_mode(br, dst, path))
++ return -1;
++
++ path->type = DEV_PATH_BRIDGE;
++ path->dev = dst->br->dev;
++ ctx->dev = dst->dev;
++
++ switch (path->bridge.vlan_mode) {
++ case DEV_PATH_BR_VLAN_TAG:
++ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
++ return -ENOSPC;
++ ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
++ ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
++ ctx->num_vlans++;
++ break;
++ case DEV_PATH_BR_VLAN_UNTAG_HW:
++ case DEV_PATH_BR_VLAN_UNTAG:
++ ctx->num_vlans--;
++ break;
++ case DEV_PATH_BR_VLAN_KEEP:
++ break;
++ }
++
++ return 0;
++}
++
+ static const struct ethtool_ops br_ethtool_ops = {
+ .get_drvinfo = br_getinfo,
+ .get_link = ethtool_op_get_link,
+@@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = {
+ .ndo_bridge_setlink = br_setlink,
+ .ndo_bridge_dellink = br_dellink,
+ .ndo_features_check = passthru_features_check,
++ .ndo_fill_forward_path = br_fill_forward_path,
+ };
+
+ static struct device_type br_type = {
+diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
+index a736be8..4bd9e9b 100644
+--- a/net/bridge/br_private.h
++++ b/net/bridge/br_private.h
+@@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
+ int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
+ void *ptr);
+
++void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
++ struct net_device_path_ctx *ctx,
++ struct net_device_path *path);
++int br_vlan_fill_forward_path_mode(struct net_bridge *br,
++ struct net_bridge_port *dst,
++ struct net_device_path *path);
++
+ static inline struct net_bridge_vlan_group *br_vlan_group(
+ const struct net_bridge *br)
+ {
+@@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
+ return 0;
+ }
+
++static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
++ struct net_device_path_ctx *ctx,
++ struct net_device_path *path)
++{
++}
++
++static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
++ struct net_bridge_port *dst,
++ struct net_device_path *path)
++{
++ return 0;
++}
++
+ static inline struct net_bridge_vlan_group *br_vlan_group(
+ const struct net_bridge *br)
+ {
+diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
+index 9257292..bcfd169 100644
+--- a/net/bridge/br_vlan.c
++++ b/net/bridge/br_vlan.c
+@@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
+ }
+ EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
+
++void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
++ struct net_device_path_ctx *ctx,
++ struct net_device_path *path)
++{
++ struct net_bridge_vlan_group *vg;
++ int idx = ctx->num_vlans - 1;
++ u16 vid;
++
++ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
++
++ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
++ return;
++
++ vg = br_vlan_group(br);
++
++ if (idx >= 0 &&
++ ctx->vlan[idx].proto == br->vlan_proto) {
++ vid = ctx->vlan[idx].id;
++ } else {
++ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
++ vid = br_get_pvid(vg);
++ }
++
++ path->bridge.vlan_id = vid;
++ path->bridge.vlan_proto = br->vlan_proto;
++}
++
++int br_vlan_fill_forward_path_mode(struct net_bridge *br,
++ struct net_bridge_port *dst,
++ struct net_device_path *path)
++{
++ struct net_bridge_vlan_group *vg;
++ struct net_bridge_vlan *v;
++
++ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
++ return 0;
++
++ vg = nbp_vlan_group_rcu(dst);
++ v = br_vlan_find(vg, path->bridge.vlan_id);
++ if (!v || !br_vlan_should_use(v))
++ return -EINVAL;
++
++ if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
++ return 0;
++
++ if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
++ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
++ else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
++ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
++ else
++ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
++
++ return 0;
++}
++
+ int br_vlan_get_info(const struct net_device *dev, u16 vid,
+ struct bridge_vlan_info *p_vinfo)
+ {
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 54cc544..a117bd0 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+ }
+ EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
+
++static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
++{
++ int k = stack->num_paths++;
++
++ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
++ return NULL;
++
++ return &stack->path[k];
++}
++
++int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
++ struct net_device_path_stack *stack)
++{
++ const struct net_device *last_dev;
++ struct net_device_path_ctx ctx = {
++ .dev = dev,
++ };
++ struct net_device_path *path;
++ int ret = 0;
++
++ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
++ stack->num_paths = 0;
++ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
++ last_dev = ctx.dev;
++ path = dev_fwd_path(stack);
++ if (!path)
++ return -1;
++
++ memset(path, 0, sizeof(struct net_device_path));
++ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
++ if (ret < 0)
++ return -1;
++
++ if (WARN_ON_ONCE(last_dev == ctx.dev))
++ return -1;
++ }
++ path = dev_fwd_path(stack);
++ if (!path)
++ return -1;
++ path->type = DEV_PATH_ETHERNET;
++ path->dev = ctx.dev;
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(dev_fill_forward_path);
++
+ /**
+ * __dev_get_by_name - find a device by its name
+ * @net: the applicable net namespace
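Editor's note: with the core walker in place, a consumer resolves the whole virtual-device stack in one call. A hedged usage sketch — resolve_xmit_dev() is hypothetical; only dev_fill_forward_path() and struct net_device_path_stack come from the patch:

    #include <linux/netdevice.h>

    /* Return the lowest (physical) device on the transmit path toward
     * dest_mac, or NULL if the path could not be resolved.
     */
    static struct net_device *resolve_xmit_dev(struct net_device *dev,
    					   const u8 *dest_mac)
    {
    	struct net_device_path_stack stack;
    	int i;

    	if (dev_fill_forward_path(dev, dest_mac, &stack) < 0)
    		return NULL;

    	for (i = 0; i < stack.num_paths; i++)
    		pr_debug("hop %d: %s (type %d)\n", i,
    			 stack.path[i].dev->name, stack.path[i].type);

    	/* On success the walker always terminates the stack with a
    	 * DEV_PATH_ETHERNET entry, so num_paths is at least 1.
    	 */
    	return (struct net_device *)stack.path[stack.num_paths - 1].dev;
    }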
+diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
+index ca80f86..35a1249 100644
+--- a/net/dsa/dsa.c
++++ b/net/dsa/dsa.c
+@@ -329,6 +329,15 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
+ }
+ EXPORT_SYMBOL_GPL(call_dsa_notifiers);
+
++struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
++{
++ if (!netdev || !dsa_slave_dev_check(netdev))
++ return ERR_PTR(-ENODEV);
++
++ return dsa_slave_to_port(netdev);
++}
++EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
++
+ static int __init dsa_init_module(void)
+ {
+ int rc;
+diff --git a/net/dsa/slave.c b/net/dsa/slave.c
+index e2b91b3..2dfaa1e 100644
+--- a/net/dsa/slave.c
++++ b/net/dsa/slave.c
+@@ -1031,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
+ }
+ }
+
++static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
++ void *type_data)
++{
++ struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
++ struct net_device *master = cpu_dp->master;
++
++ if (!master->netdev_ops->ndo_setup_tc)
++ return -EOPNOTSUPP;
++
++ return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
++}
++
+ static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+ {
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+- if (type == TC_SETUP_BLOCK)
++ switch (type) {
++ case TC_SETUP_BLOCK:
+ return dsa_slave_setup_tc_block(dev, type_data);
++ case TC_SETUP_FT:
++ return dsa_slave_setup_ft_block(ds, dp->index, type_data);
++ default:
++ break;
++ }
+
+ if (!ds->ops->port_setup_tc)
+ return -EOPNOTSUPP;
+@@ -1224,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
+ return dp->ds->devlink ? &dp->devlink_port : NULL;
+ }
+
++static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
++ struct net_device_path *path)
++{
++ struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
++ struct dsa_port *cpu_dp = dp->cpu_dp;
++
++ path->dev = ctx->dev;
++ path->type = DEV_PATH_DSA;
++ path->dsa.proto = cpu_dp->tag_ops->proto;
++ path->dsa.port = dp->index;
++ ctx->dev = cpu_dp->master;
++
++ return 0;
++}
++
+ static const struct net_device_ops dsa_slave_netdev_ops = {
+ .ndo_open = dsa_slave_open,
+ .ndo_stop = dsa_slave_close,
+@@ -1248,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
+ .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
+ .ndo_get_devlink_port = dsa_slave_get_devlink_port,
++ .ndo_fill_forward_path = dsa_slave_fill_forward_path,
+ };
+
+ static struct device_type dsa_type = {
+@@ -1499,6 +1533,7 @@ bool dsa_slave_dev_check(const struct net_device *dev)
+ {
+ return dev->netdev_ops == &dsa_slave_netdev_ops;
+ }
++EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
+
+ static int dsa_slave_changeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
+index f17b402..803b92e 100644
+--- a/net/ipv4/netfilter/Kconfig
++++ b/net/ipv4/netfilter/Kconfig
+@@ -56,8 +56,6 @@ config NF_TABLES_ARP
+ help
+ This option enables the ARP support for nf_tables.
+
+-endif # NF_TABLES
+-
+ config NF_FLOW_TABLE_IPV4
+ tristate "Netfilter flow table IPv4 module"
+ depends on NF_FLOW_TABLE
+@@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4
+
+ To compile it as a module, choose M here.
+
++endif # NF_TABLES
++
+ config NF_DUP_IPV4
+ tristate "Netfilter IPv4 packet duplication to alternate destination"
+ depends on !NF_CONNTRACK || NF_CONNTRACK
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+index 8231a7a..7176d7f 100644
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
+ }
+ }
+
+- mtu = ip6_dst_mtu_forward(dst);
++ mtu = ip6_dst_mtu_maybe_forward(dst, true);
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+
+diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
+index 69443e9..0b481d2 100644
+--- a/net/ipv6/netfilter/Kconfig
++++ b/net/ipv6/netfilter/Kconfig
+@@ -45,7 +45,6 @@ config NFT_FIB_IPV6
+ multicast or blackhole.
+
+ endif # NF_TABLES_IPV6
+-endif # NF_TABLES
+
+ config NF_FLOW_TABLE_IPV6
+ tristate "Netfilter flow table IPv6 module"
+@@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6
+
+ To compile it as a module, choose M here.
+
++endif # NF_TABLES
++
+ config NF_DUP_IPV6
+ tristate "Netfilter IPv6 packet duplication to alternate destination"
+ depends on !NF_CONNTRACK || NF_CONNTRACK
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 43d185c..82a752c 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -83,7 +83,7 @@ enum rt6_nud_state {
+
+ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
+ static unsigned int ip6_default_advmss(const struct dst_entry *dst);
+-static unsigned int ip6_mtu(const struct dst_entry *dst);
++static unsigned int ip6_mtu(const struct dst_entry *dst);
+ static struct dst_entry *ip6_negative_advice(struct dst_entry *);
+ static void ip6_dst_destroy(struct dst_entry *);
+ static void ip6_dst_ifdown(struct dst_entry *,
+@@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
+
+ static unsigned int ip6_mtu(const struct dst_entry *dst)
+ {
+- struct inet6_dev *idev;
+- unsigned int mtu;
+-
+- mtu = dst_metric_raw(dst, RTAX_MTU);
+- if (mtu)
+- goto out;
+-
+- mtu = IPV6_MIN_MTU;
+-
+- rcu_read_lock();
+- idev = __in6_dev_get(dst->dev);
+- if (idev)
+- mtu = idev->cnf.mtu6;
+- rcu_read_unlock();
+-
+-out:
+- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+-
+- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
++ return ip6_dst_mtu_maybe_forward(dst, false);
+ }
+
+ /* MTU selection:
+diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
+index b6e0a62..5d690ab 100644
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -689,8 +689,6 @@ config NFT_FIB_NETDEV
+
+ endif # NF_TABLES_NETDEV
+
+-endif # NF_TABLES
+-
+ config NF_FLOW_TABLE_INET
+ tristate "Netfilter flow table mixed IPv4/IPv6 module"
+ depends on NF_FLOW_TABLE
+@@ -699,11 +697,12 @@ config NF_FLOW_TABLE_INET
+
+ To compile it as a module, choose M here.
+
++endif # NF_TABLES
++
+ config NF_FLOW_TABLE
+ tristate "Netfilter flow table module"
+ depends on NETFILTER_INGRESS
+ depends on NF_CONNTRACK
+- depends on NF_TABLES
+ help
+ This option adds the flow table core infrastructure.
+
+@@ -983,6 +982,15 @@ config NETFILTER_XT_TARGET_NOTRACK
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_TARGET_CT
+
++config NETFILTER_XT_TARGET_FLOWOFFLOAD
++ tristate '"FLOWOFFLOAD" target support'
++ depends on NF_FLOW_TABLE
++ depends on NETFILTER_INGRESS
++ help
++ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
++ module to speed up processing of packets by bypassing the usual
++	  netfilter chains.
++
+ config NETFILTER_XT_TARGET_RATEEST
+ tristate '"RATEEST" target support'
+ depends on NETFILTER_ADVANCED
+diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
+index 4fc075b..d93a121 100644
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
+
+ # flow table infrastructure
+ obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
+-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
++nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
++ nf_flow_table_offload.o
+
+ obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
+
+@@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
++obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
+diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
+index f6ab6f4..f689e19 100644
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -864,9 +864,8 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
+ }
+ EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
+
+-static inline void nf_ct_acct_update(struct nf_conn *ct,
+- enum ip_conntrack_info ctinfo,
+- unsigned int len)
++void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
++ unsigned int bytes)
+ {
+ struct nf_conn_acct *acct;
+
+@@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
+ if (acct) {
+ struct nf_conn_counter *counter = acct->counter;
+
+- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
+- atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
++ atomic64_add(packets, &counter[dir].packets);
++ atomic64_add(bytes, &counter[dir].bytes);
+ }
+ }
++EXPORT_SYMBOL_GPL(nf_ct_acct_add);
+
+ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ const struct nf_conn *loser_ct)
+@@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+
+ /* u32 should be fine since we must have seen one packet. */
+ bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
+- nf_ct_acct_update(ct, ctinfo, bytes);
++ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
+ }
+ }
+
+@@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work)
+
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+
+- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
++ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
++ nf_ct_offload_timeout(tmp);
+ continue;
++ }
+
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+@@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
+ WRITE_ONCE(ct->timeout, extra_jiffies);
+ acct:
+ if (do_acct)
+- nf_ct_acct_update(ct, ctinfo, skb->len);
++ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
+ }
+ EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
+
+@@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb)
+ {
+- nf_ct_acct_update(ct, ctinfo, skb->len);
++ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
+
+ return nf_ct_delete(ct, 0, 0);
+ }
+diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
+index e219b6f..5cdc627 100644
+--- a/net/netfilter/nf_conntrack_proto_tcp.c
++++ b/net/netfilter/nf_conntrack_proto_tcp.c
+@@ -1463,6 +1463,10 @@ void nf_conntrack_tcp_init_net(struct net *net)
+ tn->tcp_loose = nf_ct_tcp_loose;
+ tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
+ tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
++
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++ tn->offload_timeout = 30 * HZ;
++#endif
+ }
+
+ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
+diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
+index e3a2d01..a1579d6 100644
+--- a/net/netfilter/nf_conntrack_proto_udp.c
++++ b/net/netfilter/nf_conntrack_proto_udp.c
+@@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net)
+
+ for (i = 0; i < UDP_CT_MAX; i++)
+ un->timeouts[i] = udp_timeouts[i];
++
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++ un->offload_timeout = 30 * HZ;
++#endif
+ }
+
+ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
+diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
+index 0b600b4..a2cfafa 100644
+--- a/net/netfilter/nf_conntrack_standalone.c
++++ b/net/netfilter/nf_conntrack_standalone.c
+@@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
+ if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
+ goto release;
+
+- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
++ if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
++ seq_puts(s, "[HW_OFFLOAD] ");
++ else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ seq_puts(s, "[OFFLOAD] ");
+ else if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ seq_puts(s, "[ASSURED] ");
+@@ -620,11 +622,17 @@ enum nf_ct_sysctl_index {
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
++#endif
+ NF_SYSCTL_CT_PROTO_TCP_LOOSE,
+ NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
+ NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
++#endif
+ NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
+ #ifdef CONFIG_NF_CT_PROTO_SCTP
+@@ -811,6 +819,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
++ .procname = "nf_flowtable_tcp_timeout",
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec_jiffies,
++ },
++#endif
+ [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
+ .procname = "nf_conntrack_tcp_loose",
+ .maxlen = sizeof(int),
+@@ -845,6 +861,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
++ .procname = "nf_flowtable_udp_timeout",
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec_jiffies,
++ },
++#endif
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
+ .procname = "nf_conntrack_icmp_timeout",
+ .maxlen = sizeof(unsigned int),
+@@ -1021,6 +1045,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
+ XASSIGN(LIBERAL, &tn->tcp_be_liberal);
+ XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
+ #undef XASSIGN
++
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
++#endif
++
+ }
+
+ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
+@@ -1107,6 +1136,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
++#endif
+
+ nf_conntrack_standalone_init_tcp_sysctl(net, table);
+ nf_conntrack_standalone_init_sctp_sysctl(net, table);
+diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
+index f212cec..c3054af 100644
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -7,43 +7,21 @@
+ #include <linux/netdevice.h>
+ #include <net/ip.h>
+ #include <net/ip6_route.h>
+-#include <net/netfilter/nf_tables.h>
+ #include <net/netfilter/nf_flow_table.h>
+ #include <net/netfilter/nf_conntrack.h>
+ #include <net/netfilter/nf_conntrack_core.h>
+ #include <net/netfilter/nf_conntrack_l4proto.h>
+ #include <net/netfilter/nf_conntrack_tuple.h>
+
+-struct flow_offload_entry {
+- struct flow_offload flow;
+- struct nf_conn *ct;
+- struct rcu_head rcu_head;
+-};
+-
+ static DEFINE_MUTEX(flowtable_lock);
+ static LIST_HEAD(flowtables);
+
+-static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
+-{
+- const struct rt6_info *rt;
+-
+- if (flow_tuple->l3proto == NFPROTO_IPV6) {
+- rt = (const struct rt6_info *)flow_tuple->dst_cache;
+- return rt6_get_cookie(rt);
+- }
+-
+- return 0;
+-}
+-
+ static void
+-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
+- struct nf_flow_route *route,
++flow_offload_fill_dir(struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir)
+ {
+ struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
+- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
+- struct dst_entry *other_dst = route->tuple[!dir].dst;
+- struct dst_entry *dst = route->tuple[dir].dst;
++ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
+
+ ft->dir = dir;
+
+@@ -51,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
+ case NFPROTO_IPV4:
+ ft->src_v4 = ctt->src.u3.in;
+ ft->dst_v4 = ctt->dst.u3.in;
+- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
+ break;
+ case NFPROTO_IPV6:
+ ft->src_v6 = ctt->src.u3.in6;
+ ft->dst_v6 = ctt->dst.u3.in6;
+- ft->mtu = ip6_dst_mtu_forward(dst);
+ break;
+ }
+
+@@ -64,50 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
+ ft->l4proto = ctt->dst.protonum;
+ ft->src_port = ctt->src.u.tcp.port;
+ ft->dst_port = ctt->dst.u.tcp.port;
+-
+- ft->iifidx = other_dst->dev->ifindex;
+- ft->dst_cache = dst;
+- ft->dst_cookie = flow_offload_dst_cookie(ft);
+ }
+
+-struct flow_offload *
+-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
++struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
+ {
+- struct flow_offload_entry *entry;
+ struct flow_offload *flow;
+
+ if (unlikely(nf_ct_is_dying(ct) ||
+ !atomic_inc_not_zero(&ct->ct_general.use)))
+ return NULL;
+
+- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+- if (!entry)
++ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
++ if (!flow)
+ goto err_ct_refcnt;
+
+- flow = &entry->flow;
+-
+- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
+- goto err_dst_cache_original;
+-
+- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
+- goto err_dst_cache_reply;
++ flow->ct = ct;
+
+- entry->ct = ct;
+-
+- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
++ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
++ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
+
+ if (ct->status & IPS_SRC_NAT)
+- flow->flags |= FLOW_OFFLOAD_SNAT;
++ __set_bit(NF_FLOW_SNAT, &flow->flags);
+ if (ct->status & IPS_DST_NAT)
+- flow->flags |= FLOW_OFFLOAD_DNAT;
++ __set_bit(NF_FLOW_DNAT, &flow->flags);
+
+ return flow;
+
+-err_dst_cache_reply:
+- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+-err_dst_cache_original:
+- kfree(entry);
+ err_ct_refcnt:
+ nf_ct_put(ct);
+
+@@ -115,40 +73,135 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_alloc);
+
+-static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
++static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
+ {
+- tcp->state = TCP_CONNTRACK_ESTABLISHED;
+- tcp->seen[0].td_maxwin = 0;
+- tcp->seen[1].td_maxwin = 0;
++ const struct rt6_info *rt;
++
++ if (flow_tuple->l3proto == NFPROTO_IPV6) {
++ rt = (const struct rt6_info *)flow_tuple->dst_cache;
++ return rt6_get_cookie(rt);
++ }
++
++ return 0;
+ }
+
+-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
+-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
++static int flow_offload_fill_route(struct flow_offload *flow,
++ const struct nf_flow_route *route,
++ enum flow_offload_tuple_dir dir)
++{
++ struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
++ struct dst_entry *dst = route->tuple[dir].dst;
++ int i, j = 0;
++
++ switch (flow_tuple->l3proto) {
++ case NFPROTO_IPV4:
++ flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
++ break;
++ case NFPROTO_IPV6:
++ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
++ break;
++ }
++
++ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
++ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
++ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
++ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
++ if (route->tuple[dir].in.ingress_vlans & BIT(i))
++ flow_tuple->in_vlan_ingress |= BIT(j);
++ j++;
++ }
++ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
++
++ switch (route->tuple[dir].xmit_type) {
++ case FLOW_OFFLOAD_XMIT_DIRECT:
++ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
++ ETH_ALEN);
++ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
++ ETH_ALEN);
++ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
++ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
++ break;
++ case FLOW_OFFLOAD_XMIT_XFRM:
++ case FLOW_OFFLOAD_XMIT_NEIGH:
++ if (!dst_hold_safe(route->tuple[dir].dst))
++ return -1;
++
++ flow_tuple->dst_cache = dst;
++ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
++ break;
++ default:
++ WARN_ON_ONCE(1);
++ break;
++ }
++ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
++
++ return 0;
++}
++
++static void nft_flow_dst_release(struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir)
++{
++ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
++ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
++ dst_release(flow->tuplehash[dir].tuple.dst_cache);
++}
++
++int flow_offload_route_init(struct flow_offload *flow,
++ const struct nf_flow_route *route)
++{
++ int err;
++
++ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
++ if (err < 0)
++ return err;
++
++ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
++ if (err < 0)
++ goto err_route_reply;
++
++ flow->type = NF_FLOW_OFFLOAD_ROUTE;
++
++ return 0;
++
++err_route_reply:
++ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
++
++ return err;
++}
++EXPORT_SYMBOL_GPL(flow_offload_route_init);
+
+-static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
++static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
+ {
+- return (__s32)(timeout - (u32)jiffies);
++ tcp->state = TCP_CONNTRACK_ESTABLISHED;
++ tcp->seen[0].td_maxwin = 0;
++ tcp->seen[1].td_maxwin = 0;
+ }
+
+ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+ {
+- const struct nf_conntrack_l4proto *l4proto;
++ struct net *net = nf_ct_net(ct);
+ int l4num = nf_ct_protonum(ct);
+- unsigned int timeout;
++ s32 timeout;
+
+- l4proto = nf_ct_l4proto_find(l4num);
+- if (!l4proto)
+- return;
++ if (l4num == IPPROTO_TCP) {
++ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+- if (l4num == IPPROTO_TCP)
+- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
+- else if (l4num == IPPROTO_UDP)
+- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
+- else
++ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
++ timeout -= tn->offload_timeout;
++ } else if (l4num == IPPROTO_UDP) {
++ struct nf_udp_net *tn = nf_udp_pernet(net);
++
++ timeout = tn->timeouts[UDP_CT_REPLIED];
++ timeout -= tn->offload_timeout;
++ } else {
+ return;
++ }
++
++ if (timeout < 0)
++ timeout = 0;
+
+- if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
+- ct->timeout = nfct_time_stamp + timeout;
++ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
++ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
+ }
+
+ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+@@ -163,17 +216,23 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
+ flow_offload_fixup_ct_timeout(ct);
+ }
+
+-void flow_offload_free(struct flow_offload *flow)
++static void flow_offload_route_release(struct flow_offload *flow)
+ {
+- struct flow_offload_entry *e;
++ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
++ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
++}
+
+- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
+- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+- e = container_of(flow, struct flow_offload_entry, flow);
+- if (flow->flags & FLOW_OFFLOAD_DYING)
+- nf_ct_delete(e->ct, 0, 0);
+- nf_ct_put(e->ct);
+- kfree_rcu(e, rcu_head);
++void flow_offload_free(struct flow_offload *flow)
++{
++ switch (flow->type) {
++ case NF_FLOW_OFFLOAD_ROUTE:
++ flow_offload_route_release(flow);
++ break;
++ default:
++ break;
++ }
++ nf_ct_put(flow->ct);
++ kfree_rcu(flow, rcu_head);
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_free);
+
+@@ -181,14 +240,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+ {
+ const struct flow_offload_tuple *tuple = data;
+
+- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
++ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
+ }
+
+ static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+ {
+ const struct flow_offload_tuple_rhash *tuplehash = data;
+
+- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
++ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
+ }
+
+ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+@@ -197,7 +256,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+ const struct flow_offload_tuple *tuple = arg->key;
+ const struct flow_offload_tuple_rhash *x = ptr;
+
+- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
++ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
+ return 1;
+
+ return 0;
+@@ -211,30 +270,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
+ .automatic_shrinking = true,
+ };
+
+-#define DAY (86400 * HZ)
+-
+-/* Set an arbitrary timeout large enough not to ever expire, this save
+- * us a check for the IPS_OFFLOAD_BIT from the packet path via
+- * nf_ct_is_expired().
+- */
+-static void nf_ct_offload_timeout(struct flow_offload *flow)
++unsigned long flow_offload_get_timeout(struct flow_offload *flow)
+ {
+- struct flow_offload_entry *entry;
+- struct nf_conn *ct;
++ unsigned long timeout = NF_FLOW_TIMEOUT;
++ struct net *net = nf_ct_net(flow->ct);
++ int l4num = nf_ct_protonum(flow->ct);
+
+- entry = container_of(flow, struct flow_offload_entry, flow);
+- ct = entry->ct;
++ if (l4num == IPPROTO_TCP) {
++ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+- if (nf_ct_expires(ct) < DAY / 2)
+- ct->timeout = nfct_time_stamp + DAY;
++ timeout = tn->offload_timeout;
++ } else if (l4num == IPPROTO_UDP) {
++ struct nf_udp_net *tn = nf_udp_pernet(net);
++
++ timeout = tn->offload_timeout;
++ }
++
++ return timeout;
+ }
+
+ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+ {
+ int err;
+
+- nf_ct_offload_timeout(flow);
+- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
++ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+
+ err = rhashtable_insert_fast(&flow_table->rhashtable,
+ &flow->tuplehash[0].node,
+@@ -252,10 +311,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+ return err;
+ }
+
++ nf_ct_offload_timeout(flow->ct);
++
++ if (nf_flowtable_hw_offload(flow_table)) {
++ __set_bit(NF_FLOW_HW, &flow->flags);
++ nf_flow_offload_add(flow_table, flow);
++ }
++
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_add);
+
++void flow_offload_refresh(struct nf_flowtable *flow_table,
++ struct flow_offload *flow)
++{
++ u32 timeout;
++
++ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
++ if (timeout - READ_ONCE(flow->timeout) > HZ)
++ WRITE_ONCE(flow->timeout, timeout);
++ else
++ return;
++
++ if (likely(!nf_flowtable_hw_offload(flow_table)))
++ return;
++
++ nf_flow_offload_add(flow_table, flow);
++}
++EXPORT_SYMBOL_GPL(flow_offload_refresh);
++
+ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+ {
+ return nf_flow_timeout_delta(flow->timeout) <= 0;
+@@ -264,8 +348,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+ static void flow_offload_del(struct nf_flowtable *flow_table,
+ struct flow_offload *flow)
+ {
+- struct flow_offload_entry *e;
+-
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+ nf_flow_offload_rhash_params);
+@@ -273,28 +355,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ nf_flow_offload_rhash_params);
+
+- e = container_of(flow, struct flow_offload_entry, flow);
+- clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
++ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
+
+ if (nf_flow_has_expired(flow))
+- flow_offload_fixup_ct(e->ct);
+- else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
+- flow_offload_fixup_ct_timeout(e->ct);
+-
+- if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
+- flow_offload_fixup_ct_state(e->ct);
++ flow_offload_fixup_ct(flow->ct);
++ else
++ flow_offload_fixup_ct_timeout(flow->ct);
+
+ flow_offload_free(flow);
+ }
+
+ void flow_offload_teardown(struct flow_offload *flow)
+ {
+- struct flow_offload_entry *e;
+-
+- flow->flags |= FLOW_OFFLOAD_TEARDOWN;
++ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+
+- e = container_of(flow, struct flow_offload_entry, flow);
+- flow_offload_fixup_ct_state(e->ct);
++ flow_offload_fixup_ct_state(flow->ct);
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_teardown);
+
+@@ -304,7 +379,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
+ {
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct flow_offload *flow;
+- struct flow_offload_entry *e;
+ int dir;
+
+ tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
+@@ -314,19 +388,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+- if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
++ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
+ return NULL;
+
+- e = container_of(flow, struct flow_offload_entry, flow);
+- if (unlikely(nf_ct_is_dying(e->ct)))
++ if (unlikely(nf_ct_is_dying(flow->ct)))
+ return NULL;
+
+ return tuplehash;
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_lookup);
+
+-static int
+-nf_flow_table_iterate(struct nf_flowtable *flow_table,
++int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ void (*iter)(struct flow_offload *flow, void *data),
+ void *data)
+ {
+@@ -339,7 +411,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ rhashtable_walk_start(&hti);
+
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
+-
+ if (IS_ERR(tuplehash)) {
+ if (PTR_ERR(tuplehash) != -EAGAIN) {
+ err = PTR_ERR(tuplehash);
+@@ -359,23 +430,52 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
+
+ return err;
+ }
++EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
+
+-static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
++static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
+ {
+- struct nf_flowtable *flow_table = data;
+- struct flow_offload_entry *e;
+- bool teardown;
++ struct dst_entry *dst;
+
+- e = container_of(flow, struct flow_offload_entry, flow);
++ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
++ tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
++ dst = tuple->dst_cache;
++ if (!dst_check(dst, tuple->dst_cookie))
++ return true;
++ }
+
+- teardown = flow->flags & (FLOW_OFFLOAD_DYING |
+- FLOW_OFFLOAD_TEARDOWN);
++ return false;
++}
+
+- if (!teardown)
+- nf_ct_offload_timeout(flow);
++static bool nf_flow_has_stale_dst(struct flow_offload *flow)
++{
++ return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
++ flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
++}
+
+- if (nf_flow_has_expired(flow) || teardown)
+- flow_offload_del(flow_table, flow);
++static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
++{
++ struct nf_flowtable *flow_table = data;
++
++ if (nf_flow_has_expired(flow) ||
++ nf_ct_is_dying(flow->ct) ||
++ nf_flow_has_stale_dst(flow))
++ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
++
++ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
++ if (test_bit(NF_FLOW_HW, &flow->flags)) {
++ if (!test_and_set_bit(NF_FLOW_HW_ACCT_DYING, &flow->flags))
++ nf_flow_offload_stats(flow_table, flow, true);
++
++ if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
++ nf_flow_offload_del(flow_table, flow);
++ else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
++ flow_offload_del(flow_table, flow);
++ } else {
++ flow_offload_del(flow_table, flow);
++ }
++ } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
++ nf_flow_offload_stats(flow_table, flow, false);
++ }
+ }
+
+ static void nf_flow_offload_work_gc(struct work_struct *work)
+@@ -387,30 +487,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
+ queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
+ }
+
+-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+- __be16 port, __be16 new_port)
++static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
++ __be16 port, __be16 new_port)
+ {
+ struct tcphdr *tcph;
+
+- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+- return -1;
+-
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
+-
+- return 0;
+ }
+
+-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+- __be16 port, __be16 new_port)
++static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
++ __be16 port, __be16 new_port)
+ {
+ struct udphdr *udph;
+
+- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+- skb_try_make_writable(skb, thoff + sizeof(*udph)))
+- return -1;
+-
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace2(&udph->check, skb, port,
+@@ -418,38 +508,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+-
+- return 0;
+ }
+
+-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
+- u8 protocol, __be16 port, __be16 new_port)
++static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
++ u8 protocol, __be16 port, __be16 new_port)
+ {
+ switch (protocol) {
+ case IPPROTO_TCP:
+- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
+- return NF_DROP;
++ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
+ break;
+ case IPPROTO_UDP:
+- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
+- return NF_DROP;
++ nf_flow_nat_port_udp(skb, thoff, port, new_port);
+ break;
+ }
+-
+- return 0;
+ }
+
+-int nf_flow_snat_port(const struct flow_offload *flow,
+- struct sk_buff *skb, unsigned int thoff,
+- u8 protocol, enum flow_offload_tuple_dir dir)
++void nf_flow_snat_port(const struct flow_offload *flow,
++ struct sk_buff *skb, unsigned int thoff,
++ u8 protocol, enum flow_offload_tuple_dir dir)
+ {
+ struct flow_ports *hdr;
+ __be16 port, new_port;
+
+- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+- return -1;
+-
+ hdr = (void *)(skb_network_header(skb) + thoff);
+
+ switch (dir) {
+@@ -463,25 +543,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+ hdr->dest = new_port;
+ break;
+- default:
+- return -1;
+ }
+
+- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
++ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_snat_port);
+
+-int nf_flow_dnat_port(const struct flow_offload *flow,
+- struct sk_buff *skb, unsigned int thoff,
+- u8 protocol, enum flow_offload_tuple_dir dir)
++void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
++ unsigned int thoff, u8 protocol,
++ enum flow_offload_tuple_dir dir)
+ {
+ struct flow_ports *hdr;
+ __be16 port, new_port;
+
+- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+- return -1;
+-
+ hdr = (void *)(skb_network_header(skb) + thoff);
+
+ switch (dir) {
+@@ -495,11 +569,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
+ hdr->source = new_port;
+ break;
+- default:
+- return -1;
+ }
+
+- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
++ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
+
+@@ -507,7 +579,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
+ {
+ int err;
+
+- INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
++ INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
++ flow_block_init(&flowtable->flow_block);
++ init_rwsem(&flowtable->flow_block_lock);
+
+ err = rhashtable_init(&flowtable->rhashtable,
+ &nf_flow_offload_rhash_params);
+@@ -528,25 +602,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
+ static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
+ {
+ struct net_device *dev = data;
+- struct flow_offload_entry *e;
+-
+- e = container_of(flow, struct flow_offload_entry, flow);
+
+ if (!dev) {
+ flow_offload_teardown(flow);
+ return;
+ }
+- if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
++
++ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
+ (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
+ flow->tuplehash[1].tuple.iifidx == dev->ifindex))
+- flow_offload_dead(flow);
++ flow_offload_teardown(flow);
+ }
+
+-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
+- struct net_device *dev)
++void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
++ struct net_device *dev)
+ {
+ nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
+ flush_delayed_work(&flowtable->gc_work);
++ nf_flow_table_offload_flush(flowtable);
+ }
+
+ void nf_flow_table_cleanup(struct net_device *dev)
+@@ -555,7 +628,7 @@ void nf_flow_table_cleanup(struct net_device *dev)
+
+ mutex_lock(&flowtable_lock);
+ list_for_each_entry(flowtable, &flowtables, list)
+- nf_flow_table_iterate_cleanup(flowtable, dev);
++ nf_flow_table_gc_cleanup(flowtable, dev);
+ mutex_unlock(&flowtable_lock);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
+@@ -565,9 +638,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
+ mutex_lock(&flowtable_lock);
+ list_del(&flow_table->list);
+ mutex_unlock(&flowtable_lock);
++
+ cancel_delayed_work_sync(&flow_table->gc_work);
+ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
++ nf_flow_table_offload_flush(flow_table);
++ if (nf_flowtable_hw_offload(flow_table))
++ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
++ flow_table);
+ rhashtable_destroy(&flow_table->rhashtable);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_free);
+@@ -591,12 +669,23 @@ static struct notifier_block flow_offload_netdev_notifier = {
+
+ static int __init nf_flow_table_module_init(void)
+ {
+- return register_netdevice_notifier(&flow_offload_netdev_notifier);
++ int ret;
++
++ ret = nf_flow_table_offload_init();
++ if (ret)
++ return ret;
++
++ ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
++ if (ret)
++ nf_flow_table_offload_exit();
++
++ return ret;
+ }
+
+ static void __exit nf_flow_table_module_exit(void)
+ {
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
++ nf_flow_table_offload_exit();
+ }
+
+ module_init(nf_flow_table_module_init);
+@@ -604,3 +693,4 @@ module_exit(nf_flow_table_module_exit);
+
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
++MODULE_DESCRIPTION("Netfilter flow table module");
+diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
+index 397129b..6257d87 100644
+--- a/net/netfilter/nf_flow_table_ip.c
++++ b/net/netfilter/nf_flow_table_ip.c
+@@ -7,11 +7,13 @@
+ #include <linux/ip.h>
+ #include <linux/ipv6.h>
+ #include <linux/netdevice.h>
++#include <linux/if_ether.h>
+ #include <net/ip.h>
+ #include <net/ipv6.h>
+ #include <net/ip6_route.h>
+ #include <net/neighbour.h>
+ #include <net/netfilter/nf_flow_table.h>
++#include <net/netfilter/nf_conntrack_acct.h>
+ /* For layer 4 checksum field offset. */
+ #include <linux/tcp.h>
+ #include <linux/udp.h>
+@@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
+ if (proto != IPPROTO_TCP)
+ return 0;
+
+- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
+- return -1;
+-
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ if (unlikely(tcph->fin || tcph->rst)) {
+ flow_offload_teardown(flow);
+@@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
+ return 0;
+ }
+
+-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+- __be32 addr, __be32 new_addr)
++static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
++ __be32 addr, __be32 new_addr)
+ {
+ struct tcphdr *tcph;
+
+- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+- return -1;
+-
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
+-
+- return 0;
+ }
+
+-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+- __be32 addr, __be32 new_addr)
++static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
++ __be32 addr, __be32 new_addr)
+ {
+ struct udphdr *udph;
+
+- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+- skb_try_make_writable(skb, thoff + sizeof(*udph)))
+- return -1;
+-
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace4(&udph->check, skb, addr,
+@@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+-
+- return 0;
+ }
+
+-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+- unsigned int thoff, __be32 addr,
+- __be32 new_addr)
++static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
++ unsigned int thoff, __be32 addr,
++ __be32 new_addr)
+ {
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
+- return NF_DROP;
++ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
+ break;
+ case IPPROTO_UDP:
+- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
+- return NF_DROP;
++ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
+ break;
+ }
+-
+- return 0;
+ }
+
+-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+- struct iphdr *iph, unsigned int thoff,
+- enum flow_offload_tuple_dir dir)
++static void nf_flow_snat_ip(const struct flow_offload *flow,
++ struct sk_buff *skb, struct iphdr *iph,
++ unsigned int thoff, enum flow_offload_tuple_dir dir)
+ {
+ __be32 addr, new_addr;
+
+@@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+ iph->daddr = new_addr;
+ break;
+- default:
+- return -1;
+ }
+ csum_replace4(&iph->check, addr, new_addr);
+
+- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
++ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+ }
+
+-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+- struct iphdr *iph, unsigned int thoff,
+- enum flow_offload_tuple_dir dir)
++static void nf_flow_dnat_ip(const struct flow_offload *flow,
++ struct sk_buff *skb, struct iphdr *iph,
++ unsigned int thoff, enum flow_offload_tuple_dir dir)
+ {
+ __be32 addr, new_addr;
+
+@@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+ iph->saddr = new_addr;
+ break;
+- default:
+- return -1;
+ }
+ csum_replace4(&iph->check, addr, new_addr);
+
+- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
++ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+ }
+
+-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+- unsigned int thoff, enum flow_offload_tuple_dir dir)
++static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
++ unsigned int thoff, enum flow_offload_tuple_dir dir,
++ struct iphdr *iph)
+ {
+- struct iphdr *iph = ip_hdr(skb);
+-
+- if (flow->flags & FLOW_OFFLOAD_SNAT &&
+- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
+- return -1;
+- if (flow->flags & FLOW_OFFLOAD_DNAT &&
+- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
+- return -1;
+-
+- return 0;
++ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
++ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
++ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
++ }
++ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
++ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
++ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
++ }
+ }
+
+ static bool ip_has_options(unsigned int thoff)
+@@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff)
+ return thoff != sizeof(struct iphdr);
+ }
+
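++/*
++ * Record up to two levels of ingress encapsulation (802.1Q VLAN tags
++ * and/or a PPPoE session header) in the lookup tuple so that tagged
++ * traffic can still match an offloaded flow.
++ */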
++static void nf_flow_tuple_encap(struct sk_buff *skb,
++ struct flow_offload_tuple *tuple)
++{
++ struct vlan_ethhdr *veth;
++ struct pppoe_hdr *phdr;
++ int i = 0;
++
++ if (skb_vlan_tag_present(skb)) {
++ tuple->encap[i].id = skb_vlan_tag_get(skb);
++ tuple->encap[i].proto = skb->vlan_proto;
++ i++;
++ }
++ switch (skb->protocol) {
++ case htons(ETH_P_8021Q):
++ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
++ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
++ tuple->encap[i].proto = skb->protocol;
++ break;
++ case htons(ETH_P_PPP_SES):
++ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
++ tuple->encap[i].id = ntohs(phdr->sid);
++ tuple->encap[i].proto = skb->protocol;
++ break;
++ }
++}
++
+ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
+- struct flow_offload_tuple *tuple)
++ struct flow_offload_tuple *tuple, u32 *hdrsize,
++ u32 offset)
+ {
+ struct flow_ports *ports;
+ unsigned int thoff;
+ struct iphdr *iph;
+
+- if (!pskb_may_pull(skb, sizeof(*iph)))
++ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
+ return -1;
+
+- iph = ip_hdr(skb);
+- thoff = iph->ihl * 4;
++ iph = (struct iphdr *)(skb_network_header(skb) + offset);
++ thoff = (iph->ihl * 4);
+
+ if (ip_is_fragment(iph) ||
+ unlikely(ip_has_options(thoff)))
+ return -1;
+
+- if (iph->protocol != IPPROTO_TCP &&
+- iph->protocol != IPPROTO_UDP)
++ thoff += offset;
++
++ switch (iph->protocol) {
++ case IPPROTO_TCP:
++ *hdrsize = sizeof(struct tcphdr);
++ break;
++ case IPPROTO_UDP:
++ *hdrsize = sizeof(struct udphdr);
++ break;
++ default:
+ return -1;
++ }
+
+ if (iph->ttl <= 1)
+ return -1;
+
+- thoff = iph->ihl * 4;
+- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
++ if (!pskb_may_pull(skb, thoff + *hdrsize))
+ return -1;
+
+- iph = ip_hdr(skb);
++ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v4.s_addr = iph->saddr;
+@@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
+ tuple->l3proto = AF_INET;
+ tuple->l4proto = iph->protocol;
+ tuple->iifidx = dev->ifindex;
++ nf_flow_tuple_encap(skb, tuple);
+
+ return 0;
+ }
+@@ -225,6 +237,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
+ return NF_STOLEN;
+ }
+
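++/*
++ * Accept packets whose outer protocol is a VLAN or PPPoE header when
++ * the encapsulated protocol matches, reporting the extra header length
++ * so callers can offset into the inner IP header.
++ */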
++static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
++ u32 *offset)
++{
++ struct vlan_ethhdr *veth;
++
++ switch (skb->protocol) {
++ case htons(ETH_P_8021Q):
++ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
++ if (veth->h_vlan_encapsulated_proto == proto) {
++ *offset += VLAN_HLEN;
++ return true;
++ }
++ break;
++ case htons(ETH_P_PPP_SES):
++ if (nf_flow_pppoe_proto(skb) == proto) {
++ *offset += PPPOE_SES_HLEN;
++ return true;
++ }
++ break;
++ }
++
++ return false;
++}
++
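++/*
++ * Strip the encapsulation headers recorded in the tuple before the
++ * packet is NATed and handed to the egress path.
++ */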
++static void nf_flow_encap_pop(struct sk_buff *skb,
++ struct flow_offload_tuple_rhash *tuplehash)
++{
++ struct vlan_hdr *vlan_hdr;
++ int i;
++
++ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
++ if (skb_vlan_tag_present(skb)) {
++ __vlan_hwaccel_clear_tag(skb);
++ continue;
++ }
++ switch (skb->protocol) {
++ case htons(ETH_P_8021Q):
++ vlan_hdr = (struct vlan_hdr *)skb->data;
++ __skb_pull(skb, VLAN_HLEN);
++ vlan_set_encap_proto(skb, vlan_hdr);
++ skb_reset_network_header(skb);
++ break;
++ case htons(ETH_P_PPP_SES):
++ skb->protocol = nf_flow_pppoe_proto(skb);
++ skb_pull(skb, PPPOE_SES_HLEN);
++ skb_reset_network_header(skb);
++ break;
++ }
++ }
++}
++
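++/*
++ * FLOW_OFFLOAD_XMIT_DIRECT: rebuild the Ethernet header from the MAC
++ * addresses cached in the tuple and transmit without a route lookup.
++ */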
++static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
++ const struct flow_offload_tuple_rhash *tuplehash,
++ unsigned short type)
++{
++ struct net_device *outdev;
++
++ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
++ if (!outdev)
++ return NF_DROP;
++
++ skb->dev = outdev;
++ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
++ tuplehash->tuple.out.h_source, skb->len);
++ dev_queue_xmit(skb);
++
++ return NF_STOLEN;
++}
++
+ unsigned int
+ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+@@ -235,15 +316,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
++ u32 hdrsize, offset = 0;
++ unsigned int thoff, mtu;
+ struct rtable *rt;
+- unsigned int thoff;
+ struct iphdr *iph;
+ __be32 nexthop;
++ int ret;
+
+- if (skb->protocol != htons(ETH_P_IP))
++ if (skb->protocol != htons(ETH_P_IP) &&
++ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
+ return NF_ACCEPT;
+
+- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
++ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+@@ -252,75 +336,80 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+- outdev = rt->dst.dev;
+-
+- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+- return NF_ACCEPT;
+
+- if (skb_try_make_writable(skb, sizeof(*iph)))
+- return NF_DROP;
+-
+- thoff = ip_hdr(skb)->ihl * 4;
+- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
++ mtu = flow->tuplehash[dir].tuple.mtu + offset;
++ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
+ return NF_ACCEPT;
+
+- if (!dst_check(&rt->dst, 0)) {
+- flow_offload_teardown(flow);
++ iph = (struct iphdr *)(skb_network_header(skb) + offset);
++ thoff = (iph->ihl * 4) + offset;
++ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
+ return NF_ACCEPT;
+- }
+
+- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
++ if (skb_try_make_writable(skb, thoff + hdrsize))
+ return NF_DROP;
+
+- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
++ flow_offload_refresh(flow_table, flow);
++
++ nf_flow_encap_pop(skb, tuplehash);
++ thoff -= offset;
++
+ iph = ip_hdr(skb);
++ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
++
+ ip_decrease_ttl(iph);
+ skb->tstamp = 0;
+
+- if (unlikely(dst_xfrm(&rt->dst))) {
++ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
++ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
++
++ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
++ rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ memset(skb->cb, 0, sizeof(struct inet_skb_parm));
+ IPCB(skb)->iif = skb->dev->ifindex;
+ IPCB(skb)->flags = IPSKB_FORWARDED;
+ return nf_flow_xmit_xfrm(skb, state, &rt->dst);
+ }
+
+- skb->dev = outdev;
+- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+- skb_dst_set_noref(skb, &rt->dst);
+- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
++ switch (tuplehash->tuple.xmit_type) {
++ case FLOW_OFFLOAD_XMIT_NEIGH:
++ rt = (struct rtable *)tuplehash->tuple.dst_cache;
++ outdev = rt->dst.dev;
++ skb->dev = outdev;
++ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
++ skb_dst_set_noref(skb, &rt->dst);
++ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
++ ret = NF_STOLEN;
++ break;
++ case FLOW_OFFLOAD_XMIT_DIRECT:
++ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
++ if (ret == NF_DROP)
++ flow_offload_teardown(flow);
++ break;
++ }
+
+- return NF_STOLEN;
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
+
+-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+- struct in6_addr *addr,
+- struct in6_addr *new_addr)
++static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
++ struct in6_addr *addr,
++ struct in6_addr *new_addr,
++ struct ipv6hdr *ip6h)
+ {
+ struct tcphdr *tcph;
+
+- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+- return -1;
+-
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+-
+- return 0;
+ }
+
+-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+- struct in6_addr *addr,
+- struct in6_addr *new_addr)
++static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
++ struct in6_addr *addr,
++ struct in6_addr *new_addr)
+ {
+ struct udphdr *udph;
+
+- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+- skb_try_make_writable(skb, thoff + sizeof(*udph)))
+- return -1;
+-
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
+@@ -328,32 +417,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+-
+- return 0;
+ }
+
+-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+- unsigned int thoff, struct in6_addr *addr,
+- struct in6_addr *new_addr)
++static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
++ unsigned int thoff, struct in6_addr *addr,
++ struct in6_addr *new_addr)
+ {
+ switch (ip6h->nexthdr) {
+ case IPPROTO_TCP:
+- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
+- return NF_DROP;
++ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
+ break;
+ case IPPROTO_UDP:
+- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
+- return NF_DROP;
++ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
+ break;
+ }
+-
+- return 0;
+ }
+
+-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+- struct sk_buff *skb, struct ipv6hdr *ip6h,
+- unsigned int thoff,
+- enum flow_offload_tuple_dir dir)
++static void nf_flow_snat_ipv6(const struct flow_offload *flow,
++ struct sk_buff *skb, struct ipv6hdr *ip6h,
++ unsigned int thoff,
++ enum flow_offload_tuple_dir dir)
+ {
+ struct in6_addr addr, new_addr;
+
+@@ -368,17 +451,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+- default:
+- return -1;
+ }
+
+- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
++ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+ }
+
+-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+- struct sk_buff *skb, struct ipv6hdr *ip6h,
+- unsigned int thoff,
+- enum flow_offload_tuple_dir dir)
++static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
++ struct sk_buff *skb, struct ipv6hdr *ip6h,
++ unsigned int thoff,
++ enum flow_offload_tuple_dir dir)
+ {
+ struct in6_addr addr, new_addr;
+
+@@ -393,56 +474,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+- default:
+- return -1;
+ }
+
+- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
++ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+ }
+
+-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
+- struct sk_buff *skb,
+- enum flow_offload_tuple_dir dir)
++static void nf_flow_nat_ipv6(const struct flow_offload *flow,
++ struct sk_buff *skb,
++ enum flow_offload_tuple_dir dir,
++ struct ipv6hdr *ip6h)
+ {
+- struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ unsigned int thoff = sizeof(*ip6h);
+
+- if (flow->flags & FLOW_OFFLOAD_SNAT &&
+- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+- return -1;
+- if (flow->flags & FLOW_OFFLOAD_DNAT &&
+- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+- return -1;
+-
+- return 0;
++ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
++ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
++ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
++ }
++ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
++ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
++ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
++ }
+ }
+
+ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+- struct flow_offload_tuple *tuple)
++ struct flow_offload_tuple *tuple, u32 *hdrsize,
++ u32 offset)
+ {
+ struct flow_ports *ports;
+ struct ipv6hdr *ip6h;
+ unsigned int thoff;
+
+- if (!pskb_may_pull(skb, sizeof(*ip6h)))
++ thoff = sizeof(*ip6h) + offset;
++ if (!pskb_may_pull(skb, thoff))
+ return -1;
+
+- ip6h = ipv6_hdr(skb);
++ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+
+- if (ip6h->nexthdr != IPPROTO_TCP &&
+- ip6h->nexthdr != IPPROTO_UDP)
++ switch (ip6h->nexthdr) {
++ case IPPROTO_TCP:
++ *hdrsize = sizeof(struct tcphdr);
++ break;
++ case IPPROTO_UDP:
++ *hdrsize = sizeof(struct udphdr);
++ break;
++ default:
+ return -1;
++ }
+
+ if (ip6h->hop_limit <= 1)
+ return -1;
+
+- thoff = sizeof(*ip6h);
+- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
++ if (!pskb_may_pull(skb, thoff + *hdrsize))
+ return -1;
+
+- ip6h = ipv6_hdr(skb);
++ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v6 = ip6h->saddr;
+@@ -452,6 +537,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+ tuple->l3proto = AF_INET6;
+ tuple->l4proto = ip6h->nexthdr;
+ tuple->iifidx = dev->ifindex;
++ nf_flow_tuple_encap(skb, tuple);
+
+ return 0;
+ }
+@@ -467,13 +553,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct in6_addr *nexthop;
+ struct flow_offload *flow;
+ struct net_device *outdev;
++ unsigned int thoff, mtu;
++ u32 hdrsize, offset = 0;
+ struct ipv6hdr *ip6h;
+ struct rt6_info *rt;
++ int ret;
+
+- if (skb->protocol != htons(ETH_P_IPV6))
++ if (skb->protocol != htons(ETH_P_IPV6) &&
++ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
+ return NF_ACCEPT;
+
+- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
++ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+@@ -482,44 +572,57 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+- outdev = rt->dst.dev;
+
+- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
++ mtu = flow->tuplehash[dir].tuple.mtu + offset;
++ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
+ return NF_ACCEPT;
+
+- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
+- sizeof(*ip6h)))
++ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
++ thoff = sizeof(*ip6h) + offset;
++ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
+ return NF_ACCEPT;
+
+- if (!dst_check(&rt->dst, tuplehash->tuple.dst_cookie)) {
+- flow_offload_teardown(flow);
+- return NF_ACCEPT;
+- }
+-
+- if (skb_try_make_writable(skb, sizeof(*ip6h)))
++ if (skb_try_make_writable(skb, thoff + hdrsize))
+ return NF_DROP;
+
+- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
+- return NF_DROP;
++ flow_offload_refresh(flow_table, flow);
++
++ nf_flow_encap_pop(skb, tuplehash);
+
+- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ ip6h = ipv6_hdr(skb);
++ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
++
+ ip6h->hop_limit--;
+ skb->tstamp = 0;
+
+- if (unlikely(dst_xfrm(&rt->dst))) {
++ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
++ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
++
++ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
++ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+ IP6CB(skb)->iif = skb->dev->ifindex;
+ IP6CB(skb)->flags = IP6SKB_FORWARDED;
+ return nf_flow_xmit_xfrm(skb, state, &rt->dst);
+ }
+
+- skb->dev = outdev;
+- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+- skb_dst_set_noref(skb, &rt->dst);
+- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
++ switch (tuplehash->tuple.xmit_type) {
++ case FLOW_OFFLOAD_XMIT_NEIGH:
++ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
++ outdev = rt->dst.dev;
++ skb->dev = outdev;
++ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
++ skb_dst_set_noref(skb, &rt->dst);
++ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
++ ret = NF_STOLEN;
++ break;
++ case FLOW_OFFLOAD_XMIT_DIRECT:
++ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
++ if (ret == NF_DROP)
++ flow_offload_teardown(flow);
++ break;
++ }
+
+- return NF_STOLEN;
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
+diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
+new file mode 100644
+index 0000000..50f2f2e
+--- /dev/null
++++ b/net/netfilter/nf_flow_table_offload.c
+@@ -0,0 +1,1199 @@
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/netfilter.h>
++#include <linux/rhashtable.h>
++#include <linux/netdevice.h>
++#include <linux/tc_act/tc_csum.h>
++#include <net/flow_offload.h>
++#include <net/netfilter/nf_flow_table.h>
++#include <net/netfilter/nf_tables.h>
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_conntrack_acct.h>
++#include <net/netfilter/nf_conntrack_core.h>
++#include <net/netfilter/nf_conntrack_tuple.h>
++
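++/*
++ * Hardware add, del and stats requests are serviced from dedicated
++ * workqueues so that driver callbacks, which may sleep, never run in
++ * the packet or GC path.
++ */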
++static struct workqueue_struct *nf_flow_offload_add_wq;
++static struct workqueue_struct *nf_flow_offload_del_wq;
++static struct workqueue_struct *nf_flow_offload_stats_wq;
++
++struct flow_offload_work {
++ struct list_head list;
++ enum flow_cls_command cmd;
++ int priority;
++ struct nf_flowtable *flowtable;
++ struct flow_offload *flow;
++ struct work_struct work;
++};
++
++#define NF_FLOW_DISSECTOR(__match, __type, __field) \
++ (__match)->dissector.offset[__type] = \
++ offsetof(struct nf_flow_key, __field)
++
++static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
++ struct ip_tunnel_info *tun_info)
++{
++ struct nf_flow_key *mask = &match->mask;
++ struct nf_flow_key *key = &match->key;
++ unsigned int enc_keys;
++
++ if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
++ return;
++
++ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
++ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
++ key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
++ mask->enc_key_id.keyid = 0xffffffff;
++ enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
++ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
++
++ if (ip_tunnel_info_af(tun_info) == AF_INET) {
++ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
++ enc_ipv4);
++ key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
++ key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
++ if (key->enc_ipv4.src)
++ mask->enc_ipv4.src = 0xffffffff;
++ if (key->enc_ipv4.dst)
++ mask->enc_ipv4.dst = 0xffffffff;
++ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
++ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
++ } else {
++ memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
++ sizeof(struct in6_addr));
++ memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
++ sizeof(struct in6_addr));
++ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
++ sizeof(struct in6_addr)))
++ memset(&mask->enc_ipv6.src, 0xff,
++ sizeof(struct in6_addr));
++ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
++ sizeof(struct in6_addr)))
++ memset(&mask->enc_ipv6.dst, 0xff,
++ sizeof(struct in6_addr));
++ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
++ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
++ }
++
++ match->dissector.used_keys |= enc_keys;
++}
++
++static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
++ struct flow_dissector_key_vlan *mask,
++ u16 vlan_id, __be16 proto)
++{
++ key->vlan_id = vlan_id;
++ mask->vlan_id = VLAN_VID_MASK;
++ key->vlan_tpid = proto;
++ mask->vlan_tpid = 0xffff;
++}
++
++static int nf_flow_rule_match(struct nf_flow_match *match,
++ const struct flow_offload_tuple *tuple,
++ struct dst_entry *other_dst)
++{
++ struct nf_flow_key *mask = &match->mask;
++ struct nf_flow_key *key = &match->key;
++ struct ip_tunnel_info *tun_info;
++ bool vlan_encap = false;
++
++ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
++ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
++ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
++ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
++ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
++ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
++ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
++
++ if (other_dst && other_dst->lwtstate) {
++ tun_info = lwt_tun_info(other_dst->lwtstate);
++ nf_flow_rule_lwt_match(match, tun_info);
++ }
++
++ key->meta.ingress_ifindex = tuple->iifidx;
++ mask->meta.ingress_ifindex = 0xffffffff;
++
++ if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
++ tuple->encap[0].proto == htons(ETH_P_8021Q)) {
++ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
++ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
++ tuple->encap[0].id,
++ tuple->encap[0].proto);
++ vlan_encap = true;
++ }
++
++ if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
++ tuple->encap[1].proto == htons(ETH_P_8021Q)) {
++ if (vlan_encap) {
++ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
++ cvlan);
++ nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
++ tuple->encap[1].id,
++ tuple->encap[1].proto);
++ } else {
++ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
++ vlan);
++ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
++ tuple->encap[1].id,
++ tuple->encap[1].proto);
++ }
++ }
++
++ switch (tuple->l3proto) {
++ case AF_INET:
++ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
++ key->basic.n_proto = htons(ETH_P_IP);
++ key->ipv4.src = tuple->src_v4.s_addr;
++ mask->ipv4.src = 0xffffffff;
++ key->ipv4.dst = tuple->dst_v4.s_addr;
++ mask->ipv4.dst = 0xffffffff;
++ break;
++ case AF_INET6:
++ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
++ key->basic.n_proto = htons(ETH_P_IPV6);
++ key->ipv6.src = tuple->src_v6;
++ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
++ key->ipv6.dst = tuple->dst_v6;
++ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
++ break;
++ default:
++ return -EOPNOTSUPP;
++ }
++ mask->control.addr_type = 0xffff;
++ match->dissector.used_keys |= BIT(key->control.addr_type);
++ mask->basic.n_proto = 0xffff;
++
++ switch (tuple->l4proto) {
++ case IPPROTO_TCP:
++ key->tcp.flags = 0;
++ mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
++ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
++ break;
++ case IPPROTO_UDP:
++ break;
++ default:
++ return -EOPNOTSUPP;
++ }
++
++ key->basic.ip_proto = tuple->l4proto;
++ mask->basic.ip_proto = 0xff;
++
++ key->tp.src = tuple->src_port;
++ mask->tp.src = 0xffff;
++ key->tp.dst = tuple->dst_port;
++ mask->tp.dst = 0xffff;
++
++ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
++ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
++ BIT(FLOW_DISSECTOR_KEY_BASIC) |
++ BIT(FLOW_DISSECTOR_KEY_PORTS);
++ return 0;
++}
++
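++/*
++ * Header rewrites are expressed to drivers as 32-bit masked writes at
++ * a given offset, mirroring tc pedit semantics.
++ */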
++static void flow_offload_mangle(struct flow_action_entry *entry,
++ enum flow_action_mangle_base htype, u32 offset,
++ const __be32 *value, const __be32 *mask)
++{
++ entry->id = FLOW_ACTION_MANGLE;
++ entry->mangle.htype = htype;
++ entry->mangle.offset = offset;
++ memcpy(&entry->mangle.mask, mask, sizeof(u32));
++ memcpy(&entry->mangle.val, value, sizeof(u32));
++}
++
++static inline struct flow_action_entry *
++flow_action_entry_next(struct nf_flow_rule *flow_rule)
++{
++ int i = flow_rule->rule->action.num_entries++;
++
++ return &flow_rule->rule->action.entries[i];
++}
++
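++/*
++ * For the neigh transmit path the new source MAC is read from this
++ * tuple's egress device, i.e. the device the opposite direction
++ * ingresses on; the direct path carries the resolved MAC in the tuple
++ * itself.
++ */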
++static int flow_offload_eth_src(struct net *net,
++ const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
++ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
++ const struct flow_offload_tuple *other_tuple, *this_tuple;
++ struct net_device *dev = NULL;
++ const unsigned char *addr;
++ u32 mask, val;
++ u16 val16;
++
++ this_tuple = &flow->tuplehash[dir].tuple;
++
++ switch (this_tuple->xmit_type) {
++ case FLOW_OFFLOAD_XMIT_DIRECT:
++ addr = this_tuple->out.h_source;
++ break;
++ case FLOW_OFFLOAD_XMIT_NEIGH:
++ other_tuple = &flow->tuplehash[!dir].tuple;
++ dev = dev_get_by_index(net, other_tuple->iifidx);
++ if (!dev)
++ return -ENOENT;
++
++ addr = dev->dev_addr;
++ break;
++ default:
++ return -EOPNOTSUPP;
++ }
++
++ mask = ~0xffff0000;
++ memcpy(&val16, addr, 2);
++ val = val16 << 16;
++ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
++ &val, &mask);
++
++ mask = ~0xffffffff;
++ memcpy(&val, addr + 2, 4);
++ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
++ &val, &mask);
++
++ if (dev)
++ dev_put(dev);
++
++ return 0;
++}
++
++static int flow_offload_eth_dst(struct net *net,
++ const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
++ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
++ const struct flow_offload_tuple *other_tuple, *this_tuple;
++ const struct dst_entry *dst_cache;
++ unsigned char ha[ETH_ALEN];
++ struct neighbour *n;
++ const void *daddr;
++ u32 mask, val;
++ u8 nud_state;
++ u16 val16;
++
++ this_tuple = &flow->tuplehash[dir].tuple;
++
++ switch (this_tuple->xmit_type) {
++ case FLOW_OFFLOAD_XMIT_DIRECT:
++ ether_addr_copy(ha, this_tuple->out.h_dest);
++ break;
++ case FLOW_OFFLOAD_XMIT_NEIGH:
++ other_tuple = &flow->tuplehash[!dir].tuple;
++ daddr = &other_tuple->src_v4;
++ dst_cache = this_tuple->dst_cache;
++ n = dst_neigh_lookup(dst_cache, daddr);
++ if (!n)
++ return -ENOENT;
++
++ read_lock_bh(&n->lock);
++ nud_state = n->nud_state;
++ ether_addr_copy(ha, n->ha);
++ read_unlock_bh(&n->lock);
++ neigh_release(n);
++
++ if (!(nud_state & NUD_VALID))
++ return -ENOENT;
++ break;
++ default:
++ return -EOPNOTSUPP;
++ }
++
++ mask = ~0xffffffff;
++ memcpy(&val, ha, 4);
++ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
++ &val, &mask);
++
++ mask = ~0x0000ffff;
++ memcpy(&val16, ha + 4, 2);
++ val = val16;
++ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
++ &val, &mask);
++
++ return 0;
++}
++
++static void flow_offload_ipv4_snat(struct net *net,
++ const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
++ u32 mask = ~htonl(0xffffffff);
++ __be32 addr;
++ u32 offset;
++
++ switch (dir) {
++ case FLOW_OFFLOAD_DIR_ORIGINAL:
++ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
++ offset = offsetof(struct iphdr, saddr);
++ break;
++ case FLOW_OFFLOAD_DIR_REPLY:
++ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
++ offset = offsetof(struct iphdr, daddr);
++ break;
++ default:
++ return;
++ }
++
++ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
++ &addr, &mask);
++}
++
++static void flow_offload_ipv4_dnat(struct net *net,
++ const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
++ u32 mask = ~htonl(0xffffffff);
++ __be32 addr;
++ u32 offset;
++
++ switch (dir) {
++ case FLOW_OFFLOAD_DIR_ORIGINAL:
++ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
++ offset = offsetof(struct iphdr, daddr);
++ break;
++ case FLOW_OFFLOAD_DIR_REPLY:
++ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
++ offset = offsetof(struct iphdr, saddr);
++ break;
++ default:
++ return;
++ }
++
++ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
++ &addr, &mask);
++}
++
++static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
++ unsigned int offset,
++ const __be32 *addr, const __be32 *mask)
++{
++ struct flow_action_entry *entry;
++ int i, j;
++
++ for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
++ entry = flow_action_entry_next(flow_rule);
++ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
++ offset + i, &addr[j], mask);
++ }
++}
++
++static void flow_offload_ipv6_snat(struct net *net,
++ const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ u32 mask = ~htonl(0xffffffff);
++ const __be32 *addr;
++ u32 offset;
++
++ switch (dir) {
++ case FLOW_OFFLOAD_DIR_ORIGINAL:
++ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
++ offset = offsetof(struct ipv6hdr, saddr);
++ break;
++ case FLOW_OFFLOAD_DIR_REPLY:
++ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
++ offset = offsetof(struct ipv6hdr, daddr);
++ break;
++ default:
++ return;
++ }
++
++ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
++}
++
++static void flow_offload_ipv6_dnat(struct net *net,
++ const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ u32 mask = ~htonl(0xffffffff);
++ const __be32 *addr;
++ u32 offset;
++
++ switch (dir) {
++ case FLOW_OFFLOAD_DIR_ORIGINAL:
++ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
++ offset = offsetof(struct ipv6hdr, daddr);
++ break;
++ case FLOW_OFFLOAD_DIR_REPLY:
++ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
++ offset = offsetof(struct ipv6hdr, saddr);
++ break;
++ default:
++ return;
++ }
++
++ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
++}
++
++static int flow_offload_l4proto(const struct flow_offload *flow)
++{
++ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
++ u8 type = 0;
++
++ switch (protonum) {
++ case IPPROTO_TCP:
++ type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
++ break;
++ case IPPROTO_UDP:
++ type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
++ break;
++ default:
++ break;
++ }
++
++ return type;
++}
++
++static void flow_offload_port_snat(struct net *net,
++ const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
++ u32 mask, port;
++ u32 offset;
++
++ switch (dir) {
++ case FLOW_OFFLOAD_DIR_ORIGINAL:
++ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
++ offset = 0; /* offsetof(struct tcphdr, source); */
++ port = htonl(port << 16);
++ mask = ~htonl(0xffff0000);
++ break;
++ case FLOW_OFFLOAD_DIR_REPLY:
++ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
++ offset = 0; /* offsetof(struct tcphdr, dest); */
++ port = htonl(port);
++ mask = ~htonl(0xffff);
++ break;
++ default:
++ return;
++ }
++
++ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
++ &port, &mask);
++}
++
++static void flow_offload_port_dnat(struct net *net,
++ const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
++ u32 mask, port;
++ u32 offset;
++
++ switch (dir) {
++ case FLOW_OFFLOAD_DIR_ORIGINAL:
++ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
++ offset = 0; /* offsetof(struct tcphdr, dest); */
++ port = htonl(port);
++ mask = ~htonl(0xffff);
++ break;
++ case FLOW_OFFLOAD_DIR_REPLY:
++ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
++ offset = 0; /* offsetof(struct tcphdr, source); */
++ port = htonl(port << 16);
++ mask = ~htonl(0xffff0000);
++ break;
++ default:
++ return;
++ }
++
++ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
++ &port, &mask);
++}
++
++static void flow_offload_ipv4_checksum(struct net *net,
++ const struct flow_offload *flow,
++ struct nf_flow_rule *flow_rule)
++{
++ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
++ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
++
++ entry->id = FLOW_ACTION_CSUM;
++ entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
++
++ switch (protonum) {
++ case IPPROTO_TCP:
++ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
++ break;
++ case IPPROTO_UDP:
++ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
++ break;
++ }
++}
++
++static void flow_offload_redirect(struct net *net,
++ const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ const struct flow_offload_tuple *this_tuple, *other_tuple;
++ struct flow_action_entry *entry;
++ struct net_device *dev;
++ int ifindex;
++
++ this_tuple = &flow->tuplehash[dir].tuple;
++ switch (this_tuple->xmit_type) {
++ case FLOW_OFFLOAD_XMIT_DIRECT:
++ this_tuple = &flow->tuplehash[dir].tuple;
++ ifindex = this_tuple->out.hw_ifidx;
++ break;
++ case FLOW_OFFLOAD_XMIT_NEIGH:
++ other_tuple = &flow->tuplehash[!dir].tuple;
++ ifindex = other_tuple->iifidx;
++ break;
++ default:
++ return;
++ }
++
++ dev = dev_get_by_index(net, ifindex);
++ if (!dev)
++ return;
++
++ entry = flow_action_entry_next(flow_rule);
++ entry->id = FLOW_ACTION_REDIRECT;
++ entry->dev = dev;
++}
++
++static void flow_offload_encap_tunnel(const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ const struct flow_offload_tuple *this_tuple;
++ struct flow_action_entry *entry;
++ struct dst_entry *dst;
++
++ this_tuple = &flow->tuplehash[dir].tuple;
++ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
++ return;
++
++ dst = this_tuple->dst_cache;
++ if (dst && dst->lwtstate) {
++ struct ip_tunnel_info *tun_info;
++
++ tun_info = lwt_tun_info(dst->lwtstate);
++ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
++ entry = flow_action_entry_next(flow_rule);
++ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
++ entry->tunnel = tun_info;
++ }
++ }
++}
++
++static void flow_offload_decap_tunnel(const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ const struct flow_offload_tuple *other_tuple;
++ struct flow_action_entry *entry;
++ struct dst_entry *dst;
++
++ other_tuple = &flow->tuplehash[!dir].tuple;
++ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
++ return;
++
++ dst = other_tuple->dst_cache;
++ if (dst && dst->lwtstate) {
++ struct ip_tunnel_info *tun_info;
++
++ tun_info = lwt_tun_info(dst->lwtstate);
++ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
++ entry = flow_action_entry_next(flow_rule);
++ entry->id = FLOW_ACTION_TUNNEL_DECAP;
++ }
++ }
++}
++
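++/*
++ * Common action list for routed flows: tunnel decap/encap, Ethernet
++ * header rewrite, then VLAN pops for this tuple's ingress tags and
++ * VLAN/PPPoE pushes for the opposite tuple's encapsulation.
++ */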
++static int
++nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ const struct flow_offload_tuple *other_tuple;
++ const struct flow_offload_tuple *tuple;
++ int i;
++
++ flow_offload_decap_tunnel(flow, dir, flow_rule);
++ flow_offload_encap_tunnel(flow, dir, flow_rule);
++
++ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
++ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
++ return -1;
++
++ tuple = &flow->tuplehash[dir].tuple;
++
++ for (i = 0; i < tuple->encap_num; i++) {
++ struct flow_action_entry *entry;
++
++ if (tuple->in_vlan_ingress & BIT(i))
++ continue;
++
++ if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
++ entry = flow_action_entry_next(flow_rule);
++ entry->id = FLOW_ACTION_VLAN_POP;
++ }
++ }
++
++ other_tuple = &flow->tuplehash[!dir].tuple;
++
++ for (i = 0; i < other_tuple->encap_num; i++) {
++ struct flow_action_entry *entry;
++
++ if (other_tuple->in_vlan_ingress & BIT(i))
++ continue;
++
++ entry = flow_action_entry_next(flow_rule);
++
++ switch (other_tuple->encap[i].proto) {
++ case htons(ETH_P_PPP_SES):
++ entry->id = FLOW_ACTION_PPPOE_PUSH;
++ entry->pppoe.sid = other_tuple->encap[i].id;
++ break;
++ case htons(ETH_P_8021Q):
++ entry->id = FLOW_ACTION_VLAN_PUSH;
++ entry->vlan.vid = other_tuple->encap[i].id;
++ entry->vlan.proto = other_tuple->encap[i].proto;
++ break;
++ }
++ }
++
++ return 0;
++}
++
++int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
++ return -1;
++
++ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
++ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
++ flow_offload_port_snat(net, flow, dir, flow_rule);
++ }
++ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
++ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
++ flow_offload_port_dnat(net, flow, dir, flow_rule);
++ }
++ if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
++ test_bit(NF_FLOW_DNAT, &flow->flags))
++ flow_offload_ipv4_checksum(net, flow, flow_rule);
++
++ flow_offload_redirect(net, flow, dir, flow_rule);
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
++
++int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
++ return -1;
++
++ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
++ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
++ flow_offload_port_snat(net, flow, dir, flow_rule);
++ }
++ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
++ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
++ flow_offload_port_dnat(net, flow, dir, flow_rule);
++ }
++
++ flow_offload_redirect(net, flow, dir, flow_rule);
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
++
++#define NF_FLOW_RULE_ACTION_MAX 16
++
++static struct nf_flow_rule *
++nf_flow_offload_rule_alloc(struct net *net,
++ const struct flow_offload_work *offload,
++ enum flow_offload_tuple_dir dir)
++{
++ const struct nf_flowtable *flowtable = offload->flowtable;
++ const struct flow_offload_tuple *tuple, *other_tuple;
++ const struct flow_offload *flow = offload->flow;
++ struct dst_entry *other_dst = NULL;
++ struct nf_flow_rule *flow_rule;
++ int err = -ENOMEM;
++
++ flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
++ if (!flow_rule)
++ goto err_flow;
++
++ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
++ if (!flow_rule->rule)
++ goto err_flow_rule;
++
++ flow_rule->rule->match.dissector = &flow_rule->match.dissector;
++ flow_rule->rule->match.mask = &flow_rule->match.mask;
++ flow_rule->rule->match.key = &flow_rule->match.key;
++
++ tuple = &flow->tuplehash[dir].tuple;
++ other_tuple = &flow->tuplehash[!dir].tuple;
++ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
++ other_dst = other_tuple->dst_cache;
++
++ err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
++ if (err < 0)
++ goto err_flow_match;
++
++ flow_rule->rule->action.num_entries = 0;
++ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
++ goto err_flow_match;
++
++ return flow_rule;
++
++err_flow_match:
++ kfree(flow_rule->rule);
++err_flow_rule:
++ kfree(flow_rule);
++err_flow:
++ return NULL;
++}
++
++static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
++{
++ struct flow_action_entry *entry;
++ int i;
++
++ for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
++ entry = &flow_rule->rule->action.entries[i];
++ if (entry->id != FLOW_ACTION_REDIRECT)
++ continue;
++
++ dev_put(entry->dev);
++ }
++ kfree(flow_rule->rule);
++ kfree(flow_rule);
++}
++
++static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
++{
++ int i;
++
++ for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
++ __nf_flow_offload_destroy(flow_rule[i]);
++}
++
++static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
++ struct nf_flow_rule *flow_rule[])
++{
++ struct net *net = read_pnet(&offload->flowtable->net);
++
++ flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
++ FLOW_OFFLOAD_DIR_ORIGINAL);
++ if (!flow_rule[0])
++ return -ENOMEM;
++
++ flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
++ FLOW_OFFLOAD_DIR_REPLY);
++ if (!flow_rule[1]) {
++ __nf_flow_offload_destroy(flow_rule[0]);
++ return -ENOMEM;
++ }
++
++ return 0;
++}
++
++static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
++ __be16 proto, int priority,
++ enum flow_cls_command cmd,
++ const struct flow_offload_tuple *tuple,
++ struct netlink_ext_ack *extack)
++{
++ cls_flow->common.protocol = proto;
++ cls_flow->common.prio = priority;
++ cls_flow->common.extack = extack;
++ cls_flow->command = cmd;
++ cls_flow->cookie = (unsigned long)tuple;
++}
++
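++/*
++ * Hand the classifier request to every callback on the flowtable's
++ * block; the return value is the number of callbacks that accepted it.
++ * FLOW_CLS_STATS results are copied back into @stats.
++ */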
++static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
++ struct flow_offload *flow,
++ struct nf_flow_rule *flow_rule,
++ enum flow_offload_tuple_dir dir,
++ int priority, int cmd,
++ struct flow_stats *stats,
++ struct list_head *block_cb_list)
++{
++ struct flow_cls_offload cls_flow = {};
++ struct flow_block_cb *block_cb;
++ struct netlink_ext_ack extack;
++ __be16 proto = ETH_P_ALL;
++ int err, i = 0;
++
++ nf_flow_offload_init(&cls_flow, proto, priority, cmd,
++ &flow->tuplehash[dir].tuple, &extack);
++ if (cmd == FLOW_CLS_REPLACE)
++ cls_flow.rule = flow_rule->rule;
++
++ down_read(&flowtable->flow_block_lock);
++ list_for_each_entry(block_cb, block_cb_list, list) {
++ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
++ block_cb->cb_priv);
++ if (err < 0)
++ continue;
++
++ i++;
++ }
++ up_read(&flowtable->flow_block_lock);
++
++ if (cmd == FLOW_CLS_STATS)
++ memcpy(stats, &cls_flow.stats, sizeof(*stats));
++
++ return i;
++}
++
++static int flow_offload_tuple_add(struct flow_offload_work *offload,
++ struct nf_flow_rule *flow_rule,
++ enum flow_offload_tuple_dir dir)
++{
++ return nf_flow_offload_tuple(offload->flowtable, offload->flow,
++ flow_rule, dir, offload->priority,
++ FLOW_CLS_REPLACE, NULL,
++ &offload->flowtable->flow_block.cb_list);
++}
++
++static void flow_offload_tuple_del(struct flow_offload_work *offload,
++ enum flow_offload_tuple_dir dir)
++{
++ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
++ offload->priority, FLOW_CLS_DESTROY, NULL,
++ &offload->flowtable->flow_block.cb_list);
++}
++
++static int flow_offload_rule_add(struct flow_offload_work *offload,
++ struct nf_flow_rule *flow_rule[])
++{
++ int ok_count = 0;
++
++ ok_count += flow_offload_tuple_add(offload, flow_rule[0],
++ FLOW_OFFLOAD_DIR_ORIGINAL);
++ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
++ FLOW_OFFLOAD_DIR_REPLY);
++ if (ok_count == 0)
++ return -ENOENT;
++
++ return 0;
++}
++
++static void flow_offload_work_add(struct flow_offload_work *offload)
++{
++ struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
++ int err;
++
++ err = nf_flow_offload_alloc(offload, flow_rule);
++ if (err < 0)
++ return;
++
++ err = flow_offload_rule_add(offload, flow_rule);
++ if (err < 0)
++ goto out;
++
++ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
++
++out:
++ nf_flow_offload_destroy(flow_rule);
++}
++
++static void flow_offload_work_del(struct flow_offload_work *offload)
++{
++ clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
++ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
++ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
++ set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
++}
++
++static void flow_offload_tuple_stats(struct flow_offload_work *offload,
++ enum flow_offload_tuple_dir dir,
++ struct flow_stats *stats)
++{
++ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
++ offload->priority, FLOW_CLS_STATS, stats,
++ &offload->flowtable->flow_block.cb_list);
++}
++
++static void flow_offload_work_stats(struct flow_offload_work *offload)
++{
++ struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
++ u64 lastused;
++
++ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
++ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
++
++ lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
++ offload->flow->timeout = max_t(u64, offload->flow->timeout,
++ lastused + flow_offload_get_timeout(offload->flow));
++
++ if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
++ if (stats[0].pkts)
++ nf_ct_acct_add(offload->flow->ct,
++ FLOW_OFFLOAD_DIR_ORIGINAL,
++ stats[0].pkts, stats[0].bytes);
++ if (stats[1].pkts)
++ nf_ct_acct_add(offload->flow->ct,
++ FLOW_OFFLOAD_DIR_REPLY,
++ stats[1].pkts, stats[1].bytes);
++ }
++}
++
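++/* Workqueue handler: dispatch a queued add/del/stats request, then drop
++ * the NF_FLOW_HW_PENDING guard so new work can be queued for this flow.
++ */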
++static void flow_offload_work_handler(struct work_struct *work)
++{
++ struct flow_offload_work *offload;
++
++ offload = container_of(work, struct flow_offload_work, work);
++ switch (offload->cmd) {
++ case FLOW_CLS_REPLACE:
++ flow_offload_work_add(offload);
++ break;
++ case FLOW_CLS_DESTROY:
++ flow_offload_work_del(offload);
++ break;
++ case FLOW_CLS_STATS:
++ flow_offload_work_stats(offload);
++ break;
++ default:
++ WARN_ON_ONCE(1);
++ }
++
++ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
++ kfree(offload);
++}
++
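++/* Each command type gets its own workqueue so that flushing deletions
++ * does not have to wait behind pending add or stats work.
++ */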
++static void flow_offload_queue_work(struct flow_offload_work *offload)
++{
++ if (offload->cmd == FLOW_CLS_REPLACE)
++ queue_work(nf_flow_offload_add_wq, &offload->work);
++ else if (offload->cmd == FLOW_CLS_DESTROY)
++ queue_work(nf_flow_offload_del_wq, &offload->work);
++ else
++ queue_work(nf_flow_offload_stats_wq, &offload->work);
++}
++
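++/* Allocate a work item for @flow. NF_FLOW_HW_PENDING serializes offload
++ * requests: at most one work item per flow may be in flight at a time.
++ */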
++static struct flow_offload_work *
++nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
++ struct flow_offload *flow, unsigned int cmd)
++{
++ struct flow_offload_work *offload;
++
++ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
++ return NULL;
++
++ offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
++ if (!offload) {
++ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
++ return NULL;
++ }
++
++ offload->cmd = cmd;
++ offload->flow = flow;
++ offload->priority = flowtable->priority;
++ offload->flowtable = flowtable;
++ INIT_WORK(&offload->work, flow_offload_work_handler);
++
++ return offload;
++}
++
++void nf_flow_offload_add(struct nf_flowtable *flowtable,
++ struct flow_offload *flow)
++{
++ struct flow_offload_work *offload;
++
++ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
++ if (!offload)
++ return;
++
++ flow_offload_queue_work(offload);
++}
++
++void nf_flow_offload_del(struct nf_flowtable *flowtable,
++ struct flow_offload *flow)
++{
++ struct flow_offload_work *offload;
++
++ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
++ if (!offload)
++ return;
++
++ set_bit(NF_FLOW_HW_DYING, &flow->flags);
++ flow_offload_queue_work(offload);
++}
++
++void nf_flow_offload_stats(struct nf_flowtable *flowtable,
++ struct flow_offload *flow, bool force)
++{
++ struct flow_offload_work *offload;
++ __s32 delta;
++
++ if (!force) {
++ delta = nf_flow_timeout_delta(flow->timeout);
++ if (delta >= (9 * flow_offload_get_timeout(flow)) / 10)
++ return;
++ }
++
++ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
++ if (!offload)
++ return;
++
++ flow_offload_queue_work(offload);
++}
++
++void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
++{
++ if (nf_flowtable_hw_offload(flowtable)) {
++ flush_workqueue(nf_flow_offload_add_wq);
++ flush_workqueue(nf_flow_offload_del_wq);
++ flush_workqueue(nf_flow_offload_stats_wq);
++ }
++}
++
++static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
++ struct flow_block_offload *bo,
++ enum flow_block_command cmd)
++{
++ struct flow_block_cb *block_cb, *next;
++ int err = 0;
++
++ down_write(&flowtable->flow_block_lock);
++
++ switch (cmd) {
++ case FLOW_BLOCK_BIND:
++ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
++ break;
++ case FLOW_BLOCK_UNBIND:
++ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
++ list_del(&block_cb->list);
++ flow_block_cb_free(block_cb);
++ }
++ break;
++ default:
++ WARN_ON_ONCE(1);
++ err = -EOPNOTSUPP;
++ }
++
++ up_write(&flowtable->flow_block_lock);
++
++ return err;
++}
++
++static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
++ struct net *net,
++ enum flow_block_command cmd,
++ struct nf_flowtable *flowtable,
++ struct netlink_ext_ack *extack)
++{
++ memset(bo, 0, sizeof(*bo));
++ bo->net = net;
++ bo->block = &flowtable->flow_block;
++ bo->command = cmd;
++ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
++ bo->extack = extack;
++ INIT_LIST_HEAD(&bo->cb_list);
++}
++
++static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
++ struct nf_flowtable *flowtable,
++ struct net_device *dev,
++ enum flow_block_command cmd,
++ struct netlink_ext_ack *extack)
++{
++ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
++ extack);
++ flow_indr_block_call(dev, bo, cmd);
++
++ if (list_empty(&bo->cb_list))
++ return -EOPNOTSUPP;
++
++ return 0;
++}
++
++static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
++ struct nf_flowtable *flowtable,
++ struct net_device *dev,
++ enum flow_block_command cmd,
++ struct netlink_ext_ack *extack)
++{
++ int err;
++
++ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
++ extack);
++ down_write(&flowtable->flow_block_lock);
++ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
++ up_write(&flowtable->flow_block_lock);
++ if (err < 0)
++ return err;
++
++ return 0;
++}
++
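++/* Bind or unbind the flowtable block for @dev. Devices that implement
++ * ndo_setup_tc are set up directly via TC_SETUP_FT; all others go
++ * through the indirect block callbacks registered by drivers.
++ */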
++int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
++ struct net_device *dev,
++ enum flow_block_command cmd)
++{
++ struct netlink_ext_ack extack = {};
++ struct flow_block_offload bo;
++ int err;
++
++ if (!nf_flowtable_hw_offload(flowtable))
++ return 0;
++
++ if (dev->netdev_ops->ndo_setup_tc)
++ err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
++ &extack);
++ else
++ err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
++ &extack);
++ if (err < 0)
++ return err;
++
++ return nf_flow_table_block_setup(flowtable, &bo, cmd);
++}
++EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
++
++int nf_flow_table_offload_init(void)
++{
++ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
++ WQ_UNBOUND | WQ_SYSFS, 0);
++ if (!nf_flow_offload_add_wq)
++ return -ENOMEM;
++
++ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
++ WQ_UNBOUND | WQ_SYSFS, 0);
++ if (!nf_flow_offload_del_wq)
++ goto err_del_wq;
++
++ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
++ WQ_UNBOUND | WQ_SYSFS, 0);
++ if (!nf_flow_offload_stats_wq)
++ goto err_stats_wq;
++
++ return 0;
++
++err_stats_wq:
++ destroy_workqueue(nf_flow_offload_del_wq);
++err_del_wq:
++ destroy_workqueue(nf_flow_offload_add_wq);
++ return -ENOMEM;
++}
++
++void nf_flow_table_offload_exit(void)
++{
++ destroy_workqueue(nf_flow_offload_add_wq);
++ destroy_workqueue(nf_flow_offload_del_wq);
++ destroy_workqueue(nf_flow_offload_stats_wq);
++}
+diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c
+new file mode 100644
+index 0000000..2cab008
+--- /dev/null
++++ b/net/netfilter/xt_FLOWOFFLOAD.c
+@@ -0,0 +1,794 @@
++/*
++ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/netfilter.h>
++#include <linux/netfilter/xt_FLOWOFFLOAD.h>
++#include <linux/if_vlan.h>
++#include <net/ip.h>
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_conntrack_extend.h>
++#include <net/netfilter/nf_conntrack_helper.h>
++#include <net/netfilter/nf_flow_table.h>
++
++struct xt_flowoffload_hook {
++ struct hlist_node list;
++ struct nf_hook_ops ops;
++ struct net *net;
++ bool registered;
++ bool used;
++};
++
++struct xt_flowoffload_table {
++ struct nf_flowtable ft;
++ struct hlist_head hooks;
++ struct delayed_work work;
++};
++
++struct nf_forward_info {
++ const struct net_device *indev;
++ const struct net_device *outdev;
++ const struct net_device *hw_outdev;
++ struct id {
++ __u16 id;
++ __be16 proto;
++ } encap[NF_FLOW_TABLE_ENCAP_MAX];
++ u8 num_encaps;
++ u8 ingress_vlans;
++ u8 h_source[ETH_ALEN];
++ u8 h_dest[ETH_ALEN];
++ enum flow_offload_xmit_type xmit_type;
++};
++
++static DEFINE_SPINLOCK(hooks_lock);
++
++struct xt_flowoffload_table flowtable[2];
++
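++/* Ingress hook: look past a VLAN or PPPoE header to find the L3
++ * protocol, then hand the skb to the IPv4 or IPv6 flowtable path.
++ */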
++static unsigned int
++xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
++ const struct nf_hook_state *state)
++{
++ struct vlan_ethhdr *veth;
++ __be16 proto;
++
++ switch (skb->protocol) {
++ case htons(ETH_P_8021Q):
++ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
++ proto = veth->h_vlan_encapsulated_proto;
++ break;
++ case htons(ETH_P_PPP_SES):
++ proto = nf_flow_pppoe_proto(skb);
++ break;
++ default:
++ proto = skb->protocol;
++ break;
++ }
++
++ switch (proto) {
++ case htons(ETH_P_IP):
++ return nf_flow_offload_ip_hook(priv, skb, state);
++ case htons(ETH_P_IPV6):
++ return nf_flow_offload_ipv6_hook(priv, skb, state);
++ }
++
++ return NF_ACCEPT;
++}
++
++static int
++xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
++ struct net_device *dev)
++{
++ struct xt_flowoffload_hook *hook;
++ struct nf_hook_ops *ops;
++
++ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
++ if (!hook)
++ return -ENOMEM;
++
++ ops = &hook->ops;
++ ops->pf = NFPROTO_NETDEV;
++ ops->hooknum = NF_NETDEV_INGRESS;
++ ops->priority = 10;
++ ops->priv = &table->ft;
++ ops->hook = xt_flowoffload_net_hook;
++ ops->dev = dev;
++
++ hlist_add_head(&hook->list, &table->hooks);
++ mod_delayed_work(system_power_efficient_wq, &table->work, 0);
++
++ return 0;
++}
++
++static struct xt_flowoffload_hook *
++flow_offload_lookup_hook(struct xt_flowoffload_table *table,
++ struct net_device *dev)
++{
++ struct xt_flowoffload_hook *hook;
++
++ hlist_for_each_entry(hook, &table->hooks, list) {
++ if (hook->ops.dev == dev)
++ return hook;
++ }
++
++ return NULL;
++}
++
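++/* Mark the ingress hook for @dev as in use, creating it first if this
++ * is the first flow seen on that device.
++ */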
++static void
++xt_flowoffload_check_device(struct xt_flowoffload_table *table,
++ struct net_device *dev)
++{
++ struct xt_flowoffload_hook *hook;
++
++ if (!dev)
++ return;
++
++ spin_lock_bh(&hooks_lock);
++ hook = flow_offload_lookup_hook(table, dev);
++ if (hook)
++ hook->used = true;
++ else
++ xt_flowoffload_create_hook(table, dev);
++ spin_unlock_bh(&hooks_lock);
++}
++
++static void
++xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
++{
++ struct xt_flowoffload_hook *hook;
++
++restart:
++ hlist_for_each_entry(hook, &table->hooks, list) {
++ if (hook->registered)
++ continue;
++
++ hook->registered = true;
++ hook->net = dev_net(hook->ops.dev);
++ spin_unlock_bh(&hooks_lock);
++ nf_register_net_hook(hook->net, &hook->ops);
++ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
++ table->ft.type->setup(&table->ft, hook->ops.dev,
++ FLOW_BLOCK_BIND);
++ spin_lock_bh(&hooks_lock);
++ goto restart;
++ }
++}
++
++static bool
++xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
++{
++ struct xt_flowoffload_hook *hook;
++ bool active = false;
++
++restart:
++ spin_lock_bh(&hooks_lock);
++ hlist_for_each_entry(hook, &table->hooks, list) {
++ if (hook->used || !hook->registered) {
++ active = true;
++ continue;
++ }
++
++ hlist_del(&hook->list);
++ spin_unlock_bh(&hooks_lock);
++ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
++ table->ft.type->setup(&table->ft, hook->ops.dev,
++ FLOW_BLOCK_UNBIND);
++ nf_unregister_net_hook(hook->net, &hook->ops);
++ kfree(hook);
++ goto restart;
++ }
++ spin_unlock_bh(&hooks_lock);
++
++ return active;
++}
++
++static void
++xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
++{
++ struct xt_flowoffload_table *table = data;
++ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
++ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
++ struct xt_flowoffload_hook *hook;
++
++ spin_lock_bh(&hooks_lock);
++ hlist_for_each_entry(hook, &table->hooks, list) {
++ if (hook->ops.dev->ifindex != tuple0->iifidx &&
++ hook->ops.dev->ifindex != tuple1->iifidx)
++ continue;
++
++ hook->used = true;
++ }
++ spin_unlock_bh(&hooks_lock);
++}
++
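++/* Periodic maintenance: register freshly created hooks, clear the used
++ * flag on all of them, re-mark hooks still referenced by active flows
++ * and tear down the rest. Reschedules itself while hooks remain.
++ */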
++static void
++xt_flowoffload_hook_work(struct work_struct *work)
++{
++ struct xt_flowoffload_table *table;
++ struct xt_flowoffload_hook *hook;
++ int err;
++
++ table = container_of(work, struct xt_flowoffload_table, work.work);
++
++ spin_lock_bh(&hooks_lock);
++ xt_flowoffload_register_hooks(table);
++ hlist_for_each_entry(hook, &table->hooks, list)
++ hook->used = false;
++ spin_unlock_bh(&hooks_lock);
++
++ err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
++ table);
++ if (err && err != -EAGAIN)
++ goto out;
++
++ if (!xt_flowoffload_cleanup_hooks(table))
++ return;
++
++out:
++ queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
++}
++
++static bool
++xt_flowoffload_skip(struct sk_buff *skb, int family)
++{
++ if (skb_sec_path(skb))
++ return true;
++
++ if (family == NFPROTO_IPV4) {
++ const struct ip_options *opt = &(IPCB(skb)->opt);
++
++ if (unlikely(opt->optlen))
++ return true;
++ }
++
++ return false;
++}
++
++static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
++{
++ if (dst_xfrm(dst))
++ return FLOW_OFFLOAD_XMIT_XFRM;
++
++ return FLOW_OFFLOAD_XMIT_NEIGH;
++}
++
++static void nf_default_forward_path(struct nf_flow_route *route,
++ struct dst_entry *dst_cache,
++ enum ip_conntrack_dir dir,
++ struct net_device **dev)
++{
++ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
++ route->tuple[dir].dst = dst_cache;
++ route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
++}
++
++static bool nf_is_valid_ether_device(const struct net_device *dev)
++{
++ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
++ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
++ return false;
++
++ return true;
++}
++
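++/* Flatten the device path stack into nf_forward_info: record the real
++ * ingress/egress devices, source and destination MAC addresses, and any
++ * VLAN or PPPoE encapsulation on the way. A valid Ethernet ingress
++ * device enables the direct xmit path.
++ */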
++static void nf_dev_path_info(const struct net_device_path_stack *stack,
++ struct nf_forward_info *info,
++ unsigned char *ha)
++{
++ const struct net_device_path *path;
++ int i;
++
++ memcpy(info->h_dest, ha, ETH_ALEN);
++
++ for (i = 0; i < stack->num_paths; i++) {
++ path = &stack->path[i];
++
++ info->indev = path->dev;
++
++ switch (path->type) {
++ case DEV_PATH_ETHERNET:
++ case DEV_PATH_DSA:
++ case DEV_PATH_VLAN:
++ case DEV_PATH_PPPOE:
++ if (is_zero_ether_addr(info->h_source))
++ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
++
++ if (path->type == DEV_PATH_ETHERNET)
++ break;
++ if (path->type == DEV_PATH_DSA) {
++ i = stack->num_paths;
++ break;
++ }
++
++ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
++ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
++ info->indev = NULL;
++ break;
++ }
++ if (!info->outdev)
++ info->outdev = path->dev;
++ info->encap[info->num_encaps].id = path->encap.id;
++ info->encap[info->num_encaps].proto = path->encap.proto;
++ info->num_encaps++;
++ if (path->type == DEV_PATH_PPPOE)
++ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
++ break;
++ case DEV_PATH_BRIDGE:
++ if (is_zero_ether_addr(info->h_source))
++ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
++
++ switch (path->bridge.vlan_mode) {
++ case DEV_PATH_BR_VLAN_UNTAG_HW:
++ info->ingress_vlans |= BIT(info->num_encaps - 1);
++ break;
++ case DEV_PATH_BR_VLAN_TAG:
++ info->encap[info->num_encaps].id = path->bridge.vlan_id;
++ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
++ info->num_encaps++;
++ break;
++ case DEV_PATH_BR_VLAN_UNTAG:
++ info->num_encaps--;
++ break;
++ case DEV_PATH_BR_VLAN_KEEP:
++ break;
++ }
++ break;
++ default:
++ break;
++ }
++ }
++ if (!info->outdev)
++ info->outdev = info->indev;
++
++ info->hw_outdev = info->indev;
++
++ if (nf_is_valid_ether_device(info->indev))
++ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
++}
++
++static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
++ const struct dst_entry *dst_cache,
++ const struct nf_conn *ct,
++ enum ip_conntrack_dir dir, u8 *ha,
++ struct net_device_path_stack *stack)
++{
++ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
++ struct net_device *dev = dst_cache->dev;
++ struct neighbour *n;
++ u8 nud_state;
++
++ if (!nf_is_valid_ether_device(dev))
++ goto out;
++
++ if (ct->status & IPS_NAT_MASK) {
++ n = dst_neigh_lookup(dst_cache, daddr);
++ if (!n)
++ return -1;
++
++ read_lock_bh(&n->lock);
++ nud_state = n->nud_state;
++ ether_addr_copy(ha, n->ha);
++ read_unlock_bh(&n->lock);
++ neigh_release(n);
++
++ if (!(nud_state & NUD_VALID))
++ return -1;
++ }
++
++out:
++ return dev_fill_forward_path(dev, ha, stack);
++}
++
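++/* Resolve the forwarding path for one flow direction: use the neighbour
++ * entry (or the skb MAC header for non-NATed bridged traffic) as the
++ * destination address, then fill @route with the device, encapsulation
++ * and MAC details discovered along the path.
++ */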
++static int nf_dev_forward_path(struct sk_buff *skb,
++ struct nf_flow_route *route,
++ const struct nf_conn *ct,
++ enum ip_conntrack_dir dir,
++ struct net_device **devs)
++{
++ const struct dst_entry *dst = route->tuple[dir].dst;
++ struct ethhdr *eth;
++ enum ip_conntrack_dir skb_dir;
++ struct net_device_path_stack stack;
++ struct nf_forward_info info = {};
++ unsigned char ha[ETH_ALEN];
++ int i;
++
++ if (!(ct->status & IPS_NAT_MASK) && skb_mac_header_was_set(skb)) {
++ eth = eth_hdr(skb);
++ skb_dir = CTINFO2DIR(skb_get_nfct(skb) & NFCT_INFOMASK);
++
++ if (skb_dir != dir) {
++ memcpy(ha, eth->h_source, ETH_ALEN);
++ memcpy(info.h_source, eth->h_dest, ETH_ALEN);
++ } else {
++ memcpy(ha, eth->h_dest, ETH_ALEN);
++ memcpy(info.h_source, eth->h_source, ETH_ALEN);
++ }
++ }
++
++ if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
++ nf_dev_path_info(&stack, &info, ha);
++
++ devs[!dir] = (struct net_device *)info.indev;
++ if (!info.indev)
++ return -1;
++
++ route->tuple[!dir].in.ifindex = info.indev->ifindex;
++ for (i = 0; i < info.num_encaps; i++) {
++ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
++ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
++ }
++ route->tuple[!dir].in.num_encaps = info.num_encaps;
++ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
++
++ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
++ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
++ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
++ route->tuple[dir].out.ifindex = info.outdev->ifindex;
++ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
++ route->tuple[dir].xmit_type = info.xmit_type;
++ }
++
++ return 0;
++}
++
++static int
++xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct,
++ enum ip_conntrack_dir dir,
++ const struct xt_action_param *par, int ifindex,
++ struct net_device **devs)
++{
++ struct dst_entry *dst = NULL;
++ struct flowi fl;
++
++ memset(&fl, 0, sizeof(fl));
++ switch (xt_family(par)) {
++ case NFPROTO_IPV4:
++ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.src.u3.ip;
++ fl.u.ip4.flowi4_oif = ifindex;
++ break;
++ case NFPROTO_IPV6:
++ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
++ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.src.u3.in6;
++ fl.u.ip6.flowi6_oif = ifindex;
++ break;
++ }
++
++ nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
++ if (!dst)
++ return -ENOENT;
++
++ nf_default_forward_path(route, dst, dir, devs);
++
++ return 0;
++}
++
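++/* Routed/NAT case: reuse the skb's own dst for the current direction,
++ * look up a route for the reply direction, and upgrade both directions
++ * to the direct xmit path when the forwarding path can be resolved.
++ */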
++static int
++xt_flowoffload_route_nat(struct sk_buff *skb, const struct nf_conn *ct,
++ const struct xt_action_param *par,
++ struct nf_flow_route *route, enum ip_conntrack_dir dir,
++ struct net_device **devs)
++{
++ struct dst_entry *this_dst = skb_dst(skb);
++ struct dst_entry *other_dst = NULL;
++ struct flowi fl;
++
++ memset(&fl, 0, sizeof(fl));
++ switch (xt_family(par)) {
++ case NFPROTO_IPV4:
++ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
++ fl.u.ip4.flowi4_oif = xt_in(par)->ifindex;
++ break;
++ case NFPROTO_IPV6:
++ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
++ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
++ fl.u.ip6.flowi6_oif = xt_in(par)->ifindex;
++ break;
++ }
++
++ nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par));
++ if (!other_dst)
++ return -ENOENT;
++
++ nf_default_forward_path(route, this_dst, dir, devs);
++ nf_default_forward_path(route, other_dst, !dir, devs);
++
++ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
++ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
++ if (nf_dev_forward_path(skb, route, ct, dir, devs))
++ return -1;
++ if (nf_dev_forward_path(skb, route, ct, !dir, devs))
++ return -1;
++ }
++
++ return 0;
++}
++
++static int
++xt_flowoffload_route_bridge(struct sk_buff *skb, const struct nf_conn *ct,
++ const struct xt_action_param *par,
++ struct nf_flow_route *route, enum ip_conntrack_dir dir,
++ struct net_device **devs)
++{
++ int ret;
++
++ ret = xt_flowoffload_route_dir(route, ct, dir, par,
++ devs[dir]->ifindex,
++ devs);
++ if (ret)
++ return ret;
++
++ ret = xt_flowoffload_route_dir(route, ct, !dir, par,
++ devs[!dir]->ifindex,
++ devs);
++ if (ret)
++ goto err_route_dir1;
++
++ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
++ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
++ if (nf_dev_forward_path(skb, route, ct, dir, devs) ||
++ nf_dev_forward_path(skb, route, ct, !dir, devs)) {
++ ret = -1;
++ goto err_route_dir2;
++ }
++ }
++
++ return 0;
++
++err_route_dir2:
++ dst_release(route->tuple[!dir].dst);
++err_route_dir1:
++ dst_release(route->tuple[dir].dst);
++ return ret;
++}
++
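++/* FLOWOFFLOAD target: offload established TCP and UDP conntrack entries
++ * to the software or hardware flowtable, selected by XT_FLOWOFFLOAD_HW.
++ * Flows using helpers or TCP sequence adjustment stay in the slow path;
++ * ingress hooks are created on both devices of the flow.
++ */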
++static unsigned int
++flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
++{
++ struct xt_flowoffload_table *table;
++ const struct xt_flowoffload_target_info *info = par->targinfo;
++ struct tcphdr _tcph, *tcph = NULL;
++ enum ip_conntrack_info ctinfo;
++ enum ip_conntrack_dir dir;
++ struct nf_flow_route route = {};
++ struct flow_offload *flow = NULL;
++ struct net_device *devs[2] = {};
++ struct nf_conn *ct;
++ struct net *net;
++
++ if (xt_flowoffload_skip(skb, xt_family(par)))
++ return XT_CONTINUE;
++
++ ct = nf_ct_get(skb, &ctinfo);
++ if (ct == NULL)
++ return XT_CONTINUE;
++
++ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
++ case IPPROTO_TCP:
++ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
++ return XT_CONTINUE;
++
++ tcph = skb_header_pointer(skb, par->thoff,
++ sizeof(_tcph), &_tcph);
++ if (unlikely(!tcph || tcph->fin || tcph->rst))
++ return XT_CONTINUE;
++ break;
++ case IPPROTO_UDP:
++ break;
++ default:
++ return XT_CONTINUE;
++ }
++
++ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
++ ct->status & IPS_SEQ_ADJUST)
++ return XT_CONTINUE;
++
++ if (!nf_ct_is_confirmed(ct))
++ return XT_CONTINUE;
++
++ dir = CTINFO2DIR(ctinfo);
++
++ devs[dir] = xt_out(par);
++ devs[!dir] = xt_in(par);
++
++ if (!devs[dir] || !devs[!dir])
++ return XT_CONTINUE;
++
++ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
++ return XT_CONTINUE;
++
++ if (ct->status & IPS_NAT_MASK) {
++ if (xt_flowoffload_route_nat(skb, ct, par, &route, dir, devs) < 0)
++ goto err_flow_route;
++ } else {
++ if (xt_flowoffload_route_bridge(skb, ct, par, &route, dir, devs) < 0)
++ goto err_flow_route;
++ }
++
++ flow = flow_offload_alloc(ct);
++ if (!flow)
++ goto err_flow_alloc;
++
++ if (flow_offload_route_init(flow, &route) < 0)
++ goto err_flow_add;
++
++ if (tcph) {
++ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
++ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
++ }
++
++ table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
++
++ net = read_pnet(&table->ft.net);
++ if (!net)
++ write_pnet(&table->ft.net, xt_net(par));
++
++ if (flow_offload_add(&table->ft, flow) < 0)
++ goto err_flow_add;
++
++ xt_flowoffload_check_device(table, devs[0]);
++ xt_flowoffload_check_device(table, devs[1]);
++
++ if (!(ct->status & IPS_NAT_MASK))
++ dst_release(route.tuple[dir].dst);
++ dst_release(route.tuple[!dir].dst);
++
++ return XT_CONTINUE;
++
++err_flow_add:
++ flow_offload_free(flow);
++err_flow_alloc:
++ if (!(ct->status & IPS_NAT_MASK))
++ dst_release(route.tuple[dir].dst);
++ dst_release(route.tuple[!dir].dst);
++err_flow_route:
++ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
++
++ return XT_CONTINUE;
++}
++
++static int flowoffload_chk(const struct xt_tgchk_param *par)
++{
++ struct xt_flowoffload_target_info *info = par->targinfo;
++
++ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
++ return -EINVAL;
++
++ return 0;
++}
++
++static struct xt_target offload_tg_reg __read_mostly = {
++ .family = NFPROTO_UNSPEC,
++ .name = "FLOWOFFLOAD",
++ .revision = 0,
++ .targetsize = sizeof(struct xt_flowoffload_target_info),
++ .usersize = sizeof(struct xt_flowoffload_target_info),
++ .checkentry = flowoffload_chk,
++ .target = flowoffload_tg,
++ .me = THIS_MODULE,
++};
++
++static int flow_offload_netdev_event(struct notifier_block *this,
++ unsigned long event, void *ptr)
++{
++ struct xt_flowoffload_hook *hook0, *hook1;
++ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++
++ if (event != NETDEV_UNREGISTER)
++ return NOTIFY_DONE;
++
++ spin_lock_bh(&hooks_lock);
++ hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
++ if (hook0)
++ hlist_del(&hook0->list);
++
++ hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
++ if (hook1)
++ hlist_del(&hook1->list);
++ spin_unlock_bh(&hooks_lock);
++
++ if (hook0) {
++ nf_unregister_net_hook(hook0->net, &hook0->ops);
++ kfree(hook0);
++ }
++
++ if (hook1) {
++ nf_unregister_net_hook(hook1->net, &hook1->ops);
++ kfree(hook1);
++ }
++
++ nf_flow_table_cleanup(dev);
++
++ return NOTIFY_DONE;
++}
++
++static struct notifier_block flow_offload_netdev_notifier = {
++ .notifier_call = flow_offload_netdev_event,
++};
++
++static int nf_flow_rule_route_inet(struct net *net,
++ const struct flow_offload *flow,
++ enum flow_offload_tuple_dir dir,
++ struct nf_flow_rule *flow_rule)
++{
++ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
++ int err;
++
++ switch (flow_tuple->l3proto) {
++ case NFPROTO_IPV4:
++ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
++ break;
++ case NFPROTO_IPV6:
++ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
++ break;
++ default:
++ err = -1;
++ break;
++ }
++
++ return err;
++}
++
++static struct nf_flowtable_type flowtable_inet = {
++ .family = NFPROTO_INET,
++ .init = nf_flow_table_init,
++ .setup = nf_flow_table_offload_setup,
++ .action = nf_flow_rule_route_inet,
++ .free = nf_flow_table_free,
++ .hook = xt_flowoffload_net_hook,
++ .owner = THIS_MODULE,
++};
++
++static int init_flowtable(struct xt_flowoffload_table *tbl)
++{
++ INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
++ tbl->ft.type = &flowtable_inet;
++
++ return nf_flow_table_init(&tbl->ft);
++}
++
++static int __init xt_flowoffload_tg_init(void)
++{
++ int ret;
++
++ register_netdevice_notifier(&flow_offload_netdev_notifier);
++
++ ret = init_flowtable(&flowtable[0]);
++ if (ret)
++ return ret;
++
++ ret = init_flowtable(&flowtable[1]);
++ if (ret)
++ goto cleanup;
++
++ flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
++
++ ret = xt_register_target(&offload_tg_reg);
++ if (ret)
++ goto cleanup2;
++
++ return 0;
++
++cleanup2:
++ nf_flow_table_free(&flowtable[1].ft);
++cleanup:
++ nf_flow_table_free(&flowtable[0].ft);
++ return ret;
++}
++
++static void __exit xt_flowoffload_tg_exit(void)
++{
++ xt_unregister_target(&offload_tg_reg);
++ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
++ nf_flow_table_free(&flowtable[0].ft);
++ nf_flow_table_free(&flowtable[1].ft);
++}
++
++MODULE_LICENSE("GPL");
++module_init(xt_flowoffload_tg_init);
++module_exit(xt_flowoffload_tg_exit);
+--
+2.18.0
+