autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9990-mt7622-backport-nf-hw-offload-framework-and-ups.patch - openwrt/feeds/mtk-openwrt-feeds - Gitiles

 From 6ad9bd65769003ab526e504577e0f747eba14287 Mon Sep 17 00:00:00 2001
 From: Bo Jiao <Bo.Jiao@mediatek.com>
 Date: Wed, 22 Jun 2022 09:42:19 +0800
 Subject: [PATCH 1/8]
  9990-mt7622-backport-nf-hw-offload-framework-and-upstream-hnat-plus-xt-FLOWOFFLOAD-update-v2

 ---
  drivers/net/ethernet/mediatek/Makefile        |    3 +-
  drivers/net/ethernet/mediatek/mtk_eth_soc.c   |   28 +-
  drivers/net/ethernet/mediatek/mtk_eth_soc.h   |   20 +-
  drivers/net/ethernet/mediatek/mtk_ppe.c       |  509 +++++++
  drivers/net/ethernet/mediatek/mtk_ppe.h       |  288 ++++
  .../net/ethernet/mediatek/mtk_ppe_debugfs.c   |  214 +++
  .../net/ethernet/mediatek/mtk_ppe_offload.c   |  526 ++++++++
  drivers/net/ethernet/mediatek/mtk_ppe_regs.h  |  144 ++
  drivers/net/ppp/ppp_generic.c                 |   22 +
  drivers/net/ppp/pppoe.c                       |   24 +
  include/linux/netdevice.h                     |   60 +
  include/linux/ppp_channel.h                   |    3 +
  include/net/dsa.h                             |   10 +
  include/net/flow_offload.h                    |    4 +
  include/net/ip6_route.h                       |    5 +-
  .../net/netfilter/ipv6/nf_conntrack_ipv6.h    |    3 -
  include/net/netfilter/nf_conntrack.h          |   12 +
  include/net/netfilter/nf_conntrack_acct.h     |   11 +
  include/net/netfilter/nf_flow_table.h         |  264 +++-
  include/net/netns/conntrack.h                 |    6 +
  .../linux/netfilter/nf_conntrack_common.h     |    9 +-
  include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h |   17 +
  net/8021q/vlan_dev.c                          |   21 +
  net/bridge/br_device.c                        |   49 +
  net/bridge/br_private.h                       |   20 +
  net/bridge/br_vlan.c                          |   55 +
  net/core/dev.c                                |   46 +
  net/dsa/dsa.c                                 |    9 +
  net/dsa/slave.c                               |   41 +-
  net/ipv4/netfilter/Kconfig                    |    4 +-
  net/ipv6/ip6_output.c                         |    2 +-
  net/ipv6/netfilter/Kconfig                    |    3 +-
  net/ipv6/route.c                              |   22 +-
  net/netfilter/Kconfig                         |   14 +-
  net/netfilter/Makefile                        |    4 +-
  net/netfilter/nf_conntrack_core.c             |   20 +-
  net/netfilter/nf_conntrack_proto_tcp.c        |    4 +
  net/netfilter/nf_conntrack_proto_udp.c        |    4 +
  net/netfilter/nf_conntrack_standalone.c       |   34 +-
  net/netfilter/nf_flow_table_core.c            |  446 +++---
  net/netfilter/nf_flow_table_ip.c              |  455 ++++---
  net/netfilter/nf_flow_table_offload.c         | 1191 +++++++++++++++++
  net/netfilter/xt_FLOWOFFLOAD.c                |  719 ++++++++++
  43 files changed, 4913 insertions(+), 432 deletions(-)
  create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
  create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
  create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
  create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
  create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h
  create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
  create mode 100644 net/netfilter/nf_flow_table_offload.c
  create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c

 diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
 index 13c5b4e8f..0a6af99f1 100755
 --- a/drivers/net/ethernet/mediatek/Makefile
 +++ b/drivers/net/ethernet/mediatek/Makefile
 @@ -4,5 +4,6 @@
  #

  obj-$(CONFIG_NET_MEDIATEK_SOC)			+= mtk_eth.o
 -mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o
 +mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o	\
 +	     mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
  obj-$(CONFIG_NET_MEDIATEK_HNAT)			+= mtk_hnat/
 diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
 index 2b21f7ed0..819d8a0be 100755
 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
 +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
 @@ -2654,12 +2654,17 @@ static int mtk_open(struct net_device *dev)

  	/* we run 2 netdevs on the same dma ring so we only bring it up once */
  	if (!refcount_read(&eth->dma_refcnt)) {
 -		int err = mtk_start_dma(eth);
 +		u32 gdm_config = MTK_GDMA_TO_PDMA;
 +		int err;

 +		err = mtk_start_dma(eth);
  		if (err)
  			return err;

 -		mtk_gdm_config(eth, MTK_GDMA_TO_PDMA);
 +		if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
 +			gdm_config = MTK_GDMA_TO_PPE;
 +
 +		mtk_gdm_config(eth, gdm_config);

  		/* Indicates CDM to parse the MTK special tag from CPU */
  		if (netdev_uses_dsa(dev)) {
 @@ -2772,6 +2777,9 @@ static int mtk_stop(struct net_device *dev)

  	mtk_dma_free(eth);

 +	if (eth->soc->offload_version)
 +		mtk_ppe_stop(&eth->ppe);
 +
  	return 0;
  }

 @@ -3391,6 +3399,7 @@ static const struct net_device_ops mtk_netdev_ops = {
  #ifdef CONFIG_NET_POLL_CONTROLLER
  	.ndo_poll_controller	= mtk_poll_controller,
  #endif
 +	.ndo_setup_tc		= mtk_eth_setup_tc,
  };

  static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 @@ -3682,6 +3691,17 @@ static int mtk_probe(struct platform_device *pdev)
  			goto err_free_dev;
  	}

 +	if (eth->soc->offload_version) {
 +		err = mtk_ppe_init(&eth->ppe, eth->dev,
 +				   eth->base + MTK_ETH_PPE_BASE, 2);
 +		if (err)
 +			goto err_free_dev;
 +
 +		err = mtk_eth_offload_init(eth);
 +		if (err)
 +			goto err_free_dev;
 +	}
 +
  	for (i = 0; i < MTK_MAX_DEVS; i++) {
  		if (!eth->netdev[i])
  			continue;
 @@ -3781,6 +3801,7 @@ static const struct mtk_soc_data mt2701_data = {
  	.required_clks = MT7623_CLKS_BITMAP,
  	.required_pctl = true,
  	.has_sram = false,
 +	.offload_version = 2,
  };

  static const struct mtk_soc_data mt7621_data = {
 @@ -3789,6 +3810,7 @@ static const struct mtk_soc_data mt7621_data = {
  	.required_clks = MT7621_CLKS_BITMAP,
  	.required_pctl = false,
  	.has_sram = false,
 +	.offload_version = 2,
  };

  static const struct mtk_soc_data mt7622_data = {
 @@ -3798,6 +3820,7 @@ static const struct mtk_soc_data mt7622_data = {
  	.required_clks = MT7622_CLKS_BITMAP,
  	.required_pctl = false,
  	.has_sram = false,
 +	.offload_version = 2,
  };

  static const struct mtk_soc_data mt7623_data = {
 @@ -3806,6 +3829,7 @@ static const struct mtk_soc_data mt7623_data = {
  	.required_clks = MT7623_CLKS_BITMAP,
  	.required_pctl = true,
  	.has_sram = false,
 +	.offload_version = 2,
  };

  static const struct mtk_soc_data mt7629_data = {
 diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
 index b6380ffeb..349f98503 100755
 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
 +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
 @@ -15,6 +15,8 @@
  #include <linux/u64_stats_sync.h>
  #include <linux/refcount.h>
  #include <linux/phylink.h>
 +#include <linux/rhashtable.h>
 +#include "mtk_ppe.h"

  #define MTK_QDMA_PAGE_SIZE	2048
  #define	MTK_MAX_RX_LENGTH	1536
 @@ -37,7 +39,8 @@
  				 NETIF_F_HW_VLAN_CTAG_TX | \
  				 NETIF_F_SG | NETIF_F_TSO | \
  				 NETIF_F_TSO6 | \
 -				 NETIF_F_IPV6_CSUM)
 +				 NETIF_F_IPV6_CSUM |\
 +				 NETIF_F_HW_TC)
  #define MTK_SET_FEATURES	(NETIF_F_LRO | \
  				 NETIF_F_HW_VLAN_CTAG_RX)
  #define MTK_HW_FEATURES_MT7628	(NETIF_F_SG | NETIF_F_RXCSUM)
 @@ -107,6 +110,7 @@
  #define MTK_GDMA_TCS_EN		BIT(21)
  #define MTK_GDMA_UCS_EN		BIT(20)
  #define MTK_GDMA_TO_PDMA	0x0
 +#define MTK_GDMA_TO_PPE		0x4444
  #define MTK_GDMA_DROP_ALL	0x7777

  /* Unicast Filter MAC Address Register - Low */
 @@ -547,6 +551,12 @@
  #define RX_DMA_TCI(_x)		((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK))
  #define RX_DMA_VPID(_x)		(((_x) >> 16) & 0xffff)

 +/* QDMA descriptor rxd4 */
 +#define MTK_RXD4_FOE_ENTRY	GENMASK(13, 0)
 +#define MTK_RXD4_PPE_CPU_REASON	GENMASK(18, 14)
 +#define MTK_RXD4_SRC_PORT	GENMASK(21, 19)
 +#define MTK_RXD4_ALG		GENMASK(31, 22)
 +
  /* QDMA descriptor rxd4 */
  #define RX_DMA_L4_VALID		BIT(24)
  #define RX_DMA_L4_VALID_PDMA	BIT(30)		/* when PDMA is used */
 @@ -1158,6 +1168,7 @@ struct mtk_soc_data {
  	u32		caps;
  	u32		required_clks;
  	bool		required_pctl;
 +	u8		offload_version;
  	netdev_features_t hw_features;
  	bool		has_sram;
  };
 @@ -1271,6 +1282,9 @@ struct mtk_eth {
  	int				ip_align;
  	spinlock_t			syscfg0_lock;
  	struct timer_list		mtk_dma_monitor_timer;
 +
 +	struct mtk_ppe			ppe;
 +	struct rhashtable		flow_table;
  };

  /* struct mtk_mac -	the structure that holds the info about the MACs of the
 @@ -1319,4 +1333,8 @@ int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
  void mtk_gdm_config(struct mtk_eth *eth, u32 config);
  void ethsys_reset(struct mtk_eth *eth, u32 reset_bits);

 +int mtk_eth_offload_init(struct mtk_eth *eth);
 +int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
 +		     void *type_data);
 +
  #endif /* MTK_ETH_H */
 diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
 new file mode 100644
 index 000000000..66298e223
 --- /dev/null
 +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
 @@ -0,0 +1,509 @@
 +// SPDX-License-Identifier: GPL-2.0-only
 +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
 +
 +#include <linux/kernel.h>
 +#include <linux/io.h>
 +#include <linux/iopoll.h>
 +#include <linux/etherdevice.h>
 +#include <linux/platform_device.h>
 +#include "mtk_ppe.h"
 +#include "mtk_ppe_regs.h"
 +
 +static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
 +{
 +	writel(val, ppe->base + reg);
 +}
 +
 +static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
 +{
 +	return readl(ppe->base + reg);
 +}
 +
 +/* Read-modify-write helper for PPE register @reg: clears the @mask bits,
 + * ORs in @set, writes the result back and returns it.
 + */
 +static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
 +{
 +	u32 val = (ppe_r32(ppe, reg) & ~mask) | set;
 +
 +	ppe_w32(ppe, reg, val);
 +
 +	return val;
 +}
 +
 +static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
 +{
 +	return ppe_m32(ppe, reg, 0, val);
 +}
 +
 +static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
 +{
 +	return ppe_m32(ppe, reg, val, 0);
 +}
 +
 +/* Poll GLO_CFG until BUSY clears; returns readl_poll_timeout()'s result. */
 +static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
 +{
 +	int ret;
 +	u32 val;
 +
 +	ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
 +				 !(val & MTK_PPE_GLO_CFG_BUSY),
 +				 20, MTK_PPE_WAIT_TIMEOUT_US);
 +	if (ret)
 +		dev_err(ppe->dev, "PPE table busy\n");
 +
 +	return ret;
 +}
 +
 +static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
 +{
 +	ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
 +	ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
 +}
 +
 +/* Pulse the cache clear bit, then set or clear CACHE_CTL_EN per @enable. */
 +static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
 +{
 +	mtk_ppe_cache_clear(ppe);
 +	ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
 +		enable ? MTK_PPE_CACHE_CTL_EN : 0);
 +}
 +
 +/* Fold the match fields of @e into an index into the FOE table.
 + * Packet types without a case below trigger a one-time WARN and yield
 + * MTK_PPE_HASH_MASK instead of a computed hash.
 + */
 +static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
 +{
 +	u32 hv1, hv2, hv3;
 +	u32 hash;
 +
 +	switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
 +	case MTK_PPE_PKT_TYPE_BRIDGE:
 +		hv1 = e->bridge.src_mac_lo;
 +		hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
 +		hv2 = e->bridge.src_mac_hi >> 16;
 +		hv2 ^= e->bridge.dest_mac_lo;
 +		hv3 = e->bridge.dest_mac_hi;
 +		break;
 +	case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
 +	case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
 +		hv1 = e->ipv4.orig.ports;
 +		hv2 = e->ipv4.orig.dest_ip;
 +		hv3 = e->ipv4.orig.src_ip;
 +		break;
 +	case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
 +	case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
 +		hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
 +		hv1 ^= e->ipv6.ports;
 +		hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
 +		hv2 ^= e->ipv6.dest_ip[0];
 +		hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
 +		hv3 ^= e->ipv6.src_ip[0];
 +		break;
 +	case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
 +	case MTK_PPE_PKT_TYPE_IPV6_6RD:
 +	default:
 +		WARN_ON_ONCE(1);
 +		return MTK_PPE_HASH_MASK;
 +	}
 +
 +	hash = (hv1 & hv2) | ((~hv1) & hv3);
 +	hash = (hash >> 24) | ((hash & 0xffffff) << 8);
 +	hash ^= hv1 ^ hv2 ^ hv3;
 +	hash ^= hash >> 16;
 +
 +	return (hash << 1) & (MTK_PPE_ENTRIES - 1);
 +}
 +
 +static inline struct mtk_foe_mac_info *
 +mtk_foe_entry_l2(struct mtk_foe_entry *entry)
 +{
 +	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
 +
 +	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
 +		return &entry->ipv6.l2;
 +
 +	return &entry->ipv4.l2;
 +}
 +
 +static inline u32 *
 +mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
 +{
 +	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
 +
 +	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
 +		return &entry->ipv6.ib2;
 +
 +	return &entry->ipv4.ib2;
 +}
 +
 +int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
 +			  u8 pse_port, u8 *src_mac, u8 *dest_mac)
 +{
 +	struct mtk_foe_mac_info *l2;
 +	u32 ports_pad, val;
 +
 +	memset(entry, 0, sizeof(*entry));
 +
 +	val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
 +	      FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
 +	      FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
 +	      MTK_FOE_IB1_BIND_TTL |
 +	      MTK_FOE_IB1_BIND_CACHE;
 +	entry->ib1 = val;
 +
 +	val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
 +	      FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
 +	      FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
 +
 +	if (is_multicast_ether_addr(dest_mac))
 +		val |= MTK_FOE_IB2_MULTICAST;
 +
 +	ports_pad = 0xa5a5a500 | (l4proto & 0xff);
 +	if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
 +		entry->ipv4.orig.ports = ports_pad;
 +	if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
 +		entry->ipv6.ports = ports_pad;
 +
 +	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
 +		entry->ipv6.ib2 = val;
 +		l2 = &entry->ipv6.l2;
 +	} else {
 +		entry->ipv4.ib2 = val;
 +		l2 = &entry->ipv4.l2;
 +	}
 +
 +	l2->dest_mac_hi = get_unaligned_be32(dest_mac);
 +	l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
 +	l2->src_mac_hi = get_unaligned_be32(src_mac);
 +	l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
 +
 +	if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
 +		l2->etype = ETH_P_IPV6;
 +	else
 +		l2->etype = ETH_P_IP;
 +
 +	return 0;
 +}
 +
 +/* Rewrite the DEST_PORT field of the entry's ib2 word to @port.
 + * Always returns 0.
 + */
 +int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
 +{
 +	u32 *ib2 = mtk_foe_entry_ib2(entry);
 +
 +	*ib2 &= ~MTK_FOE_IB2_DEST_PORT;
 +	*ib2 |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
 +
 +	return 0;
 +}
 +
 +int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
 +				 __be32 src_addr, __be16 src_port,
 +				 __be32 dest_addr, __be16 dest_port)
 +{
 +	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
 +	struct mtk_ipv4_tuple *t;
 +
 +	switch (type) {
 +	case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
 +		if (egress) {
 +			t = &entry->ipv4.new;
 +			break;
 +		}
 +		fallthrough;
 +	case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
 +	case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
 +		t = &entry->ipv4.orig;
 +		break;
 +	case MTK_PPE_PKT_TYPE_IPV6_6RD:
 +		entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
 +		entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
 +		return 0;
 +	default:
 +		WARN_ON_ONCE(1);
 +		return -EINVAL;
 +	}
 +
 +	t->src_ip = be32_to_cpu(src_addr);
 +	t->dest_ip = be32_to_cpu(dest_addr);
 +
 +	if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
 +		return 0;
 +
 +	t->src_port = be16_to_cpu(src_port);
 +	t->dest_port = be16_to_cpu(dest_port);
 +
 +	return 0;
 +}
 +
 +int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
 +				 __be32 *src_addr, __be16 src_port,
 +				 __be32 *dest_addr, __be16 dest_port)
 +{
 +	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
 +	u32 *src, *dest;
 +	int i;
 +
 +	switch (type) {
 +	case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
 +		src = entry->dslite.tunnel_src_ip;
 +		dest = entry->dslite.tunnel_dest_ip;
 +		break;
 +	case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
 +	case MTK_PPE_PKT_TYPE_IPV6_6RD:
 +		entry->ipv6.src_port = be16_to_cpu(src_port);
 +		entry->ipv6.dest_port = be16_to_cpu(dest_port);
 +		fallthrough;
 +	case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
 +		src = entry->ipv6.src_ip;
 +		dest = entry->ipv6.dest_ip;
 +		break;
 +	default:
 +		WARN_ON_ONCE(1);
 +		return -EINVAL;
 +	}
 +
 +	for (i = 0; i < 4; i++)
 +		src[i] = be32_to_cpu(src_addr[i]);
 +	for (i = 0; i < 4; i++)
 +		dest[i] = be32_to_cpu(dest_addr[i]);
 +
 +	return 0;
 +}
 +
 +int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
 +{
 +	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
 +
 +	l2->etype = BIT(port);
 +
 +	if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
 +		entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
 +	else
 +		l2->etype |= BIT(8);
 +
 +	entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
 +
 +	return 0;
 +}
 +
 +int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
 +{
 +	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
 +
 +	switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
 +	case 0:
 +		entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
 +			      FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
 +		l2->vlan1 = vid;
 +		return 0;
 +	case 1:
 +		if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
 +			l2->vlan1 = vid;
 +			l2->etype |= BIT(8);
 +		} else {
 +			l2->vlan2 = vid;
 +			entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
 +		}
 +		return 0;
 +	default:
 +		return -ENOSPC;
 +	}
 +}
 +
 +int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
 +{
 +	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
 +
 +	if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
 +	    (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
 +		l2->etype = ETH_P_PPP_SES;
 +
 +	entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
 +	l2->pppoe_id = sid;
 +
 +	return 0;
 +}
 +
 +static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
 +{
 +	return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
 +	       FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
 +}
 +
 +/* Write @entry into one of the two FOE slots selected by its hash.
 + * Returns the slot index used, or -ENOSPC when neither slot is usable.
 + */
 +int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
 +			 u16 timestamp)
 +{
 +	struct mtk_foe_entry *hwe;
 +	u32 hash;
 +
 +	timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
 +	entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
 +	entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
 +
 +	hash = mtk_ppe_hash_entry(entry);
 +	hwe = &ppe->foe_table[hash];
 +	if (!mtk_foe_entry_usable(hwe)) {
 +		hwe++;
 +		hash++; /* leaves the table for the MTK_PPE_HASH_MASK fallback */
 +		if (hash >= MTK_PPE_ENTRIES || !mtk_foe_entry_usable(hwe))
 +			return -ENOSPC;
 +	}
 +
 +	memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
 +	wmb(); /* publish the payload before ib1 */
 +	hwe->ib1 = entry->ib1;
 +	dma_wmb();
 +	mtk_ppe_cache_clear(ppe);
 +
 +	return hash;
 +}
 +
 +int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
 +		 int version)
 +{
 +	struct mtk_foe_entry *foe;
 +
 +	/* need to allocate a separate device, since PPE DMA access is
 +	 * not coherent. NOTE(review): no separate device is created here.
 +	 */
 +	ppe->base = base;
 +	ppe->dev = dev;
 +	ppe->version = version;
 +
 +	foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
 +				  &ppe->foe_phys, GFP_KERNEL);
 +	if (!foe)
 +		return -ENOMEM;
 +
 +	ppe->foe_table = foe;
 +
 +	mtk_ppe_debugfs_init(ppe); /* result is not checked */
 +
 +	return 0;
 +}
 +
 +static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
 +{
 +	static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
 +	int i, k;
 +
 +	memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
 +	if (!IS_ENABLED(CONFIG_SOC_MT7621))
 +		return;
 +	/* skip all entries that cross the 1024 byte boundary. NOTE(review):
 +	 * at 80 bytes per entry, index 115 straddles 9216 too - intentional?
 +	 */
 +	for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
 +		for (k = 0; k < ARRAY_SIZE(skip); k++)
 +			ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
 +}
 +
 +int mtk_ppe_start(struct mtk_ppe *ppe)
 +{
 +	u32 val;
 +
 +	mtk_ppe_init_foe_table(ppe);
 +	ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
 +
 +	val = MTK_PPE_TB_CFG_ENTRY_80B |
 +	      MTK_PPE_TB_CFG_AGE_NON_L4 |
 +	      MTK_PPE_TB_CFG_AGE_UNBIND |
 +	      MTK_PPE_TB_CFG_AGE_TCP |
 +	      MTK_PPE_TB_CFG_AGE_UDP |
 +	      MTK_PPE_TB_CFG_AGE_TCP_FIN |
 +	      FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
 +			 MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
 +	      FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
 +			 MTK_PPE_KEEPALIVE_DISABLE) |
 +	      FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
 +	      FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
 +			 MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
 +	      FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
 +			 MTK_PPE_ENTRIES_SHIFT);
 +	ppe_w32(ppe, MTK_PPE_TB_CFG, val);
 +
 +	ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
 +		MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
 +
 +	mtk_ppe_cache_enable(ppe, true);
 +
 +	val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
 +	      MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
 +	      MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
 +	      MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
 +	      MTK_PPE_FLOW_CFG_IP6_6RD |
 +	      MTK_PPE_FLOW_CFG_IP4_NAT |
 +	      MTK_PPE_FLOW_CFG_IP4_NAPT |
 +	      MTK_PPE_FLOW_CFG_IP4_DSLITE |
 +	      MTK_PPE_FLOW_CFG_L2_BRIDGE |
 +	      MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
 +	ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
 +
 +	val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
 +	      FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
 +	ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
 +
 +	val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 12) |
 +	      FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
 +	ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
 +
 +	val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
 +	      FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 7);
 +	ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
 +
 +	val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
 +	ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
 +
 +	val = MTK_PPE_BIND_LIMIT1_FULL |
 +	      FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
 +	ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
 +
 +	val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
 +	      FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
 +	ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
 +
 +	/* enable PPE */
 +	val = MTK_PPE_GLO_CFG_EN |
 +	      MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
 +	      MTK_PPE_GLO_CFG_IP4_CS_DROP |
 +	      MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
 +	ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
 +
 +	ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
 +
 +	return 0;
 +}
 +
 +int mtk_ppe_stop(struct mtk_ppe *ppe)
 +{
 +	u32 val;
 +	int i;
 +
 +	for (i = 0; i < MTK_PPE_ENTRIES; i++)
 +		ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
 +						   MTK_FOE_STATE_INVALID);
 +
 +	mtk_ppe_cache_enable(ppe, false);
 +
 +	/* disable offload engine */
 +	ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
 +	ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
 +
 +	/* disable aging */
 +	val = MTK_PPE_TB_CFG_AGE_NON_L4 |
 +	      MTK_PPE_TB_CFG_AGE_UNBIND |
 +	      MTK_PPE_TB_CFG_AGE_TCP |
 +	      MTK_PPE_TB_CFG_AGE_UDP |
 +	      MTK_PPE_TB_CFG_AGE_TCP_FIN;
 +	ppe_clear(ppe, MTK_PPE_TB_CFG, val);
 +
 +	return mtk_ppe_wait_busy(ppe);
 +}
 diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
 new file mode 100644
 index 000000000..242fb8f2a
 --- /dev/null
 +++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
 @@ -0,0 +1,288 @@
 +/* SPDX-License-Identifier: GPL-2.0-only */
 +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
 +
 +#ifndef __MTK_PPE_H
 +#define __MTK_PPE_H
 +
 +#include <linux/kernel.h>
 +#include <linux/bitfield.h>
 +
 +#define MTK_ETH_PPE_BASE		0xc00
 +
 +#define MTK_PPE_ENTRIES_SHIFT		3
 +#define MTK_PPE_ENTRIES			(1024 << MTK_PPE_ENTRIES_SHIFT)
 +#define MTK_PPE_HASH_MASK		(MTK_PPE_ENTRIES - 1)
 +#define MTK_PPE_WAIT_TIMEOUT_US		1000000
 +
 +#define MTK_FOE_IB1_UNBIND_TIMESTAMP	GENMASK(7, 0)
 +#define MTK_FOE_IB1_UNBIND_PACKETS	GENMASK(23, 8)
 +#define MTK_FOE_IB1_UNBIND_PREBIND	BIT(24)
 +
 +#define MTK_FOE_IB1_BIND_TIMESTAMP	GENMASK(14, 0)
 +#define MTK_FOE_IB1_BIND_KEEPALIVE	BIT(15)
 +#define MTK_FOE_IB1_BIND_VLAN_LAYER	GENMASK(18, 16)
 +#define MTK_FOE_IB1_BIND_PPPOE		BIT(19)
 +#define MTK_FOE_IB1_BIND_VLAN_TAG	BIT(20)
 +#define MTK_FOE_IB1_BIND_PKT_SAMPLE	BIT(21)
 +#define MTK_FOE_IB1_BIND_CACHE		BIT(22)
 +#define MTK_FOE_IB1_BIND_TUNNEL_DECAP	BIT(23)
 +#define MTK_FOE_IB1_BIND_TTL		BIT(24)
 +
 +#define MTK_FOE_IB1_PACKET_TYPE		GENMASK(27, 25)
 +#define MTK_FOE_IB1_STATE		GENMASK(29, 28)
 +#define MTK_FOE_IB1_UDP			BIT(30)
 +#define MTK_FOE_IB1_STATIC		BIT(31)
 +
 +enum {
 +	MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
 +	MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
 +	MTK_PPE_PKT_TYPE_BRIDGE = 2,
 +	MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
 +	MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
 +	MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
 +	MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
 +};
 +
 +#define MTK_FOE_IB2_QID			GENMASK(3, 0)
 +#define MTK_FOE_IB2_PSE_QOS		BIT(4)
 +#define MTK_FOE_IB2_DEST_PORT		GENMASK(7, 5)
 +#define MTK_FOE_IB2_MULTICAST		BIT(8)
 +
 +#define MTK_FOE_IB2_WHNAT_QID2		GENMASK(13, 12)
 +#define MTK_FOE_IB2_WHNAT_DEVIDX	BIT(16)
 +#define MTK_FOE_IB2_WHNAT_NAT		BIT(17)
 +
 +#define MTK_FOE_IB2_PORT_MG		GENMASK(17, 12)
 +
 +#define MTK_FOE_IB2_PORT_AG		GENMASK(23, 18)
 +
 +#define MTK_FOE_IB2_DSCP		GENMASK(31, 24)
 +
 +#define MTK_FOE_VLAN2_WHNAT_BSS		GENMASK(5, 0)
 +#define MTK_FOE_VLAN2_WHNAT_WCID	GENMASK(13, 6)
 +#define MTK_FOE_VLAN2_WHNAT_RING	GENMASK(15, 14)
 +
 +enum {
 +	MTK_FOE_STATE_INVALID,
 +	MTK_FOE_STATE_UNBIND,
 +	MTK_FOE_STATE_BIND,
 +	MTK_FOE_STATE_FIN
 +};
 +
 +struct mtk_foe_mac_info {
 +	u16 vlan1;
 +	u16 etype;
 +
 +	u32 dest_mac_hi;
 +
 +	u16 vlan2;
 +	u16 dest_mac_lo;
 +
 +	u32 src_mac_hi;
 +
 +	u16 pppoe_id;
 +	u16 src_mac_lo;
 +};
 +
 +struct mtk_foe_bridge {
 +	u32 dest_mac_hi;
 +
 +	u16 src_mac_lo;
 +	u16 dest_mac_lo;
 +
 +	u32 src_mac_hi;
 +
 +	u32 ib2;
 +
 +	u32 _rsv[5];
 +
 +	u32 udf_tsid;
 +	struct mtk_foe_mac_info l2;
 +};
 +
 +struct mtk_ipv4_tuple {
 +	u32 src_ip;
 +	u32 dest_ip;
 +	union {
 +		struct {
 +			u16 dest_port;
 +			u16 src_port;
 +		};
 +		struct {
 +			u8 protocol;
 +			u8 _pad[3]; /* fill with 0xa5a5a5 */
 +		};
 +		u32 ports;
 +	};
 +};
 +
 +struct mtk_foe_ipv4 {
 +	struct mtk_ipv4_tuple orig;
 +
 +	u32 ib2;
 +
 +	struct mtk_ipv4_tuple new;
 +
 +	u16 timestamp;
 +	u16 _rsv0[3];
 +
 +	u32 udf_tsid;
 +
 +	struct mtk_foe_mac_info l2;
 +};
 +
 +struct mtk_foe_ipv4_dslite {
 +	struct mtk_ipv4_tuple ip4;
 +
 +	u32 tunnel_src_ip[4];
 +	u32 tunnel_dest_ip[4];
 +
 +	u8 flow_label[3];
 +	u8 priority;
 +
 +	u32 udf_tsid;
 +
 +	u32 ib2;
 +
 +	struct mtk_foe_mac_info l2;
 +};
 +
 +struct mtk_foe_ipv6 {
 +	u32 src_ip[4];
 +	u32 dest_ip[4];
 +
 +	union {
 +		struct {
 +			u8 protocol;
 +			u8 _pad[3]; /* fill with 0xa5a5a5 */
 +		}; /* 3-tuple */
 +		struct {
 +			u16 dest_port;
 +			u16 src_port;
 +		}; /* 5-tuple */
 +		u32 ports;
 +	};
 +
 +	u32 _rsv[3];
 +
 +	u32 udf;
 +
 +	u32 ib2;
 +	struct mtk_foe_mac_info l2;
 +};
 +
 +struct mtk_foe_ipv6_6rd {
 +	u32 src_ip[4];
 +	u32 dest_ip[4];
 +	u16 dest_port;
 +	u16 src_port;
 +
 +	u32 tunnel_src_ip;
 +	u32 tunnel_dest_ip;
 +
 +	u16 hdr_csum;
 +	u8 dscp;
 +	u8 ttl;
 +
 +	u8 flag;
 +	u8 pad;
 +	u8 per_flow_6rd_id;
 +	u8 pad2;
 +
 +	u32 ib2;
 +	struct mtk_foe_mac_info l2;
 +};
 +
 +struct mtk_foe_entry {
 +	u32 ib1;
 +
 +	union {
 +		struct mtk_foe_bridge bridge;
 +		struct mtk_foe_ipv4 ipv4;
 +		struct mtk_foe_ipv4_dslite dslite;
 +		struct mtk_foe_ipv6 ipv6;
 +		struct mtk_foe_ipv6_6rd ipv6_6rd;
 +		u32 data[19];
 +	};
 +};
 +
 +enum {
 +	MTK_PPE_CPU_REASON_TTL_EXCEEDED			= 0x02,
 +	MTK_PPE_CPU_REASON_OPTION_HEADER		= 0x03,
 +	MTK_PPE_CPU_REASON_NO_FLOW			= 0x07,
 +	MTK_PPE_CPU_REASON_IPV4_FRAG			= 0x08,
 +	MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG		= 0x09,
 +	MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP	= 0x0a,
 +	MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP		= 0x0b,
 +	MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST		= 0x0c,
 +	MTK_PPE_CPU_REASON_UN_HIT			= 0x0d,
 +	MTK_PPE_CPU_REASON_HIT_UNBIND			= 0x0e,
 +	MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED	= 0x0f,
 +	MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN		= 0x10,
 +	MTK_PPE_CPU_REASON_HIT_TTL_1			= 0x11,
 +	MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION	= 0x12,
 +	MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR		= 0x13,
 +	MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR		= 0x14,
 +	MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR	= 0x15,
 +	MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU		= 0x16,
 +	MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER		= 0x17,
 +	MTK_PPE_CPU_REASON_MULTICAST_TO_CPU		= 0x18,
 +	MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU	= 0x19,
 +	MTK_PPE_CPU_REASON_HIT_PRE_BIND			= 0x1a,
 +	MTK_PPE_CPU_REASON_PACKET_SAMPLING		= 0x1b,
 +	MTK_PPE_CPU_REASON_EXCEED_MTU			= 0x1c,
 +	MTK_PPE_CPU_REASON_PPE_BYPASS			= 0x1e,
 +	MTK_PPE_CPU_REASON_INVALID			= 0x1f,
 +};
 +
 +struct mtk_ppe {
 +	struct device *dev;
 +	void __iomem *base;
 +	int version;
 +
 +	struct mtk_foe_entry *foe_table;
 +	dma_addr_t foe_phys;
 +
 +	void *acct_table;
 +};
 +
 +int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
 +		 int version);
 +int mtk_ppe_start(struct mtk_ppe *ppe);
 +int mtk_ppe_stop(struct mtk_ppe *ppe);
 +
 +static inline void
 +mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
 +{
 +	ppe->foe_table[hash].ib1 = 0;
 +	dma_wmb();
 +}
 +
 +static inline int
 +mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
 +{
 +	u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
 +
 +	if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
 +		return -1;
 +
 +	return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
 +}
 +
 +int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
 +			  u8 pse_port, u8 *src_mac, u8 *dest_mac);
 +int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
 +int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
 +				 __be32 src_addr, __be16 src_port,
 +				 __be32 dest_addr, __be16 dest_port);
 +int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
 +				 __be32 *src_addr, __be16 src_port,
 +				 __be32 *dest_addr, __be16 dest_port);
 +int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
 +int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
 +int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
 +int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
 +			 u16 timestamp);
 +int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
 +
 +#endif
 diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
 new file mode 100644
 index 000000000..d4b482340
 --- /dev/null
 +++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
 @@ -0,0 +1,214 @@
 +// SPDX-License-Identifier: GPL-2.0-only
 +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
 +
 +#include <linux/kernel.h>
 +#include <linux/debugfs.h>
 +#include "mtk_eth_soc.h"
 +
 +/* Pointers to one flow's address/port fields, consumed by the printers. */
 +struct mtk_flow_addr_info {
 +	void *src, *dest;
 +	u16 *src_port, *dest_port; /* NULL: no port is printed */
 +	bool ipv6; /* selects %pI6 over %pI4h output */
 +};
 +
 +static const char *mtk_foe_entry_state_str(int state)
 +{
 +	static const char * const state_str[] = {
 +		[MTK_FOE_STATE_INVALID] = "INV",
 +		[MTK_FOE_STATE_UNBIND] = "UNB",
 +		[MTK_FOE_STATE_BIND] = "BND",
 +		[MTK_FOE_STATE_FIN] = "FIN",
 +	};
 +
 +	if (state >= ARRAY_SIZE(state_str) || !state_str[state])
 +		return "UNK";
 +
 +	return state_str[state];
 +}
 +
 +static const char *mtk_foe_pkt_type_str(int type)
 +{
 +	static const char * const type_str[] = {
 +		[MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
 +		[MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
 +		[MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
 +		[MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
 +		[MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
 +		[MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
 +		[MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
 +	};
 +
 +	if (type >= ARRAY_SIZE(type_str) || !type_str[type])
 +		return "UNKNOWN";
 +
 +	return type_str[type];
 +}
 +
 +static void
 +mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
 +{
 +	u32 n_addr[4];
 +	int i;
 +
 +	if (!ipv6) {
 +		seq_printf(m, "%pI4h", addr);
 +		return;
 +	}
 +
 +	for (i = 0; i < ARRAY_SIZE(n_addr); i++)
 +		n_addr[i] = htonl(addr[i]);
 +	seq_printf(m, "%pI6", n_addr);
 +}
 +
 +static void
 +mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
 +{
 +	mtk_print_addr(m, ai->src, ai->ipv6);
 +	if (ai->src_port) /* ports are optional; NULL prints none */
 +		seq_printf(m, ":%d", *ai->src_port);
 +	seq_puts(m, "->"); /* constant text needs no format parsing */
 +	mtk_print_addr(m, ai->dest, ai->ipv6);
 +	if (ai->dest_port)
 +		seq_printf(m, ":%d", *ai->dest_port);
 +}
 +
 +static int
 +mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
 +{
 +	struct mtk_ppe *ppe = m->private;
 +	int i; /* FOE table index, printed as the first column */
 +
 +	for (i = 0; i < MTK_PPE_ENTRIES; i++) {
 +		struct mtk_foe_entry *entry = &ppe->foe_table[i];
 +		struct mtk_foe_mac_info *l2;
 +		struct mtk_flow_addr_info ai = {};
 +		unsigned char h_source[ETH_ALEN];
 +		unsigned char h_dest[ETH_ALEN];
 +		int type, state;
 +		u32 ib2;
 +
 +		/* Skip slots whose state field is zero and, with @bind, non-bound ones. */
 +		state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
 +		if (!state)
 +			continue;
 +
 +		if (bind && state != MTK_FOE_STATE_BIND)
 +			continue;
 +
 +		type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
 +		seq_printf(m, "%05x %s %7s", i,
 +			   mtk_foe_entry_state_str(state),
 +			   mtk_foe_pkt_type_str(type));
 +		/* Original tuple: port pointers stay NULL for the port-less types. */
 +		switch (type) {
 +		case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
 +		case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
 +			ai.src_port = &entry->ipv4.orig.src_port;
 +			ai.dest_port = &entry->ipv4.orig.dest_port;
 +			fallthrough;
 +		case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
 +			ai.src = &entry->ipv4.orig.src_ip;
 +			ai.dest = &entry->ipv4.orig.dest_ip;
 +			break;
 +		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
 +			ai.src_port = &entry->ipv6.src_port;
 +			ai.dest_port = &entry->ipv6.dest_port;
 +			fallthrough;
 +		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
 +		case MTK_PPE_PKT_TYPE_IPV6_6RD:
 +			ai.src = &entry->ipv6.src_ip;
 +			ai.dest = &entry->ipv6.dest_ip;
 +			ai.ipv6 = true;
 +			break;
 +		}
 +
 +		seq_printf(m, " orig=");
 +		mtk_print_addr_info(m, &ai);
 +		/* Only the IPv4 cases below print a second ("new") tuple. */
 +		switch (type) {
 +		case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
 +		case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
 +			ai.src_port = &entry->ipv4.new.src_port;
 +			ai.dest_port = &entry->ipv4.new.dest_port;
 +			fallthrough;
 +		case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
 +			ai.src = &entry->ipv4.new.src_ip;
 +			ai.dest = &entry->ipv4.new.dest_ip;
 +			seq_printf(m, " new=");
 +			mtk_print_addr_info(m, &ai);
 +			break;
 +		}
 +		/* Types from DS-Lite upward read l2/ib2 via the ipv6 member, others via ipv4. */
 +		if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
 +			l2 = &entry->ipv6.l2;
 +			ib2 = entry->ipv6.ib2;
 +		} else {
 +			l2 = &entry->ipv4.l2;
 +			ib2 = entry->ipv4.ib2;
 +		}
 +		/* Rebuild each 6-byte MAC from its 32-bit "hi" and 16-bit "lo" fields. */
 +		*((__be32 *)h_source) = htonl(l2->src_mac_hi);
 +		*((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
 +		*((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
 +		*((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
 +
 +		seq_printf(m, " eth=%pM->%pM etype=%04x"
 +			      " vlan=%d,%d ib1=%08x ib2=%08x\n",
 +			   h_source, h_dest, ntohs(l2->etype),
 +			   l2->vlan1, l2->vlan2, entry->ib1, ib2);
 +	}
 +
 +	return 0;
 +}
 +
 +static int
 +mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
 +{
 +	return mtk_ppe_debugfs_foe_show(m, private, false);
 +}
 +
 +static int
 +mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
 +{
 +	return mtk_ppe_debugfs_foe_show(m, private, true);
 +}
 +
 +static int
 +mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
 +{
 +	return single_open(file, mtk_ppe_debugfs_foe_show_all,
 +			   inode->i_private);
 +}
 +
 +static int
 +mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
 +{
 +	return single_open(file, mtk_ppe_debugfs_foe_show_bind,
 +			   inode->i_private);
 +}
 +
 +int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
 +{
 +	static const struct file_operations fops_all = {
 +		.open = mtk_ppe_debugfs_foe_open_all,
 +		.read = seq_read,
 +		.llseek = seq_lseek,
 +		.release = single_release,
 +	};
 +
 +	static const struct file_operations fops_bind = {
 +		.open = mtk_ppe_debugfs_foe_open_bind,
 +		.read = seq_read,
 +		.llseek = seq_lseek,
 +		.release = single_release,
 +	};
 +
 +	struct dentry *root;
 +	/* Read-only files in a "mtk_ppe" dir: "entries" = all flows, "bind" = bound only. */
 +	root = debugfs_create_dir("mtk_ppe", NULL);
 +	debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
 +	debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
 +
 +	return 0; /* the debugfs_create_* results are not checked */
 +}
 diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
 new file mode 100644
 index 000000000..4294f0c74
 --- /dev/null
 +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
 @@ -0,0 +1,526 @@
 +// SPDX-License-Identifier: GPL-2.0-only
 +/*
 + *  Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
 + */
 +
 +#include <linux/if_ether.h>
 +#include <linux/rhashtable.h>
 +#include <linux/ip.h>
 +#include <linux/ipv6.h>
 +#include <net/flow_offload.h>
 +#include <net/pkt_cls.h>
 +#include <net/dsa.h>
 +#include "mtk_eth_soc.h"
 +
 +struct mtk_flow_data {
 +	struct ethhdr eth;
 +
 +	union {
 +		struct {
 +			__be32 src_addr;
 +			__be32 dst_addr;
 +		} v4;
 +
 +		struct {
 +			struct in6_addr src_addr;
 +			struct in6_addr dst_addr;
 +		} v6;
 +	};
 +
 +	__be16 src_port;
 +	__be16 dst_port;
 +
 +	struct {
 +		u16 id;
 +		__be16 proto;
 +		u8 num;
 +	} vlan;
 +	struct {
 +		u16 sid;
 +		u8 num;
 +	} pppoe;
 +};
 +
 +struct mtk_flow_entry {
 +	struct rhash_head node;
 +	unsigned long cookie;
 +	u16 hash;
 +};
 +
 +static const struct rhashtable_params mtk_flow_ht_params = {
 +	.head_offset = offsetof(struct mtk_flow_entry, node),
 +	.key_offset = offsetof(struct mtk_flow_entry, cookie),
 +	.key_len = sizeof(unsigned long),
 +	.automatic_shrinking = true,
 +};
 +
 +static u32
 +mtk_eth_timestamp(struct mtk_eth *eth)
 +{
 +	return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
 +}
 +
 +static int
 +mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
 +		       bool egress)
 +{
 +	return mtk_foe_entry_set_ipv4_tuple(foe, egress,
 +					    data->v4.src_addr, data->src_port,
 +					    data->v4.dst_addr, data->dst_port);
 +}
 +
 +static int
 +mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data)
 +{
 +	return mtk_foe_entry_set_ipv6_tuple(foe,
 +					    data->v6.src_addr.s6_addr32, data->src_port,
 +					    data->v6.dst_addr.s6_addr32, data->dst_port);
 +}
 +
 +static void
 +mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
 +{
 +	void *dest = eth + act->mangle.offset;
 +	const void *src = &act->mangle.val;
 +	/* Offsets above 8 are ignored, so the widest (4-byte) copy ends at byte 12. */
 +	if (act->mangle.offset > 8)
 +		return;
 +	/* Mask 0xffff: advance both pointers by 2, into the second half of the word. */
 +	if (act->mangle.mask == 0xffff) {
 +		src += 2;
 +		dest += 2;
 +	}
 +	/* Any non-zero mask copies 2 bytes; a zero mask copies all 4. */
 +	memcpy(dest, src, act->mangle.mask ? 2 : 4);
 +}
 +
 +/* Fold a TCP/UDP port rewrite from @act into @data; -EINVAL for other offsets. */
 +static int
 +mtk_flow_mangle_ports(const struct flow_action_entry *act,
 +		      struct mtk_flow_data *data)
 +{
 +	u32 val = ntohl(act->mangle.val); /* work on the host-order word */
 +
 +	switch (act->mangle.offset) {
 +	case 0:
 +		if (act->mangle.mask == ~htonl(0xffff))
 +			data->dst_port = cpu_to_be16(val); /* low 16 bits */
 +		else
 +			data->src_port = cpu_to_be16(val >> 16); /* high 16 bits */
 +		break;
 +	case 2:
 +		data->dst_port = cpu_to_be16(val); /* low 16 bits */
 +		break;
 +	default:
 +		return -EINVAL;
 +	}
 +
 +	return 0;
 +}
 +
 +static int
 +mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
 +		     struct mtk_flow_data *data)
 +{
 +	__be32 *dest; /* address field of @data selected by the mangle offset */
 +
 +	switch (act->mangle.offset) {
 +	case offsetof(struct iphdr, saddr):
 +		dest = &data->v4.src_addr;
 +		break;
 +	case offsetof(struct iphdr, daddr):
 +		dest = &data->v4.dst_addr;
 +		break;
 +	default:
 +		return -EINVAL; /* any other IPv4 header offset */
 +	}
 +	/* The 32-bit value is copied verbatim; the mangle mask is not consulted. */
 +	memcpy(dest, &act->mangle.val, sizeof(u32));
 +
 +	return 0;
 +}
 +
 +static int
 +mtk_flow_get_dsa_port(struct net_device **dev)
 +{
 +#if IS_ENABLED(CONFIG_NET_DSA)
 +	struct dsa_port *dp;
 +
 +	dp = dsa_port_from_netdev(*dev);
 +	if (IS_ERR(dp))
 +		return -ENODEV;
 +	/* Only ports whose CPU port uses the MTK tag protocol are accepted. */
 +	if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
 +		return -ENODEV;
 +	/* Replace the caller's device with the CPU port's master netdev. */
 +	*dev = dp->cpu_dp->master;
 +
 +	return dp->index; /* port index on success */
 +#else
 +	return -ENODEV; /* built without DSA: never a DSA port */
 +#endif
 +}
 +
 +static int
 +mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
 +			   struct net_device *dev)
 +{
 +	int pse_port, dsa_port;
 +
 +	if (!dev) /* no redirect action; NULL must not match an unused MAC slot */
 +		return -EOPNOTSUPP;
 +	/* For a DSA port, record its index in @foe; the helper swaps in the master. */
 +	dsa_port = mtk_flow_get_dsa_port(&dev);
 +	if (dsa_port >= 0)
 +		mtk_foe_entry_set_dsa(foe, dsa_port);
 +	if (dev == eth->netdev[0])
 +		pse_port = 1; /* eth->netdev[0] -> PSE port 1 */
 +	else if (dev == eth->netdev[1])
 +		pse_port = 2; /* eth->netdev[1] -> PSE port 2 */
 +	else
 +		return -EOPNOTSUPP; /* not one of this driver's netdevs */
 +	mtk_foe_entry_set_pse_port(foe, pse_port);
 +	return 0;
 +}
 +
 +static int
 +mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
 +{
 +	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
 +	struct flow_action_entry *act;
 +	struct mtk_flow_data data = {};
 +	struct mtk_foe_entry foe;
 +	struct net_device *odev = NULL;
 +	struct mtk_flow_entry *entry;
 +	int offload_type = 0;
 +	u16 addr_type = 0;
 +	u32 timestamp;
 +	u8 l4proto = 0;
 +	int err = 0;
 +	int hash;
 +	int i;
 +
 +	if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
 +		return -EEXIST;
 +	/* The META, CONTROL and BASIC match keys must all be present. */
 +	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
 +		struct flow_match_meta match;
 +
 +		flow_rule_match_meta(rule, &match); /* result is not used further */
 +	} else {
 +		return -EOPNOTSUPP;
 +	}
 +
 +	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
 +		struct flow_match_control match;
 +
 +		flow_rule_match_control(rule, &match);
 +		addr_type = match.key->addr_type;
 +	} else {
 +		return -EOPNOTSUPP;
 +	}
 +
 +	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
 +		struct flow_match_basic match;
 +
 +		flow_rule_match_basic(rule, &match);
 +		l4proto = match.key->ip_proto;
 +	} else {
 +		return -EOPNOTSUPP;
 +	}
 +	/* First pass over the actions: MAC rewrites, output device, VLAN, PPPoE. */
 +	flow_action_for_each(i, act, &rule->action) {
 +		switch (act->id) {
 +		case FLOW_ACTION_MANGLE:
 +			if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
 +				mtk_flow_offload_mangle_eth(act, &data.eth);
 +			break;
 +		case FLOW_ACTION_REDIRECT:
 +			odev = act->dev;
 +			break;
 +		case FLOW_ACTION_CSUM:
 +			break;
 +		case FLOW_ACTION_VLAN_PUSH:
 +			if (data.vlan.num == 1 ||
 +			    act->vlan.proto != htons(ETH_P_8021Q))
 +				return -EOPNOTSUPP;
 +
 +			data.vlan.id = act->vlan.vid;
 +			data.vlan.proto = act->vlan.proto;
 +			data.vlan.num++;
 +			break;
 +		case FLOW_ACTION_VLAN_POP:
 +			break;
 +		case FLOW_ACTION_PPPOE_PUSH:
 +			if (data.pppoe.num == 1)
 +				return -EOPNOTSUPP;
 +
 +			data.pppoe.sid = act->pppoe.sid;
 +			data.pppoe.num++;
 +			break;
 +		default:
 +			return -EOPNOTSUPP;
 +		}
 +	}
 +	/* Only IPv4 and IPv6 address types map to a PPE packet type here. */
 +	switch (addr_type) {
 +	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
 +		offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
 +		break;
 +	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
 +		offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
 +		break;
 +	default:
 +		return -EOPNOTSUPP;
 +	}
 +	/* data.eth starts zeroed, so both MACs must have come from ETH mangles. */
 +	if (!is_valid_ether_addr(data.eth.h_source) ||
 +	    !is_valid_ether_addr(data.eth.h_dest))
 +		return -EINVAL;
 +
 +	err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
 +				    data.eth.h_source,
 +				    data.eth.h_dest);
 +	if (err)
 +		return err;
 +
 +	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
 +		struct flow_match_ports ports;
 +
 +		flow_rule_match_ports(rule, &ports);
 +		data.src_port = ports.key->src;
 +		data.dst_port = ports.key->dst;
 +	} else {
 +		return -EOPNOTSUPP;
 +	}
 +
 +	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
 +		struct flow_match_ipv4_addrs addrs;
 +
 +		flow_rule_match_ipv4_addrs(rule, &addrs);
 +
 +		data.v4.src_addr = addrs.key->src;
 +		data.v4.dst_addr = addrs.key->dst;
 +
 +		mtk_flow_set_ipv4_addr(&foe, &data, false);
 +	}
 +
 +	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
 +		struct flow_match_ipv6_addrs addrs;
 +
 +		flow_rule_match_ipv6_addrs(rule, &addrs);
 +
 +		data.v6.src_addr = addrs.key->src;
 +		data.v6.dst_addr = addrs.key->dst;
 +
 +		mtk_flow_set_ipv6_addr(&foe, &data);
 +	}
 +	/* Second pass: apply port and IPv4 address mangles on top of the match. */
 +	flow_action_for_each(i, act, &rule->action) {
 +		if (act->id != FLOW_ACTION_MANGLE)
 +			continue;
 +
 +		switch (act->mangle.htype) {
 +		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
 +		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
 +			err = mtk_flow_mangle_ports(act, &data);
 +			break;
 +		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
 +			err = mtk_flow_mangle_ipv4(act, &data);
 +			break;
 +		case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
 +			/* handled earlier */
 +			break;
 +		default:
 +			return -EOPNOTSUPP;
 +		}
 +
 +		if (err)
 +			return err;
 +	}
 +	/* For IPv4 the mangled tuple is stored as the entry's egress side. */
 +	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
 +		err = mtk_flow_set_ipv4_addr(&foe, &data, true);
 +		if (err)
 +			return err;
 +	}
 +
 +	if (data.vlan.num == 1) {
 +		if (data.vlan.proto != htons(ETH_P_8021Q))
 +			return -EOPNOTSUPP;
 +
 +		mtk_foe_entry_set_vlan(&foe, data.vlan.id);
 +	}
 +	if (data.pppoe.num == 1)
 +		mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
 +
 +	err = mtk_flow_set_output_device(eth, &foe, odev);
 +	if (err)
 +		return err;
 +	/* Track the flow by cookie so destroy/stats can find its FOE hash later. */
 +	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 +	if (!entry)
 +		return -ENOMEM;
 +
 +	entry->cookie = f->cookie;
 +	timestamp = mtk_eth_timestamp(eth);
 +	hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
 +	if (hash < 0) {
 +		err = hash;
 +		goto free;
 +	}
 +
 +	entry->hash = hash;
 +	err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
 +				     mtk_flow_ht_params);
 +	if (err < 0)
 +		goto clear_flow;
 +
 +	return 0;
 +clear_flow:
 +	mtk_foe_entry_clear(&eth->ppe, hash);
 +free:
 +	kfree(entry);
 +	return err;
 +}
 +
 +static int
 +mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
 +{
 +	struct mtk_flow_entry *entry;
 +
 +	entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
 +				  mtk_flow_ht_params);
 +	if (!entry)
 +		return -ENOENT; /* no flow tracked under this cookie */
 +	/* Clear the FOE entry first, then drop the bookkeeping for the cookie. */
 +	mtk_foe_entry_clear(&eth->ppe, entry->hash);
 +	rhashtable_remove_fast(&eth->flow_table, &entry->node,
 +			       mtk_flow_ht_params);
 +	kfree(entry);
 +
 +	return 0;
 +}
 +
 +static int
 +mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
 +{
 +	struct mtk_flow_entry *entry;
 +	int timestamp;
 +	u32 idle;
 +
 +	entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
 +				  mtk_flow_ht_params);
 +	if (!entry)
 +		return -ENOENT;
 +	/* The helper reports failure with a negative value. */
 +	timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
 +	if (timestamp < 0)
 +		return -ETIMEDOUT;
 +	/* Masked (modular) delta, so a wrapped counter gives no huge idle time. */
 +	idle = (mtk_eth_timestamp(eth) - timestamp) & MTK_FOE_IB1_BIND_TIMESTAMP;
 +	f->stats.lastused = jiffies - idle * HZ;
 +
 +	return 0;
 +}
 +
 +static DEFINE_MUTEX(mtk_flow_offload_mutex);
 +
 +static int
 +mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
 +{
 +	struct flow_cls_offload *cls = type_data;
 +	struct net_device *dev = cb_priv; /* cb_priv is the net_device */
 +	struct mtk_mac *mac = netdev_priv(dev);
 +	struct mtk_eth *eth = mac->hw;
 +	int err;
 +
 +	if (!tc_can_offload(dev))
 +		return -EOPNOTSUPP;
 +
 +	if (type != TC_SETUP_CLSFLOWER)
 +		return -EOPNOTSUPP;
 +	/* Every command runs under the file-wide mtk_flow_offload_mutex. */
 +	mutex_lock(&mtk_flow_offload_mutex);
 +	switch (cls->command) {
 +	case FLOW_CLS_REPLACE:
 +		err = mtk_flow_offload_replace(eth, cls);
 +		break;
 +	case FLOW_CLS_DESTROY:
 +		err = mtk_flow_offload_destroy(eth, cls);
 +		break;
 +	case FLOW_CLS_STATS:
 +		err = mtk_flow_offload_stats(eth, cls);
 +		break;
 +	default:
 +		err = -EOPNOTSUPP;
 +		break;
 +	}
 +	mutex_unlock(&mtk_flow_offload_mutex);
 +
 +	return err;
 +}
 +
 +static int
 +mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
 +{
 +	struct mtk_mac *mac = netdev_priv(dev);
 +	struct mtk_eth *eth = mac->hw;
 +	static LIST_HEAD(block_cb_list);
 +	struct flow_block_cb *block_cb;
 +	flow_setup_cb_t *cb;
 +
 +	if (!eth->ppe.foe_table)
 +		return -EOPNOTSUPP;
 +	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 +		return -EOPNOTSUPP;
 +
 +	cb = mtk_eth_setup_tc_block_cb;
 +	f->driver_block_list = &block_cb_list;
 +
 +	switch (f->command) {
 +	case FLOW_BLOCK_BIND:
 +		block_cb = flow_block_cb_lookup(f->block, cb, dev);
 +		if (block_cb) {
 +			flow_block_cb_incref(block_cb);
 +			return 0;
 +		}
 +		block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
 +		if (IS_ERR(block_cb))
 +			return PTR_ERR(block_cb);
 +		/* A new cb takes the reference owned by this first bind. */
 +		flow_block_cb_incref(block_cb);
 +		flow_block_cb_add(block_cb, f);
 +		list_add_tail(&block_cb->driver_list, &block_cb_list);
 +		return 0;
 +	case FLOW_BLOCK_UNBIND:
 +		block_cb = flow_block_cb_lookup(f->block, cb, dev);
 +		if (!block_cb)
 +			return -ENOENT;
 +		/* Unregister only once the last bind's reference is dropped. */
 +		if (!flow_block_cb_decref(block_cb)) {
 +			flow_block_cb_remove(block_cb, f);
 +			list_del(&block_cb->driver_list);
 +		}
 +		return 0;
 +	default:
 +		return -EOPNOTSUPP;
 +	}
 +}
 +
 +int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
 +		     void *type_data)
 +{
 +	/* Flowtable block setup (TC_SETUP_FT) is the only request type served. */
 +	if (type != TC_SETUP_FT)
 +		return -EOPNOTSUPP;
 +	return mtk_eth_setup_tc_block(dev, type_data);
 +}
 +
 +int mtk_eth_offload_init(struct mtk_eth *eth)
 +{
 +	if (!eth->ppe.foe_table)
 +		return 0;
 +
 +	return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
 +}
 diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
 new file mode 100644
 index 000000000..0c45ea090
 --- /dev/null
 +++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
 @@ -0,0 +1,144 @@
 +// SPDX-License-Identifier: GPL-2.0-only
 +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
 +
 +#ifndef __MTK_PPE_REGS_H
 +#define __MTK_PPE_REGS_H
 +
 +#define MTK_PPE_GLO_CFG				0x200
 +#define MTK_PPE_GLO_CFG_EN			BIT(0)
 +#define MTK_PPE_GLO_CFG_TSID_EN			BIT(1)
 +#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP		BIT(2)
 +#define MTK_PPE_GLO_CFG_IP4_CS_DROP		BIT(3)
 +#define MTK_PPE_GLO_CFG_TTL0_DROP		BIT(4)
 +#define MTK_PPE_GLO_CFG_PPE_BSWAP		BIT(5)
 +#define MTK_PPE_GLO_CFG_PSE_HASH_OFS		BIT(6)
 +#define MTK_PPE_GLO_CFG_MCAST_TB_EN		BIT(7)
 +#define MTK_PPE_GLO_CFG_FLOW_DROP_KA		BIT(8)
 +#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE	BIT(9)
 +#define MTK_PPE_GLO_CFG_UDP_LITE_EN		BIT(10)
 +#define MTK_PPE_GLO_CFG_UDP_LEN_DROP		BIT(11)
 +#define MTK_PPE_GLO_CFG_MCAST_ENTRIES		GENMASK(13, 12)
 +#define MTK_PPE_GLO_CFG_BUSY			BIT(31)
 +
 +#define MTK_PPE_FLOW_CFG			0x204
 +#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG		BIT(6)
 +#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG		BIT(7)
 +#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE		BIT(8)
 +#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE		BIT(9)
 +#define MTK_PPE_FLOW_CFG_IP6_6RD		BIT(10)
 +#define MTK_PPE_FLOW_CFG_IP4_NAT		BIT(12)
 +#define MTK_PPE_FLOW_CFG_IP4_NAPT		BIT(13)
 +#define MTK_PPE_FLOW_CFG_IP4_DSLITE		BIT(14)
 +#define MTK_PPE_FLOW_CFG_L2_BRIDGE		BIT(15)
 +#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST	BIT(16)
 +#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG		BIT(17)
 +#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL	BIT(18)
 +#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY	BIT(19)
 +#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY	BIT(20)
 +
 +#define MTK_PPE_IP_PROTO_CHK			0x208
 +#define MTK_PPE_IP_PROTO_CHK_IPV4		GENMASK(15, 0)
 +#define MTK_PPE_IP_PROTO_CHK_IPV6		GENMASK(31, 16)
 +
 +#define MTK_PPE_TB_CFG				0x21c
 +#define MTK_PPE_TB_CFG_ENTRY_NUM		GENMASK(2, 0)
 +#define MTK_PPE_TB_CFG_ENTRY_80B		BIT(3)
 +#define MTK_PPE_TB_CFG_SEARCH_MISS		GENMASK(5, 4)
 +#define MTK_PPE_TB_CFG_AGE_PREBIND		BIT(6)
 +#define MTK_PPE_TB_CFG_AGE_NON_L4		BIT(7)
 +#define MTK_PPE_TB_CFG_AGE_UNBIND		BIT(8)
 +#define MTK_PPE_TB_CFG_AGE_TCP			BIT(9)
 +#define MTK_PPE_TB_CFG_AGE_UDP			BIT(10)
 +#define MTK_PPE_TB_CFG_AGE_TCP_FIN		BIT(11)
 +#define MTK_PPE_TB_CFG_KEEPALIVE		GENMASK(13, 12)
 +#define MTK_PPE_TB_CFG_HASH_MODE		GENMASK(15, 14)
 +#define MTK_PPE_TB_CFG_SCAN_MODE		GENMASK(17, 16)
 +#define MTK_PPE_TB_CFG_HASH_DEBUG		GENMASK(19, 18)
 +
 +enum {
 +	MTK_PPE_SCAN_MODE_DISABLED,
 +	MTK_PPE_SCAN_MODE_CHECK_AGE,
 +	MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
 +};
 +
 +enum {
 +	MTK_PPE_KEEPALIVE_DISABLE,
 +	MTK_PPE_KEEPALIVE_UNICAST_CPU,
 +	MTK_PPE_KEEPALIVE_DUP_CPU = 3,
 +};
 +
 +enum {
 +	MTK_PPE_SEARCH_MISS_ACTION_DROP,
 +	MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
 +	MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
 +};
 +
 +#define MTK_PPE_TB_BASE				0x220
 +
 +#define MTK_PPE_TB_USED				0x224
 +#define MTK_PPE_TB_USED_NUM			GENMASK(13, 0)
 +
 +#define MTK_PPE_BIND_RATE			0x228
 +#define MTK_PPE_BIND_RATE_BIND			GENMASK(15, 0)
 +#define MTK_PPE_BIND_RATE_PREBIND		GENMASK(31, 16)
 +
 +#define MTK_PPE_BIND_LIMIT0			0x22c
 +#define MTK_PPE_BIND_LIMIT0_QUARTER		GENMASK(13, 0)
 +#define MTK_PPE_BIND_LIMIT0_HALF		GENMASK(29, 16)
 +
 +#define MTK_PPE_BIND_LIMIT1			0x230
 +#define MTK_PPE_BIND_LIMIT1_FULL		GENMASK(13, 0)
 +#define MTK_PPE_BIND_LIMIT1_NON_L4		GENMASK(23, 16)
 +
 +#define MTK_PPE_KEEPALIVE			0x234
 +#define MTK_PPE_KEEPALIVE_TIME			GENMASK(15, 0)
 +#define MTK_PPE_KEEPALIVE_TIME_TCP		GENMASK(23, 16)
 +#define MTK_PPE_KEEPALIVE_TIME_UDP		GENMASK(31, 24)
 +
 +#define MTK_PPE_UNBIND_AGE			0x238
 +#define MTK_PPE_UNBIND_AGE_MIN_PACKETS		GENMASK(31, 16)
 +#define MTK_PPE_UNBIND_AGE_DELTA		GENMASK(7, 0)
 +
 +#define MTK_PPE_BIND_AGE0			0x23c
 +#define MTK_PPE_BIND_AGE0_DELTA_NON_L4		GENMASK(30, 16)
 +#define MTK_PPE_BIND_AGE0_DELTA_UDP		GENMASK(14, 0)
 +
 +#define MTK_PPE_BIND_AGE1			0x240
 +#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN		GENMASK(30, 16)
 +#define MTK_PPE_BIND_AGE1_DELTA_TCP		GENMASK(14, 0)
 +
 +#define MTK_PPE_HASH_SEED			0x244
 +
 +#define MTK_PPE_DEFAULT_CPU_PORT		0x248
 +#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n)	(GENMASK(2, 0) << ((_n) * 4))
 +
 +#define MTK_PPE_MTU_DROP			0x308
 +
 +#define MTK_PPE_VLAN_MTU0			0x30c
 +#define MTK_PPE_VLAN_MTU0_NONE			GENMASK(13, 0)
 +#define MTK_PPE_VLAN_MTU0_1TAG			GENMASK(29, 16)
 +
 +#define MTK_PPE_VLAN_MTU1			0x310
 +#define MTK_PPE_VLAN_MTU1_2TAG			GENMASK(13, 0)
 +#define MTK_PPE_VLAN_MTU1_3TAG			GENMASK(29, 16)
 +
 +#define MTK_PPE_VPM_TPID			0x318
 +
 +#define MTK_PPE_CACHE_CTL			0x320
 +#define MTK_PPE_CACHE_CTL_EN			BIT(0)
 +#define MTK_PPE_CACHE_CTL_LOCK_CLR		BIT(4)
 +#define MTK_PPE_CACHE_CTL_REQ			BIT(8)
 +#define MTK_PPE_CACHE_CTL_CLEAR			BIT(9)
 +#define MTK_PPE_CACHE_CTL_CMD			GENMASK(13, 12)
 +
 +#define MTK_PPE_MIB_CFG				0x334
 +#define MTK_PPE_MIB_CFG_EN			BIT(0)
 +#define MTK_PPE_MIB_CFG_RD_CLR			BIT(1)
 +
 +#define MTK_PPE_MIB_TB_BASE			0x338
 +
 +#define MTK_PPE_MIB_CACHE_CTL			0x350
 +#define MTK_PPE_MIB_CACHE_CTL_EN		BIT(0)
 +#define MTK_PPE_MIB_CACHE_CTL_FLUSH		BIT(2)
 +
 +#endif
 diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
 index a085213dc..813e30495 100644
 --- a/drivers/net/ppp/ppp_generic.c
 +++ b/drivers/net/ppp/ppp_generic.c
 @@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
  		ppp_destroy_interface(ppp);
  }

 +static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
 +				 struct net_device_path *path)
 +{
 +	struct ppp *ppp = netdev_priv(ctx->dev);
 +	struct ppp_channel *chan;
 +	struct channel *pch;
 +
 +	if (ppp->flags & SC_MULTILINK)
 +		return -EOPNOTSUPP; /* multilink units are not resolved */
 +
 +	if (list_empty(&ppp->channels))
 +		return -ENODEV;
 +	/* Delegate to the first channel's fill_forward_path op, if it has one. */
 +	pch = list_first_entry(&ppp->channels, struct channel, clist);
 +	chan = pch->chan;
 +	if (!chan->ops->fill_forward_path)
 +		return -EOPNOTSUPP;
 +
 +	return chan->ops->fill_forward_path(ctx, path, chan);
 +}
 +
  static const struct net_device_ops ppp_netdev_ops = {
  	.ndo_init	 = ppp_dev_init,
  	.ndo_uninit      = ppp_dev_uninit,
  	.ndo_start_xmit  = ppp_start_xmit,
  	.ndo_do_ioctl    = ppp_net_ioctl,
  	.ndo_get_stats64 = ppp_get_stats64,
 +	.ndo_fill_forward_path = ppp_fill_forward_path,
  };

  static struct device_type ppp_type = {
 diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
 index 087b01684..7a8c246ab 100644
 --- a/drivers/net/ppp/pppoe.c
 +++ b/drivers/net/ppp/pppoe.c
 @@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
  	return __pppoe_xmit(sk, skb);
  }

 +static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
 +				   struct net_device_path *path,
 +				   const struct ppp_channel *chan)
 +{
 +	struct sock *sk = (struct sock *)chan->private;
 +	struct pppox_sock *po = pppox_sk(sk);
 +	struct net_device *dev = po->pppoe_dev;
 +
 +	if (sock_flag(sk, SOCK_DEAD) ||
 +	    !(sk->sk_state & PPPOX_CONNECTED) || !dev)
 +		return -1; /* note: a bare -1, not an errno constant */
 +	/* Describe the PPPoE hop, then point the walk at the underlying device. */
 +	path->type = DEV_PATH_PPPOE;
 +	path->encap.proto = htons(ETH_P_PPP_SES);
 +	path->encap.id = be16_to_cpu(po->num);
 +	memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
 +	memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
 +	path->dev = ctx->dev; /* this hop is the PPP device itself */
 +	ctx->dev = dev; /* next lookup starts from pppoe_dev */
 +
 +	return 0;
 +}
 +
  static const struct ppp_channel_ops pppoe_chan_ops = {
  	.start_xmit = pppoe_xmit,
 +	.fill_forward_path = pppoe_fill_forward_path,
  };

  static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
 index 38af42bf8..9f64504ac 100644
 --- a/include/linux/netdevice.h
 +++ b/include/linux/netdevice.h
 @@ -829,6 +829,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  				       struct sk_buff *skb,
  				       struct net_device *sb_dev);

 +enum net_device_path_type {
 +	DEV_PATH_ETHERNET = 0,
 +	DEV_PATH_VLAN,
 +	DEV_PATH_BRIDGE,
 +	DEV_PATH_PPPOE,
 +	DEV_PATH_DSA,
 +};
 +
 +struct net_device_path {
 +	enum net_device_path_type	type;
 +	const struct net_device		*dev;
 +	union {
 +		struct {
 +			u16		id;
 +			__be16		proto;
 +			u8		h_dest[ETH_ALEN];
 +		} encap;
 +		struct {
 +			enum {
 +				DEV_PATH_BR_VLAN_KEEP,
 +				DEV_PATH_BR_VLAN_TAG,
 +				DEV_PATH_BR_VLAN_UNTAG,
 +				DEV_PATH_BR_VLAN_UNTAG_HW,
 +			}		vlan_mode;
 +			u16		vlan_id;
 +			__be16		vlan_proto;
 +		} bridge;
 +		struct {
 +			int port;
 +			u16 proto;
 +		} dsa;
 +	};
 +};
 +
 +#define NET_DEVICE_PATH_STACK_MAX	5
 +#define NET_DEVICE_PATH_VLAN_MAX	2
 +
 +struct net_device_path_stack {
 +	int			num_paths;
 +	struct net_device_path	path[NET_DEVICE_PATH_STACK_MAX];
 +};
 +
 +struct net_device_path_ctx {
 +	const struct net_device *dev;
 +	u8			daddr[ETH_ALEN];
 +
 +	int			num_vlans;
 +	struct {
 +		u16		id;
 +		__be16		proto;
 +	} vlan[NET_DEVICE_PATH_VLAN_MAX];
 +};
 +
  enum tc_setup_type {
  	TC_SETUP_QDISC_MQPRIO,
  	TC_SETUP_CLSU32,
 @@ -844,6 +897,7 @@ enum tc_setup_type {
  	TC_SETUP_ROOT_QDISC,
  	TC_SETUP_QDISC_GRED,
  	TC_SETUP_QDISC_TAPRIO,
 +	TC_SETUP_FT,
  };

  /* These structures hold the attributes of bpf state that are being passed
 @@ -1239,6 +1293,8 @@ struct tlsdev_ops;
   *	Get devlink port instance associated with a given netdev.
   *	Called with a reference on the netdevice and devlink locks only,
   *	rtnl_lock is not held.
 + * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
 + *     Get the forwarding path to reach the real device from the HW destination address
   */
  struct net_device_ops {
  	int			(*ndo_init)(struct net_device *dev);
 @@ -1436,6 +1492,8 @@ struct net_device_ops {
  	int			(*ndo_xsk_wakeup)(struct net_device *dev,
  						  u32 queue_id, u32 flags);
  	struct devlink_port *	(*ndo_get_devlink_port)(struct net_device *dev);
 +	int                     (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
 +                                                         struct net_device_path *path);
  };

  /**
 @@ -2661,6 +2719,8 @@ void dev_remove_offload(struct packet_offload *po);

  int dev_get_iflink(const struct net_device *dev);
  int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
 +int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
 +			  struct net_device_path_stack *stack);
  struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
  				      unsigned short mask);
  struct net_device *dev_get_by_name(struct net *net, const char *name);
 diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
 index 98966064e..91f9a9283 100644
 --- a/include/linux/ppp_channel.h
 +++ b/include/linux/ppp_channel.h
 @@ -28,6 +28,9 @@ struct ppp_channel_ops {
  	int	(*start_xmit)(struct ppp_channel *, struct sk_buff *);
  	/* Handle an ioctl call that has come in via /dev/ppp. */
  	int	(*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
 +	int	(*fill_forward_path)(struct net_device_path_ctx *,
 +				     struct net_device_path *,
 +				     const struct ppp_channel *);
  };

  struct ppp_channel {
 diff --git a/include/net/dsa.h b/include/net/dsa.h
 index 05f66d487..cafc74218 100644
 --- a/include/net/dsa.h
 +++ b/include/net/dsa.h
 @@ -561,6 +561,8 @@ struct dsa_switch_ops {
  					  struct sk_buff *skb);
  };

 +struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
 +
  struct dsa_switch_driver {
  	struct list_head	list;
  	const struct dsa_switch_ops *ops;
 @@ -653,6 +655,14 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
  #define BRCM_TAG_GET_PORT(v)		((v) >> 8)
  #define BRCM_TAG_GET_QUEUE(v)		((v) & 0xff)

 +#if IS_ENABLED(CONFIG_NET_DSA)
 +bool dsa_slave_dev_check(const struct net_device *dev);
 +#else
 +static inline bool dsa_slave_dev_check(const struct net_device *dev)
 +{
 +	return false;
 +}
 +#endif

  netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
  int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
 diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
 index c6f7bd22d..59b873653 100644
 --- a/include/net/flow_offload.h
 +++ b/include/net/flow_offload.h
 @@ -138,6 +138,7 @@ enum flow_action_id {
  	FLOW_ACTION_MPLS_PUSH,
  	FLOW_ACTION_MPLS_POP,
  	FLOW_ACTION_MPLS_MANGLE,
 +	FLOW_ACTION_PPPOE_PUSH,
  	NUM_FLOW_ACTIONS,
  };

 @@ -213,6 +214,9 @@ struct flow_action_entry {
  			u8		bos;
  			u8		ttl;
  		} mpls_mangle;
 +		struct {				/* FLOW_ACTION_PPPOE_PUSH */
 +			u16		sid;
 +		} pppoe;
  	};
  };

 diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
 index 2c739fc75..89ab8f180 100644
 --- a/include/net/ip6_route.h
 +++ b/include/net/ip6_route.h
 @@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
  	       !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
  }

 -static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 +static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
 +						     bool forwarding)
  {
  	struct inet6_dev *idev;
  	unsigned int mtu;

 -	if (dst_metric_locked(dst, RTAX_MTU)) {
 +	if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
  		mtu = dst_metric_raw(dst, RTAX_MTU);
  		if (mtu)
  			goto out;
 diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
 index 7b3c873f8..e95483192 100644
 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
 +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
 @@ -4,7 +4,4 @@

  extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;

 -#include <linux/sysctl.h>
 -extern struct ctl_table nf_ct_ipv6_sysctl_table[];
 -
  #endif /* _NF_CONNTRACK_IPV6_H*/
 diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
 index 90690e37a..ce0bc3e62 100644
 --- a/include/net/netfilter/nf_conntrack.h
 +++ b/include/net/netfilter/nf_conntrack.h
 @@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
  	       !nf_ct_is_dying(ct);
  }

 +#define	NF_CT_DAY	(86400 * HZ)
 +
 +/* Set an arbitrary timeout large enough not to ever expire; this saves
 + * us a check for the IPS_OFFLOAD_BIT from the packet path via
 + * nf_ct_is_expired().
 + */
 +static inline void nf_ct_offload_timeout(struct nf_conn *ct)
 +{
 +	if (nf_ct_expires(ct) < NF_CT_DAY / 2) /* under half a day remaining */
 +		WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
 +}
 +
  struct kernel_param;

  int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
 diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
 index f7a060c6e..7f44a7715 100644
 --- a/include/net/netfilter/nf_conntrack_acct.h
 +++ b/include/net/netfilter/nf_conntrack_acct.h
 @@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
  #endif
  }

 +void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
 +		    unsigned int bytes);
 +
 +static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
 +				     unsigned int bytes)
 +{
 +#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 +	nf_ct_acct_add(ct, dir, 1, bytes);
 +#endif
 +}
 +
  void nf_conntrack_acct_pernet_init(struct net *net);

  int nf_conntrack_acct_init(void);
 diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
 index 68d7fc92..7cf89767 100644
 --- a/include/net/netfilter/nf_flow_table.h
 +++ b/include/net/netfilter/nf_flow_table.h
 @@ -8,31 +8,99 @@
  #include <linux/rcupdate.h>
  #include <linux/netfilter.h>
  #include <linux/netfilter/nf_conntrack_tuple_common.h>
 +#include <net/flow_offload.h>
  #include <net/dst.h>
 +#include <linux/if_pppox.h>
 +#include <linux/ppp_defs.h>

  struct nf_flowtable;
 +struct nf_flow_rule;
 +struct flow_offload;
 +enum flow_offload_tuple_dir;
 +
 +struct nf_flow_key {
 +	struct flow_dissector_key_meta			meta;
 +	struct flow_dissector_key_control		control;
 +	struct flow_dissector_key_control		enc_control;
 +	struct flow_dissector_key_basic			basic;
 +	struct flow_dissector_key_vlan			vlan;
 +	struct flow_dissector_key_vlan			cvlan;
 +	union {
 +		struct flow_dissector_key_ipv4_addrs	ipv4;
 +		struct flow_dissector_key_ipv6_addrs	ipv6;
 +	};
 +	struct flow_dissector_key_keyid			enc_key_id;
 +	union {
 +		struct flow_dissector_key_ipv4_addrs	enc_ipv4;
 +		struct flow_dissector_key_ipv6_addrs	enc_ipv6;
 +	};
 +	struct flow_dissector_key_tcp			tcp;
 +	struct flow_dissector_key_ports			tp;
 +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 +
 +struct nf_flow_match {
 +	struct flow_dissector	dissector;
 +	struct nf_flow_key	key;
 +	struct nf_flow_key	mask;
 +};
 +
 +struct nf_flow_rule {
 +	struct nf_flow_match	match;
 +	struct flow_rule	*rule;
 +};

  struct nf_flowtable_type {
  	struct list_head		list;
  	int				family;
  	int				(*init)(struct nf_flowtable *ft);
 +	int				(*setup)(struct nf_flowtable *ft,
 +						 struct net_device *dev,
 +						 enum flow_block_command cmd);
 +	int				(*action)(struct net *net,
 +						  const struct flow_offload *flow,
 +						  enum flow_offload_tuple_dir dir,
 +						  struct nf_flow_rule *flow_rule);
  	void				(*free)(struct nf_flowtable *ft);
  	nf_hookfn			*hook;
  	struct module			*owner;
  };

 +enum nf_flowtable_flags {
 +	NF_FLOWTABLE_HW_OFFLOAD		= 0x1,	/* NFT_FLOWTABLE_HW_OFFLOAD */
 +	NF_FLOWTABLE_COUNTER		= 0x2,	/* NFT_FLOWTABLE_COUNTER */
 +};
 +
  struct nf_flowtable {
  	struct list_head		list;
  	struct rhashtable		rhashtable;
 +	int				priority;
  	const struct nf_flowtable_type	*type;
  	struct delayed_work		gc_work;
 +	unsigned int			flags;
 +	struct flow_block		flow_block;
 +	struct rw_semaphore		flow_block_lock; /* Guards flow_block */
 +	possible_net_t			net;
  };

 +static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
 +{
 +	return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
 +}
 +
  enum flow_offload_tuple_dir {
  	FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
  	FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
 -	FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
  };
 +#define FLOW_OFFLOAD_DIR_MAX	IP_CT_DIR_MAX
 +
 +enum flow_offload_xmit_type {
 +	FLOW_OFFLOAD_XMIT_UNSPEC	= 0,
 +	FLOW_OFFLOAD_XMIT_NEIGH,
 +	FLOW_OFFLOAD_XMIT_XFRM,
 +	FLOW_OFFLOAD_XMIT_DIRECT,
 +};
 +
 +#define NF_FLOW_TABLE_ENCAP_MAX		2

  struct flow_offload_tuple {
  	union {
 @@ -52,13 +120,30 @@ struct flow_offload_tuple {

  	u8				l3proto;
  	u8				l4proto;
 -	u8				dir;
 +	struct {
 +		u16			id;
 +		__be16			proto;
 +	} encap[NF_FLOW_TABLE_ENCAP_MAX];

 -	u16				mtu;
 +	/* All members above are keys for lookups, see flow_offload_hash(). */
 +	struct { }			__hash;

 -	struct {
 -		struct dst_entry *dst_cache;
 -		u32		dst_cookie;
 +	u8				dir:2,
 +					xmit_type:2,
 +					encap_num:2,
 +					in_vlan_ingress:2;
 +	u16				mtu;
 +	union {
 +		struct {
 +			struct dst_entry *dst_cache;
 +			u32		dst_cookie;
 +		};
 +		struct {
 +			u32		ifidx;
 +			u32		hw_ifidx;
 +			u8		h_source[ETH_ALEN];
 +			u8		h_dest[ETH_ALEN];
 +		} out;
  	};
  };

 @@ -67,52 +152,139 @@ struct flow_offload_tuple_rhash {
  	struct flow_offload_tuple	tuple;
  };

 -#define FLOW_OFFLOAD_SNAT	0x1
 -#define FLOW_OFFLOAD_DNAT	0x2
 -#define FLOW_OFFLOAD_DYING	0x4
 -#define FLOW_OFFLOAD_TEARDOWN	0x8
 +enum nf_flow_flags {
 +	NF_FLOW_SNAT,
 +	NF_FLOW_DNAT,
 +	NF_FLOW_TEARDOWN,
 +	NF_FLOW_HW,
 +	NF_FLOW_HW_DYING,
 +	NF_FLOW_HW_DEAD,
 +	NF_FLOW_HW_PENDING,
 +};
 +
 +enum flow_offload_type {
 +	NF_FLOW_OFFLOAD_UNSPEC	= 0,
 +	NF_FLOW_OFFLOAD_ROUTE,
 +};

  struct flow_offload {
  	struct flow_offload_tuple_rhash		tuplehash[FLOW_OFFLOAD_DIR_MAX];
 -	u32					flags;
 -	union {
 -		/* Your private driver data here. */
 -		u32		timeout;
 -	};
 +	struct nf_conn				*ct;
 +	unsigned long				flags;
 +	u16					type;
 +	u32					timeout;
 +	struct rcu_head				rcu_head;
  };

  #define NF_FLOW_TIMEOUT (30 * HZ)
 +#define nf_flowtable_time_stamp	(u32)jiffies
 +
 +unsigned long flow_offload_get_timeout(struct flow_offload *flow);
 +
 +static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
 +{
 +	return (__s32)(timeout - nf_flowtable_time_stamp);
 +}

  struct nf_flow_route {
  	struct {
 -		struct dst_entry	*dst;
 +		struct dst_entry		*dst;
 +		struct {
 +			u32			ifindex;
 +			struct {
 +				u16		id;
 +				__be16		proto;
 +			} encap[NF_FLOW_TABLE_ENCAP_MAX];
 +			u8			num_encaps:2,
 +						ingress_vlans:2;
 +		} in;
 +		struct {
 +			u32			ifindex;
 +			u32			hw_ifindex;
 +			u8			h_source[ETH_ALEN];
 +			u8			h_dest[ETH_ALEN];
 +		} out;
 +		enum flow_offload_xmit_type	xmit_type;
  	} tuple[FLOW_OFFLOAD_DIR_MAX];
  };

 -struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
 -					struct nf_flow_route *route);
 +struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
  void flow_offload_free(struct flow_offload *flow);

 +static inline int
 +nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
 +			     flow_setup_cb_t *cb, void *cb_priv)
 +{
 +	struct flow_block *block = &flow_table->flow_block;
 +	struct flow_block_cb *block_cb;
 +	int err = 0;
 +
 +	down_write(&flow_table->flow_block_lock);
 +	block_cb = flow_block_cb_lookup(block, cb, cb_priv);
 +	if (block_cb) {
 +		err = -EEXIST;
 +		goto unlock;
 +	}
 +
 +	block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
 +	if (IS_ERR(block_cb)) {
 +		err = PTR_ERR(block_cb);
 +		goto unlock;
 +	}
 +
 +	list_add_tail(&block_cb->list, &block->cb_list);
 +
 +unlock:
 +	up_write(&flow_table->flow_block_lock);
 +	return err;
 +}
 +
 +static inline void
 +nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
 +			     flow_setup_cb_t *cb, void *cb_priv)
 +{
 +	struct flow_block *block = &flow_table->flow_block;
 +	struct flow_block_cb *block_cb;
 +
 +	down_write(&flow_table->flow_block_lock);
 +	block_cb = flow_block_cb_lookup(block, cb, cb_priv);
 +	if (block_cb) {
 +		list_del(&block_cb->list);
 +		flow_block_cb_free(block_cb);
 +	} else {
 +		WARN_ON(true);
 +	}
 +	up_write(&flow_table->flow_block_lock);
 +}
 +
 +int flow_offload_route_init(struct flow_offload *flow,
 +			    const struct nf_flow_route *route);
 +
  int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
 +void flow_offload_refresh(struct nf_flowtable *flow_table,
 +			  struct flow_offload *flow);
 +
  struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
  						     struct flow_offload_tuple *tuple);
 +void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
 +			      struct net_device *dev);
  void nf_flow_table_cleanup(struct net_device *dev);

  int nf_flow_table_init(struct nf_flowtable *flow_table);
  void nf_flow_table_free(struct nf_flowtable *flow_table);

  void flow_offload_teardown(struct flow_offload *flow);
 -static inline void flow_offload_dead(struct flow_offload *flow)
 -{
 -	flow->flags |= FLOW_OFFLOAD_DYING;
 -}

 -int nf_flow_snat_port(const struct flow_offload *flow,
 -		      struct sk_buff *skb, unsigned int thoff,
 -		      u8 protocol, enum flow_offload_tuple_dir dir);
 -int nf_flow_dnat_port(const struct flow_offload *flow,
 -		      struct sk_buff *skb, unsigned int thoff,
 -		      u8 protocol, enum flow_offload_tuple_dir dir);
 +int nf_flow_table_iterate(struct nf_flowtable *flow_table,
 +			  void (*iter)(struct flow_offload *flow, void *data),
 +			  void *data);
 +
 +void nf_flow_snat_port(const struct flow_offload *flow,
 +		       struct sk_buff *skb, unsigned int thoff,
 +		       u8 protocol, enum flow_offload_tuple_dir dir);
 +void nf_flow_dnat_port(const struct flow_offload *flow,
 +		       struct sk_buff *skb, unsigned int thoff,
 +		       u8 protocol, enum flow_offload_tuple_dir dir);

  struct flow_ports {
  	__be16 source, dest;
 @@ -126,4 +298,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
  #define MODULE_ALIAS_NF_FLOWTABLE(family)	\
  	MODULE_ALIAS("nf-flowtable-" __stringify(family))

 +void nf_flow_offload_add(struct nf_flowtable *flowtable,
 +			 struct flow_offload *flow);
 +void nf_flow_offload_del(struct nf_flowtable *flowtable,
 +			 struct flow_offload *flow);
 +void nf_flow_offload_stats(struct nf_flowtable *flowtable,
 +			   struct flow_offload *flow);
 +
 +void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
 +int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
 +				struct net_device *dev,
 +				enum flow_block_command cmd);
 +int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
 +			    enum flow_offload_tuple_dir dir,
 +			    struct nf_flow_rule *flow_rule);
 +int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
 +			    enum flow_offload_tuple_dir dir,
 +			    struct nf_flow_rule *flow_rule);
 +
 +int nf_flow_table_offload_init(void);
 +void nf_flow_table_offload_exit(void);
 +
 +static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
 +{
 +	__be16 proto;
 +
 +	proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
 +			     sizeof(struct pppoe_hdr)));
 +	switch (proto) {
 +	case htons(PPP_IP):
 +		return htons(ETH_P_IP);
 +	case htons(PPP_IPV6):
 +		return htons(ETH_P_IPV6);
 +	}
 +
 +	return 0;
 +}
 +
  #endif /* _NF_FLOW_TABLE_H */
 diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
 index 806454e76..9e3963c8f 100644
 --- a/include/net/netns/conntrack.h
 +++ b/include/net/netns/conntrack.h
 @@ -27,6 +27,9 @@ struct nf_tcp_net {
  	int tcp_loose;
  	int tcp_be_liberal;
  	int tcp_max_retrans;
 +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 +	unsigned int offload_timeout;
 +#endif
  };

  enum udp_conntrack {
 @@ -37,6 +40,9 @@ enum udp_conntrack {

  struct nf_udp_net {
  	unsigned int timeouts[UDP_CT_MAX];
 +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 +	unsigned int offload_timeout;
 +#endif
  };

  struct nf_icmp_net {
 diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
 index 336014bf8..ae698d11c 100644
 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h
 +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
 @@ -105,14 +105,19 @@ enum ip_conntrack_status {
  	IPS_OFFLOAD_BIT = 14,
  	IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),

 +	/* Conntrack has been offloaded to hardware. */
 +	IPS_HW_OFFLOAD_BIT = 15,
 +	IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
 +
  	/* Be careful here, modifying these bits can make things messy,
  	 * so don't let users modify them directly.
  	 */
  	IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
  				 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
 -				 IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
 +				 IPS_SEQ_ADJUST | IPS_TEMPLATE |
 +				 IPS_OFFLOAD | IPS_HW_OFFLOAD),

 -	__IPS_MAX_BIT = 15,
 +	__IPS_MAX_BIT = 16,
  };

  /* Connection tracking event types */
 diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
 new file mode 100644
 index 000000000..5841bbe0e
 --- /dev/null
 +++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
 @@ -0,0 +1,17 @@
 +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 +#ifndef _XT_FLOWOFFLOAD_H
 +#define _XT_FLOWOFFLOAD_H
 +
 +#include <linux/types.h>
 +
 +enum {
 +	XT_FLOWOFFLOAD_HW	= 1 << 0,
 +
 +	XT_FLOWOFFLOAD_MASK	= XT_FLOWOFFLOAD_HW
 +};
 +
 +struct xt_flowoffload_target_info {
 +	__u32 flags;
 +};
 +
 +#endif /* _XT_FLOWOFFLOAD_H */
 diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
 index 589615ec4..444ab5fae 100644
 --- a/net/8021q/vlan_dev.c
 +++ b/net/8021q/vlan_dev.c
 @@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
  	return real_dev->ifindex;
  }

 +static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
 +				      struct net_device_path *path)
 +{
 +	struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
 +
 +	path->type = DEV_PATH_VLAN;
 +	path->encap.id = vlan->vlan_id;
 +	path->encap.proto = vlan->vlan_proto;
 +	path->dev = ctx->dev;
 +	ctx->dev = vlan->real_dev;
 +	if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
 +		return -ENOSPC;
 +
 +	ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
 +	ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
 +	ctx->num_vlans++;
 +
 +	return 0;
 +}
 +
  static const struct ethtool_ops vlan_ethtool_ops = {
  	.get_link_ksettings	= vlan_ethtool_get_link_ksettings,
  	.get_drvinfo	        = vlan_ethtool_get_drvinfo,
 @@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = {
  #endif
  	.ndo_fix_features	= vlan_dev_fix_features,
  	.ndo_get_iflink		= vlan_dev_get_iflink,
 +	.ndo_fill_forward_path	= vlan_dev_fill_forward_path,
  };

  static void vlan_dev_free(struct net_device *dev)
 diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
 index 501f77f0f..0940b44cd 100644
 --- a/net/bridge/br_device.c
 +++ b/net/bridge/br_device.c
 @@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
  	return br_del_if(br, slave_dev);
  }

 +static int br_fill_forward_path(struct net_device_path_ctx *ctx,
 +				struct net_device_path *path)
 +{
 +	struct net_bridge_fdb_entry *f;
 +	struct net_bridge_port *dst;
 +	struct net_bridge *br;
 +
 +	if (netif_is_bridge_port(ctx->dev))
 +		return -1;
 +
 +	br = netdev_priv(ctx->dev);
 +
 +	br_vlan_fill_forward_path_pvid(br, ctx, path);
 +
 +	f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
 +	if (!f || !f->dst)
 +		return -1;
 +
 +	dst = READ_ONCE(f->dst);
 +	if (!dst)
 +		return -1;
 +
 +	if (br_vlan_fill_forward_path_mode(br, dst, path))
 +		return -1;
 +
 +	path->type = DEV_PATH_BRIDGE;
 +	path->dev = dst->br->dev;
 +	ctx->dev = dst->dev;
 +
 +	switch (path->bridge.vlan_mode) {
 +	case DEV_PATH_BR_VLAN_TAG:
 +		if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
 +			return -ENOSPC;
 +		ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
 +		ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
 +		ctx->num_vlans++;
 +		break;
 +	case DEV_PATH_BR_VLAN_UNTAG_HW:
 +	case DEV_PATH_BR_VLAN_UNTAG:
 +		ctx->num_vlans--;
 +		break;
 +	case DEV_PATH_BR_VLAN_KEEP:
 +		break;
 +	}
 +
 +	return 0;
 +}
 +
  static const struct ethtool_ops br_ethtool_ops = {
  	.get_drvinfo    = br_getinfo,
  	.get_link	= ethtool_op_get_link,
 @@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = {
  	.ndo_bridge_setlink	 = br_setlink,
  	.ndo_bridge_dellink	 = br_dellink,
  	.ndo_features_check	 = passthru_features_check,
 +	.ndo_fill_forward_path	 = br_fill_forward_path,
  };

  static struct device_type br_type = {
 diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
 index a736be8a1..4bd9e9b57 100644
 --- a/net/bridge/br_private.h
 +++ b/net/bridge/br_private.h
 @@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
  int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
  			 void *ptr);

 +void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
 +				    struct net_device_path_ctx *ctx,
 +				    struct net_device_path *path);
 +int br_vlan_fill_forward_path_mode(struct net_bridge *br,
 +				   struct net_bridge_port *dst,
 +				   struct net_device_path *path);
 +
  static inline struct net_bridge_vlan_group *br_vlan_group(
  					const struct net_bridge *br)
  {
 @@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
  	return 0;
  }

 +static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
 +						  struct net_device_path_ctx *ctx,
 +						  struct net_device_path *path)
 +{
 +}
 +
 +static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
 +						 struct net_bridge_port *dst,
 +						 struct net_device_path *path)
 +{
 +	return 0;
 +}
 +
  static inline struct net_bridge_vlan_group *br_vlan_group(
  					const struct net_bridge *br)
  {
 diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
 index 9257292bd..bcfd16924 100644
 --- a/net/bridge/br_vlan.c
 +++ b/net/bridge/br_vlan.c
 @@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
  }
  EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);

 +/* Fill path->bridge VLAN id/proto/mode for br_fill_forward_path(). */
 +void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
 +				    struct net_device_path_ctx *ctx,
 +				    struct net_device_path *path)
 +{
 +	struct net_bridge_vlan_group *vg;
 +	int idx = ctx->num_vlans - 1;
 +	u16 vid;
 +
 +	path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
 +
 +	if (!br_opt_get(br, BROPT_VLAN_ENABLED))
 +		return;
 +
 +	/* RCU accessor, like the other forward-path lookups */
 +	vg = br_vlan_group_rcu(br);
 +	if (idx >= 0 && ctx->vlan[idx].proto == br->vlan_proto) {
 +		vid = ctx->vlan[idx].id;
 +	} else {
 +		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
 +		vid = br_get_pvid(vg);
 +	}
 +
 +	path->bridge.vlan_id = vid;
 +	path->bridge.vlan_proto = br->vlan_proto;
 +}
 +
 +int br_vlan_fill_forward_path_mode(struct net_bridge *br,
 +				   struct net_bridge_port *dst,
 +				   struct net_device_path *path)
 +{
 +	struct net_bridge_vlan_group *vg;
 +	struct net_bridge_vlan *v;
 +
 +	if (!br_opt_get(br, BROPT_VLAN_ENABLED))
 +		return 0;
 +
 +	vg = nbp_vlan_group_rcu(dst);
 +	v = br_vlan_find(vg, path->bridge.vlan_id);
 +	if (!v || !br_vlan_should_use(v))
 +		return -EINVAL;
 +
 +	if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
 +		return 0;
 +
 +	if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
 +		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
 +	else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
 +		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
 +	else
 +		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
 +
 +	return 0;
 +}
 +
  int br_vlan_get_info(const struct net_device *dev, u16 vid,
  		     struct bridge_vlan_info *p_vinfo)
  {
 diff --git a/net/core/dev.c b/net/core/dev.c
 index fe2c856b9..4f0edb218 100644
 --- a/net/core/dev.c
 +++ b/net/core/dev.c
 @@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
  }
  EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);

 +static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
 +{
 +	int k = stack->num_paths++;
 +
 +	if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
 +		return NULL;
 +
 +	return &stack->path[k];
 +}
 +
 +int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
 +			  struct net_device_path_stack *stack)
 +{
 +	const struct net_device *last_dev;
 +	struct net_device_path_ctx ctx = {
 +		.dev	= dev,
 +	};
 +	struct net_device_path *path;
 +	int ret = 0;
 +
 +	memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
 +	stack->num_paths = 0;
 +	while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
 +		last_dev = ctx.dev;
 +		path = dev_fwd_path(stack);
 +		if (!path)
 +			return -1;
 +
 +		memset(path, 0, sizeof(struct net_device_path));
 +		ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
 +		if (ret < 0)
 +			return -1;
 +
 +		if (WARN_ON_ONCE(last_dev == ctx.dev))
 +			return -1;
 +	}
 +	path = dev_fwd_path(stack);
 +	if (!path)
 +		return -1;
 +	path->type = DEV_PATH_ETHERNET;
 +	path->dev = ctx.dev;
 +
 +	return ret;
 +}
 +EXPORT_SYMBOL_GPL(dev_fill_forward_path);
 +
  /**
   *	__dev_get_by_name	- find a device by its name
   *	@net: the applicable net namespace
 diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
 index ca80f8699..35a1249a9 100644
 --- a/net/dsa/dsa.c
 +++ b/net/dsa/dsa.c
 @@ -329,6 +329,15 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
  }
  EXPORT_SYMBOL_GPL(call_dsa_notifiers);

 +struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
 +{
 +	if (!netdev || !dsa_slave_dev_check(netdev))
 +		return ERR_PTR(-ENODEV);
 +
 +	return dsa_slave_to_port(netdev);
 +}
 +EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
 +
  static int __init dsa_init_module(void)
  {
  	int rc;
 diff --git a/net/dsa/slave.c b/net/dsa/slave.c
 index 036fda317..2dfaa1eac 100644
 --- a/net/dsa/slave.c
 +++ b/net/dsa/slave.c
 @@ -22,8 +22,6 @@

  #include "dsa_priv.h"

 -static bool dsa_slave_dev_check(const struct net_device *dev);
 -
  /* slave mii_bus handling ***************************************************/
  static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
  {
 @@ -1033,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
  	}
  }

 +static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
 +				    void *type_data)
 +{
 +	struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
 +	struct net_device *master = cpu_dp->master;
 +
 +	if (!master->netdev_ops->ndo_setup_tc)
 +		return -EOPNOTSUPP;
 +
 +	return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
 +}
 +
  static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
  			      void *type_data)
  {
  	struct dsa_port *dp = dsa_slave_to_port(dev);
  	struct dsa_switch *ds = dp->ds;

 -	if (type == TC_SETUP_BLOCK)
 +	switch (type) {
 +	case TC_SETUP_BLOCK:
  		return dsa_slave_setup_tc_block(dev, type_data);
 +	case TC_SETUP_FT:
 +		return dsa_slave_setup_ft_block(ds, dp->index, type_data);
 +	default:
 +		break;
 +	}

  	if (!ds->ops->port_setup_tc)
  		return -EOPNOTSUPP;
 @@ -1226,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
  	return dp->ds->devlink ? &dp->devlink_port : NULL;
  }

 +static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
 +				       struct net_device_path *path)
 +{
 +	struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
 +	struct dsa_port *cpu_dp = dp->cpu_dp;
 +
 +	path->dev = ctx->dev;
 +	path->type = DEV_PATH_DSA;
 +	path->dsa.proto = cpu_dp->tag_ops->proto;
 +	path->dsa.port = dp->index;
 +	ctx->dev = cpu_dp->master;
 +
 +	return 0;
 +}
 +
  static const struct net_device_ops dsa_slave_netdev_ops = {
  	.ndo_open	 	= dsa_slave_open,
  	.ndo_stop		= dsa_slave_close,
 @@ -1250,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
  	.ndo_vlan_rx_add_vid	= dsa_slave_vlan_rx_add_vid,
  	.ndo_vlan_rx_kill_vid	= dsa_slave_vlan_rx_kill_vid,
  	.ndo_get_devlink_port	= dsa_slave_get_devlink_port,
 +	.ndo_fill_forward_path	= dsa_slave_fill_forward_path,
  };

  static struct device_type dsa_type = {
 @@ -1497,10 +1529,11 @@ void dsa_slave_destroy(struct net_device *slave_dev)
  	free_netdev(slave_dev);
  }

 -static bool dsa_slave_dev_check(const struct net_device *dev)
 +bool dsa_slave_dev_check(const struct net_device *dev)
  {
  	return dev->netdev_ops == &dsa_slave_netdev_ops;
  }
 +EXPORT_SYMBOL_GPL(dsa_slave_dev_check);

  static int dsa_slave_changeupper(struct net_device *dev,
  				 struct netdev_notifier_changeupper_info *info)
 diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
 index f17b40211..803b92e4c 100644
 --- a/net/ipv4/netfilter/Kconfig
 +++ b/net/ipv4/netfilter/Kconfig
 @@ -56,8 +56,6 @@ config NF_TABLES_ARP
  	help
  	  This option enables the ARP support for nf_tables.

 -endif # NF_TABLES
 -
  config NF_FLOW_TABLE_IPV4
  	tristate "Netfilter flow table IPv4 module"
  	depends on NF_FLOW_TABLE
 @@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4

  	  To compile it as a module, choose M here.

 +endif # NF_TABLES
 +
  config NF_DUP_IPV4
  	tristate "Netfilter IPv4 packet duplication to alternate destination"
  	depends on !NF_CONNTRACK || NF_CONNTRACK
 diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
 index 5585e3a94..bb76f6061 100644
 --- a/net/ipv6/ip6_output.c
 +++ b/net/ipv6/ip6_output.c
 @@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
  		}
  	}

 -	mtu = ip6_dst_mtu_forward(dst);
 +	mtu = ip6_dst_mtu_maybe_forward(dst, true);
  	if (mtu < IPV6_MIN_MTU)
  		mtu = IPV6_MIN_MTU;

 diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
 index 69443e9a3..0b481d236 100644
 --- a/net/ipv6/netfilter/Kconfig
 +++ b/net/ipv6/netfilter/Kconfig
 @@ -45,7 +45,6 @@ config NFT_FIB_IPV6
  	  multicast or blackhole.

  endif # NF_TABLES_IPV6
 -endif # NF_TABLES

  config NF_FLOW_TABLE_IPV6
  	tristate "Netfilter flow table IPv6 module"
 @@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6

  	  To compile it as a module, choose M here.

 +endif # NF_TABLES
 +
  config NF_DUP_IPV6
  	tristate "Netfilter IPv6 packet duplication to alternate destination"
  	depends on !NF_CONNTRACK || NF_CONNTRACK
 diff --git a/net/ipv6/route.c b/net/ipv6/route.c
 index 98aaf0b79..2b357ac71 100644
 --- a/net/ipv6/route.c
 +++ b/net/ipv6/route.c
 @@ -83,7 +83,7 @@ enum rt6_nud_state {

  static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
 -static unsigned int	 ip6_mtu(const struct dst_entry *dst);
 +static unsigned int	ip6_mtu(const struct dst_entry *dst);
  static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  static void		ip6_dst_destroy(struct dst_entry *);
  static void		ip6_dst_ifdown(struct dst_entry *,
 @@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)

  static unsigned int ip6_mtu(const struct dst_entry *dst)
  {
 -	struct inet6_dev *idev;
 -	unsigned int mtu;
 -
 -	mtu = dst_metric_raw(dst, RTAX_MTU);
 -	if (mtu)
 -		goto out;
 -
 -	mtu = IPV6_MIN_MTU;
 -
 -	rcu_read_lock();
 -	idev = __in6_dev_get(dst->dev);
 -	if (idev)
 -		mtu = idev->cnf.mtu6;
 -	rcu_read_unlock();
 -
 -out:
 -	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
 -
 -	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 +	return ip6_dst_mtu_maybe_forward(dst, false);
  }

  /* MTU selection:
 diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
 index b967763f5..c040e713a 100644
 --- a/net/netfilter/Kconfig
 +++ b/net/netfilter/Kconfig
 @@ -690,8 +690,6 @@ config NFT_FIB_NETDEV

  endif # NF_TABLES_NETDEV

 -endif # NF_TABLES
 -
  config NF_FLOW_TABLE_INET
  	tristate "Netfilter flow table mixed IPv4/IPv6 module"
  	depends on NF_FLOW_TABLE
 @@ -700,11 +698,12 @@ config NF_FLOW_TABLE_INET

  	  To compile it as a module, choose M here.

 +endif # NF_TABLES
 +
  config NF_FLOW_TABLE
  	tristate "Netfilter flow table module"
  	depends on NETFILTER_INGRESS
  	depends on NF_CONNTRACK
 -	depends on NF_TABLES
  	help
  	  This option adds the flow table core infrastructure.

 @@ -984,6 +983,15 @@ config NETFILTER_XT_TARGET_NOTRACK
  	depends on NETFILTER_ADVANCED
  	select NETFILTER_XT_TARGET_CT

 +config NETFILTER_XT_TARGET_FLOWOFFLOAD
 +	tristate '"FLOWOFFLOAD" target support'
 +	depends on NF_FLOW_TABLE
 +	depends on NETFILTER_INGRESS
 +	help
 +	  This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
 +	  module to speed up processing of packets by bypassing the usual
 +	  netfilter chains.
 +
  config NETFILTER_XT_TARGET_RATEEST
  	tristate '"RATEEST" target support'
  	depends on NETFILTER_ADVANCED
 diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
 index 4fc075b61..d93a121bc 100644
 --- a/net/netfilter/Makefile
 +++ b/net/netfilter/Makefile
 @@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV)	+= nft_fwd_netdev.o

  # flow table infrastructure
  obj-$(CONFIG_NF_FLOW_TABLE)	+= nf_flow_table.o
 -nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
 +nf_flow_table-objs		:= nf_flow_table_core.o nf_flow_table_ip.o \
 +				   nf_flow_table_offload.o

  obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o

 @@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
  obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
  obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
  obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
 +obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
  obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
  obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
  obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
 diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
 index f6ab6f484..f689e19d8 100644
 --- a/net/netfilter/nf_conntrack_core.c
 +++ b/net/netfilter/nf_conntrack_core.c
 @@ -864,9 +864,8 @@ out:
  }
  EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);

 -static inline void nf_ct_acct_update(struct nf_conn *ct,
 -				     enum ip_conntrack_info ctinfo,
 -				     unsigned int len)
 +void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
 +		    unsigned int bytes)
  {
  	struct nf_conn_acct *acct;

 @@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
  	if (acct) {
  		struct nf_conn_counter *counter = acct->counter;

 -		atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
 -		atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
 +		atomic64_add(packets, &counter[dir].packets);
 +		atomic64_add(bytes, &counter[dir].bytes);
  	}
  }
 +EXPORT_SYMBOL_GPL(nf_ct_acct_add);

  static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
  			     const struct nf_conn *loser_ct)
 @@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,

  		/* u32 should be fine since we must have seen one packet. */
  		bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
 -		nf_ct_acct_update(ct, ctinfo, bytes);
 +		nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
  	}
  }

 @@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work)

  			tmp = nf_ct_tuplehash_to_ctrack(h);

 -			if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
 +			if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
 +				nf_ct_offload_timeout(tmp);
  				continue;
 +			}

  			if (nf_ct_is_expired(tmp)) {
  				nf_ct_gc_expired(tmp);
 @@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
  		WRITE_ONCE(ct->timeout, extra_jiffies);
  acct:
  	if (do_acct)
 -		nf_ct_acct_update(ct, ctinfo, skb->len);
 +		nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
  }
  EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);

 @@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct,
  		     enum ip_conntrack_info ctinfo,
  		     const struct sk_buff *skb)
  {
 -	nf_ct_acct_update(ct, ctinfo, skb->len);
 +	nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);

  	return nf_ct_delete(ct, 0, 0);
  }
 diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
 index 7204f0366..3742bae21 100644
 --- a/net/netfilter/nf_conntrack_proto_tcp.c
 +++ b/net/netfilter/nf_conntrack_proto_tcp.c
 @@ -1453,6 +1453,10 @@ void nf_conntrack_tcp_init_net(struct net *net)
  	tn->tcp_loose = nf_ct_tcp_loose;
  	tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
  	tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
 +
 +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 +	tn->offload_timeout = 30 * HZ;
 +#endif
  }

  const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
 diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
 index e3a2d018f..a1579d6c3 100644
 --- a/net/netfilter/nf_conntrack_proto_udp.c
 +++ b/net/netfilter/nf_conntrack_proto_udp.c
 @@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net)

  	for (i = 0; i < UDP_CT_MAX; i++)
  		un->timeouts[i] = udp_timeouts[i];
 +
 +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 +	un->offload_timeout = 30 * HZ;
 +#endif
  }

  const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
 diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
 index 9c6259c28..10d9f93ce 100644
 --- a/net/netfilter/nf_conntrack_standalone.c
 +++ b/net/netfilter/nf_conntrack_standalone.c
 @@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
  	if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
  		goto release;

 -	if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
 +	if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
 +		seq_puts(s, "[HW_OFFLOAD] ");
 +	else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
  		seq_puts(s, "[OFFLOAD] ");
  	else if (test_bit(IPS_ASSURED_BIT, &ct->status))
  		seq_puts(s, "[ASSURED] ");
 @@ -620,11 +622,17 @@ enum nf_ct_sysctl_index {
  	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
  	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
  	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
 +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 +	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
 +#endif
  	NF_SYSCTL_CT_PROTO_TCP_LOOSE,
  	NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
  	NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
  	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
  	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
 +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 +	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
 +#endif
  	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
  	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
  #ifdef CONFIG_NF_CT_PROTO_SCTP
 @@ -812,6 +820,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
  		.mode		= 0644,
  		.proc_handler	= proc_dointvec_jiffies,
  	},
 +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 +	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
 +		.procname	= "nf_flowtable_tcp_timeout",
 +		.maxlen		= sizeof(unsigned int),
 +		.mode		= 0644,
 +		.proc_handler	= proc_dointvec_jiffies,
 +	},
 +#endif
  	[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
  		.procname	= "nf_conntrack_tcp_loose",
  		.maxlen		= sizeof(int),
 @@ -846,6 +862,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
  		.mode		= 0644,
  		.proc_handler	= proc_dointvec_jiffies,
  	},
 +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 +	[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
 +		.procname	= "nf_flowtable_udp_timeout",
 +		.maxlen		= sizeof(unsigned int),
 +		.mode		= 0644,
 +		.proc_handler	= proc_dointvec_jiffies,
 +	},
 +#endif
  	[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
  		.procname	= "nf_conntrack_icmp_timeout",
  		.maxlen		= sizeof(unsigned int),
 @@ -1028,6 +1052,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
  	XASSIGN(LIBERAL, &tn->tcp_be_liberal);
  	XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
  #undef XASSIGN
 +
 +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 +	table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
 +#endif
 +
  }

  static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
 @@ -1115,6 +1144,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
  	table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
  	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
  	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
 +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 +	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
 +#endif

  	nf_conntrack_standalone_init_tcp_sysctl(net, table);
  	nf_conntrack_standalone_init_sctp_sysctl(net, table);
 diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
 index f212cec0..10365581 100644
 --- a/net/netfilter/nf_flow_table_core.c
 +++ b/net/netfilter/nf_flow_table_core.c
 @@ -7,43 +7,21 @@
  #include <linux/netdevice.h>
  #include <net/ip.h>
  #include <net/ip6_route.h>
 -#include <net/netfilter/nf_tables.h>
  #include <net/netfilter/nf_flow_table.h>
  #include <net/netfilter/nf_conntrack.h>
  #include <net/netfilter/nf_conntrack_core.h>
  #include <net/netfilter/nf_conntrack_l4proto.h>
  #include <net/netfilter/nf_conntrack_tuple.h>

 -struct flow_offload_entry {
 -	struct flow_offload	flow;
 -	struct nf_conn		*ct;
 -	struct rcu_head		rcu_head;
 -};
 -
  static DEFINE_MUTEX(flowtable_lock);
  static LIST_HEAD(flowtables);

 -static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
 -{
 -	const struct rt6_info *rt;
 -
 -	if (flow_tuple->l3proto == NFPROTO_IPV6) {
 -		rt = (const struct rt6_info *)flow_tuple->dst_cache;
 -		return rt6_get_cookie(rt);
 -	}
 -
 -	return 0;
 -}
 -
  static void
 -flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
 -		      struct nf_flow_route *route,
 +flow_offload_fill_dir(struct flow_offload *flow,
  		      enum flow_offload_tuple_dir dir)
  {
  	struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
 -	struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
 -	struct dst_entry *other_dst = route->tuple[!dir].dst;
 -	struct dst_entry *dst = route->tuple[dir].dst;
 +	struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;

  	ft->dir = dir;

 @@ -51,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
  	case NFPROTO_IPV4:
  		ft->src_v4 = ctt->src.u3.in;
  		ft->dst_v4 = ctt->dst.u3.in;
 -		ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
  		break;
  	case NFPROTO_IPV6:
  		ft->src_v6 = ctt->src.u3.in6;
  		ft->dst_v6 = ctt->dst.u3.in6;
 -		ft->mtu = ip6_dst_mtu_forward(dst);
  		break;
  	}

 @@ -64,50 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
  	ft->l4proto = ctt->dst.protonum;
  	ft->src_port = ctt->src.u.tcp.port;
  	ft->dst_port = ctt->dst.u.tcp.port;
 -
 -	ft->iifidx = other_dst->dev->ifindex;
 -	ft->dst_cache = dst;
 -	ft->dst_cookie = flow_offload_dst_cookie(ft);
  }

 -struct flow_offload *
 -flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
 +struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
  {
 -	struct flow_offload_entry *entry;
  	struct flow_offload *flow;

  	if (unlikely(nf_ct_is_dying(ct) ||
  	    !atomic_inc_not_zero(&ct->ct_general.use)))
  		return NULL;

 -	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 -	if (!entry)
 +	flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
 +	if (!flow)
  		goto err_ct_refcnt;

 -	flow = &entry->flow;
 -
 -	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
 -		goto err_dst_cache_original;
 -
 -	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
 -		goto err_dst_cache_reply;
 +	flow->ct = ct;

 -	entry->ct = ct;
 -
 -	flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
 -	flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
 +	flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
 +	flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);

  	if (ct->status & IPS_SRC_NAT)
 -		flow->flags |= FLOW_OFFLOAD_SNAT;
 +		__set_bit(NF_FLOW_SNAT, &flow->flags);
  	if (ct->status & IPS_DST_NAT)
 -		flow->flags |= FLOW_OFFLOAD_DNAT;
 +		__set_bit(NF_FLOW_DNAT, &flow->flags);

  	return flow;

 -err_dst_cache_reply:
 -	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
 -err_dst_cache_original:
 -	kfree(entry);
  err_ct_refcnt:
  	nf_ct_put(ct);

 @@ -115,40 +73,135 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
  }
  EXPORT_SYMBOL_GPL(flow_offload_alloc);

 -static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
 +static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
  {
 -	tcp->state = TCP_CONNTRACK_ESTABLISHED;
 -	tcp->seen[0].td_maxwin = 0;
 -	tcp->seen[1].td_maxwin = 0;
 +	const struct rt6_info *rt;
 +
 +	if (flow_tuple->l3proto == NFPROTO_IPV6) {
 +		rt = (const struct rt6_info *)flow_tuple->dst_cache;
 +		return rt6_get_cookie(rt);
 +	}
 +
 +	return 0;
  }

 -#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT	(120 * HZ)
 -#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT	(30 * HZ)
 +static int flow_offload_fill_route(struct flow_offload *flow,
 +				   const struct nf_flow_route *route,
 +				   enum flow_offload_tuple_dir dir)
 +{	/* Copy @route's data for @dir into flow->tuplehash[dir].tuple; 0 on success, -1 if dst_hold_safe() fails. */
 +	struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
 +	struct dst_entry *dst = route->tuple[dir].dst;
 +	int i, j = 0;	/* i indexes the route's encap[], j the tuple's encap[] */
 +
 +	switch (flow_tuple->l3proto) {
 +	case NFPROTO_IPV4:
 +		flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
 +		break;
 +	case NFPROTO_IPV6:
 +		flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
 +		break;
 +	}
 +
 +	flow_tuple->iifidx = route->tuple[dir].in.ifindex;
 +	for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {	/* store the encaps in reverse order */
 +		flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
 +		flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
 +		if (route->tuple[dir].in.ingress_vlans & BIT(i))
 +			flow_tuple->in_vlan_ingress |= BIT(j);	/* bit i of the route mask maps to bit j here */
 +		j++;
 +	}
 +	flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
 +
 +	switch (route->tuple[dir].xmit_type) {
 +	case FLOW_OFFLOAD_XMIT_DIRECT:	/* copies addresses and ifindexes; dst_cache is not set on this path */
 +		memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
 +		       ETH_ALEN);
 +		memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
 +		       ETH_ALEN);
 +		flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
 +		flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
 +		break;
 +	case FLOW_OFFLOAD_XMIT_XFRM:
 +	case FLOW_OFFLOAD_XMIT_NEIGH:
 +		if (!dst_hold_safe(route->tuple[dir].dst))
 +			return -1;	/* xmit_type is not written for this direction on this path */
 +
 +		flow_tuple->dst_cache = dst;	/* dropped again by nft_flow_dst_release() */
 +		flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
 +		break;
 +	default:
 +		WARN_ON_ONCE(1);	/* unexpected xmit_type */
 +		break;
 +	}
 +	flow_tuple->xmit_type = route->tuple[dir].xmit_type;	/* nft_flow_dst_release() keys off this field */
 +
 +	return 0;
 +}
 +
 +static void nft_flow_dst_release(struct flow_offload *flow,
 +				 enum flow_offload_tuple_dir dir)
 +{	/* Drop the dst_cache reference of flow->tuplehash[dir], if that tuple holds one. */
 +	if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
 +	    flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
 +		dst_release(flow->tuplehash[dir].tuple.dst_cache);	/* only these types took a hold in flow_offload_fill_route() */
 +}
 +
 +int flow_offload_route_init(struct flow_offload *flow,
 +			    const struct nf_flow_route *route)
 +{	/* Fill both directions of @flow from @route; returns 0 or the negative value from flow_offload_fill_route(). */
 +	int err;
 +
 +	err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
 +	if (err < 0)
 +		return err;	/* nothing to undo yet */
 +
 +	err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
 +	if (err < 0)
 +		goto err_route_reply;
 +
 +	flow->type = NF_FLOW_OFFLOAD_ROUTE;	/* flow_offload_free() releases the routes for this type */
 +
 +	return 0;
 +
 +err_route_reply:
 +	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);	/* undo the ORIGINAL direction's dst hold */
 +
 +	return err;
 +}
 +EXPORT_SYMBOL_GPL(flow_offload_route_init);

 -static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
 +static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
  {
 -	return (__s32)(timeout - (u32)jiffies);
 +	tcp->state = TCP_CONNTRACK_ESTABLISHED;
 +	tcp->seen[0].td_maxwin = 0;
 +	tcp->seen[1].td_maxwin = 0;
  }

  static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
  {
 -	const struct nf_conntrack_l4proto *l4proto;
 +	struct net *net = nf_ct_net(ct);
  	int l4num = nf_ct_protonum(ct);
 -	unsigned int timeout;
 +	s32 timeout;	/* signed: the subtraction below can go negative */

 -	l4proto = nf_ct_l4proto_find(l4num);
 -	if (!l4proto)
 -		return;
 +	if (l4num == IPPROTO_TCP) {
 +		struct nf_tcp_net *tn = nf_tcp_pernet(net);

 -	if (l4num == IPPROTO_TCP)
 -		timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
 -	else if (l4num == IPPROTO_UDP)
 -		timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
 -	else
 +		timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
 +		timeout -= tn->offload_timeout;	/* replaces the fixed NF_FLOWTABLE_TCP_PICKUP_TIMEOUT */
 +	} else if (l4num == IPPROTO_UDP) {
 +		struct nf_udp_net *tn = nf_udp_pernet(net);
 +
 +		timeout = tn->timeouts[UDP_CT_REPLIED];
 +		timeout -= tn->offload_timeout;	/* replaces the fixed NF_FLOWTABLE_UDP_PICKUP_TIMEOUT */
 +	} else {
  		return;
 +	}
 +
 +	if (timeout < 0)
 +		timeout = 0;	/* offload_timeout exceeds the conntrack timeout: clamp to zero */

 -	if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
 -		ct->timeout = nfct_time_stamp + timeout;
 +	if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
 +		WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);	/* only ever shortens ct->timeout */
  }

  static void flow_offload_fixup_ct_state(struct nf_conn *ct)
 @@ -163,17 +216,23 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
  	flow_offload_fixup_ct_timeout(ct);
  }

 -void flow_offload_free(struct flow_offload *flow)
 +static void flow_offload_route_release(struct flow_offload *flow)
  {
 -	struct flow_offload_entry *e;
 +	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
 +	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);	/* both directions may hold a dst */
 +}

 -	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
 -	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
 -	e = container_of(flow, struct flow_offload_entry, flow);
 -	if (flow->flags & FLOW_OFFLOAD_DYING)
 -		nf_ct_delete(e->ct, 0, 0);
 -	nf_ct_put(e->ct);
 -	kfree_rcu(e, rcu_head);
 +void flow_offload_free(struct flow_offload *flow)
 +{
 +	switch (flow->type) {
 +	case NF_FLOW_OFFLOAD_ROUTE:
 +		flow_offload_route_release(flow);
 +		break;
 +	default:
 +		break;	/* any other type: no route state is released here */
 +	}
 +	nf_ct_put(flow->ct);	/* pairs with the ct reference taken in flow_offload_alloc(); no nf_ct_delete() on this path any more */
 +	kfree_rcu(flow, rcu_head);	/* the flow is freed directly now that struct flow_offload_entry is gone */
 +}
  EXPORT_SYMBOL_GPL(flow_offload_free);

 @@ -181,14 +240,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
  {
  	const struct flow_offload_tuple *tuple = data;

 -	return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
 +	return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
  }

  static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
  {
  	const struct flow_offload_tuple_rhash *tuplehash = data;

 -	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
 +	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
  }

  static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
 @@ -197,7 +256,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
  	const struct flow_offload_tuple *tuple = arg->key;
  	const struct flow_offload_tuple_rhash *x = ptr;

 -	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
 +	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
  		return 1;

  	return 0;
 @@ -211,30 +270,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
  	.automatic_shrinking	= true,
  };

 -#define        DAY     (86400 * HZ)
 -
 -/* Set an arbitrary timeout large enough not to ever expire, this save
 - * us a check for the IPS_OFFLOAD_BIT from the packet path via
 - * nf_ct_is_expired().
 - */
 -static void nf_ct_offload_timeout(struct flow_offload *flow)
 +unsigned long flow_offload_get_timeout(struct flow_offload *flow)
  {
 -	struct flow_offload_entry *entry;
 -	struct nf_conn *ct;
 +	unsigned long timeout = NF_FLOW_TIMEOUT;	/* fallback for protocols other than TCP/UDP */
 +	struct net *net = nf_ct_net(flow->ct);
 +	int l4num = nf_ct_protonum(flow->ct);

 -	entry = container_of(flow, struct flow_offload_entry, flow);
 -	ct = entry->ct;
 +	if (l4num == IPPROTO_TCP) {
 +		struct nf_tcp_net *tn = nf_tcp_pernet(net);

 -	if (nf_ct_expires(ct) < DAY / 2)
 -		ct->timeout = nfct_time_stamp + DAY;
 +		timeout = tn->offload_timeout;	/* TCP value looked up via nf_tcp_pernet(net) */
 +	} else if (l4num == IPPROTO_UDP) {
 +		struct nf_udp_net *tn = nf_udp_pernet(net);
 +
 +		timeout = tn->offload_timeout;	/* UDP value looked up via nf_udp_pernet(net) */
 +	}
 +
 +	return timeout;	/* a duration, not a deadline: callers add it to nf_flowtable_time_stamp */
  }

  int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
  {
  	int err;

 -	nf_ct_offload_timeout(flow);
 -	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
 +	flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);

  	err = rhashtable_insert_fast(&flow_table->rhashtable,
  				     &flow->tuplehash[0].node,
 @@ -252,10 +311,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
  		return err;
  	}

 +	nf_ct_offload_timeout(flow->ct);
 +
 +	if (nf_flowtable_hw_offload(flow_table)) {
 +		__set_bit(NF_FLOW_HW, &flow->flags);
 +		nf_flow_offload_add(flow_table, flow);
 +	}
 +
  	return 0;
  }
  EXPORT_SYMBOL_GPL(flow_offload_add);

 +void flow_offload_refresh(struct nf_flowtable *flow_table,
 +			  struct flow_offload *flow)
 +{	/* Recompute @flow's deadline and, for hw-offload tables, call nf_flow_offload_add() again. */
 +	u32 timeout;
 +
 +	timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);	/* new absolute deadline */
 +	if (timeout - READ_ONCE(flow->timeout) > HZ)	/* skip the store when the deadline would advance by at most HZ */
 +		WRITE_ONCE(flow->timeout, timeout);
 +	else
 +		return;
 +
 +	if (likely(!nf_flowtable_hw_offload(flow_table)))
 +		return;	/* table without hw offload: nothing more to do */
 +
 +	nf_flow_offload_add(flow_table, flow);
 +}
 +EXPORT_SYMBOL_GPL(flow_offload_refresh);
 +
  static inline bool nf_flow_has_expired(const struct flow_offload *flow)
  {
  	return nf_flow_timeout_delta(flow->timeout) <= 0;
 @@ -264,8 +348,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
  static void flow_offload_del(struct nf_flowtable *flow_table,
  			     struct flow_offload *flow)
  {
 -	struct flow_offload_entry *e;
 -
  	rhashtable_remove_fast(&flow_table->rhashtable,
  			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
  			       nf_flow_offload_rhash_params);
 @@ -273,28 +355,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
  			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
  			       nf_flow_offload_rhash_params);

 -	e = container_of(flow, struct flow_offload_entry, flow);
 -	clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
 +	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);

  	if (nf_flow_has_expired(flow))
 -		flow_offload_fixup_ct(e->ct);
 -	else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
 -		flow_offload_fixup_ct_timeout(e->ct);
 -
 -	if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
 -		flow_offload_fixup_ct_state(e->ct);
 +		flow_offload_fixup_ct(flow->ct);
 +	else
 +		flow_offload_fixup_ct_timeout(flow->ct);

  	flow_offload_free(flow);
  }

  void flow_offload_teardown(struct flow_offload *flow)
  {
 -	struct flow_offload_entry *e;
 -
 -	flow->flags |= FLOW_OFFLOAD_TEARDOWN;
 +	set_bit(NF_FLOW_TEARDOWN, &flow->flags);	/* flow_offload_lookup() returns NULL for flows with this bit set */

 -	e = container_of(flow, struct flow_offload_entry, flow);
 -	flow_offload_fixup_ct_state(e->ct);
 +	flow_offload_fixup_ct_state(flow->ct);	/* ct is reached through flow->ct; the container_of() wrapper lookup is gone */
  }
  EXPORT_SYMBOL_GPL(flow_offload_teardown);

 @@ -304,7 +379,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
  {
  	struct flow_offload_tuple_rhash *tuplehash;
  	struct flow_offload *flow;
 -	struct flow_offload_entry *e;
  	int dir;

  	tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
 @@ -314,19 +388,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table,

  	dir = tuplehash->tuple.dir;
  	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 -	if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
 +	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
  		return NULL;

 -	e = container_of(flow, struct flow_offload_entry, flow);
 -	if (unlikely(nf_ct_is_dying(e->ct)))
 +	if (unlikely(nf_ct_is_dying(flow->ct)))
  		return NULL;

  	return tuplehash;
  }
  EXPORT_SYMBOL_GPL(flow_offload_lookup);

 -static int
 -nf_flow_table_iterate(struct nf_flowtable *flow_table,
 +int nf_flow_table_iterate(struct nf_flowtable *flow_table,
  		      void (*iter)(struct flow_offload *flow, void *data),
  		      void *data)
  {
 @@ -339,7 +411,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
  	rhashtable_walk_start(&hti);

  	while ((tuplehash = rhashtable_walk_next(&hti))) {
 -
  		if (IS_ERR(tuplehash)) {
  			if (PTR_ERR(tuplehash) != -EAGAIN) {
  				err = PTR_ERR(tuplehash);
 @@ -359,23 +430,49 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,

  	return err;
  }
 +EXPORT_SYMBOL_GPL(nf_flow_table_iterate);

 -static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
 +static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
  {
 -	struct nf_flowtable *flow_table = data;
 -	struct flow_offload_entry *e;
 -	bool teardown;
 +	struct dst_entry *dst;

 -	e = container_of(flow, struct flow_offload_entry, flow);
 +	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
 +	    tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
 +		dst = tuple->dst_cache;
 +		if (!dst_check(dst, tuple->dst_cookie))
 +			return true;	/* dst_check() rejected the cached dst for the stored cookie */
 +	}

 -	teardown = flow->flags & (FLOW_OFFLOAD_DYING |
 -				  FLOW_OFFLOAD_TEARDOWN);
 +	return false;	/* other xmit types are never reported stale */
 +}

 -	if (!teardown)
 -		nf_ct_offload_timeout(flow);
 +static bool nf_flow_has_stale_dst(struct flow_offload *flow)
 +{
 +	return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
 +	       flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);	/* stale if either direction is */
 +}

 -	if (nf_flow_has_expired(flow) || teardown)
 -		flow_offload_del(flow_table, flow);
 +static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
 +{
 +	struct nf_flowtable *flow_table = data;
 +
 +	if (nf_flow_has_expired(flow) ||
 +	    nf_ct_is_dying(flow->ct) ||
 +	    nf_flow_has_stale_dst(flow))
 +		set_bit(NF_FLOW_TEARDOWN, &flow->flags);	/* expired, conntrack dying or stale dst: mark for removal */
 +
 +	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
 +		if (test_bit(NF_FLOW_HW, &flow->flags)) {
 +			if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
 +				nf_flow_offload_del(flow_table, flow);	/* NOTE(review): presumably requests hw removal and sets NF_FLOW_HW_DYING - confirm */
 +			else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
 +				flow_offload_del(flow_table, flow);	/* hw flows are deleted only once NF_FLOW_HW_DEAD is set */
 +		} else {
 +			flow_offload_del(flow_table, flow);	/* NF_FLOW_HW not set: delete right away */
 +		}
 +	} else if (test_bit(NF_FLOW_HW, &flow->flags)) {
 +		nf_flow_offload_stats(flow_table, flow);	/* NOTE(review): presumably refreshes counters from hw - confirm */
 +	}
  }

  static void nf_flow_offload_work_gc(struct work_struct *work)
 @@ -387,30 +484,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
  	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
  }

 -static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
 -				__be16 port, __be16 new_port)
 +static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
 +				 __be16 port, __be16 new_port)	/* NOTE(review): pull/writable checks dropped here; presumably done by the caller - confirm */
  {
  	struct tcphdr *tcph;

 -	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
 -	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
 -		return -1;
 -
  	tcph = (void *)(skb_network_header(skb) + thoff);
  	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
 -
 -	return 0;
  }

 -static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
 -				__be16 port, __be16 new_port)
 +static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
 +				 __be16 port, __be16 new_port)
  {
  	struct udphdr *udph;

 -	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
 -	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
 -		return -1;
 -
  	udph = (void *)(skb_network_header(skb) + thoff);
  	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
  		inet_proto_csum_replace2(&udph->check, skb, port,
 @@ -418,38 +505,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
  		if (!udph->check)
  			udph->check = CSUM_MANGLED_0;
  	}
 -
 -	return 0;
  }

 -static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
 -			    u8 protocol, __be16 port, __be16 new_port)
 +static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
 +			     u8 protocol, __be16 port, __be16 new_port)	/* void now: the TCP/UDP helpers no longer return an error */
  {
  	switch (protocol) {
  	case IPPROTO_TCP:
 -		if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
 -			return NF_DROP;
 +		nf_flow_nat_port_tcp(skb, thoff, port, new_port);
  		break;
  	case IPPROTO_UDP:
 -		if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
 -			return NF_DROP;
 +		nf_flow_nat_port_udp(skb, thoff, port, new_port);	/* any other protocol value falls through the switch untouched */
  		break;
  	}
 -
 -	return 0;
  }

 -int nf_flow_snat_port(const struct flow_offload *flow,
 -		      struct sk_buff *skb, unsigned int thoff,
 -		      u8 protocol, enum flow_offload_tuple_dir dir)
 +void nf_flow_snat_port(const struct flow_offload *flow,
 +		       struct sk_buff *skb, unsigned int thoff,
 +		       u8 protocol, enum flow_offload_tuple_dir dir)
  {
  	struct flow_ports *hdr;
  	__be16 port, new_port;

 -	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
 -	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
 -		return -1;
 -
  	hdr = (void *)(skb_network_header(skb) + thoff);

  	switch (dir) {
 @@ -463,25 +540,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
  		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
  		hdr->dest = new_port;
  		break;
 -	default:
 -		return -1;
  	}

 -	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
 +	nf_flow_nat_port(skb, thoff, protocol, port, new_port);
  }
  EXPORT_SYMBOL_GPL(nf_flow_snat_port);

 -int nf_flow_dnat_port(const struct flow_offload *flow,
 -		      struct sk_buff *skb, unsigned int thoff,
 -		      u8 protocol, enum flow_offload_tuple_dir dir)
 +void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
 +		       unsigned int thoff, u8 protocol,
 +		       enum flow_offload_tuple_dir dir)
  {
  	struct flow_ports *hdr;
  	__be16 port, new_port;

 -	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
 -	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
 -		return -1;
 -
  	hdr = (void *)(skb_network_header(skb) + thoff);

  	switch (dir) {
 @@ -495,11 +566,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
  		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
  		hdr->source = new_port;
  		break;
 -	default:
 -		return -1;
  	}

 -	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
 +	nf_flow_nat_port(skb, thoff, protocol, port, new_port);
  }
  EXPORT_SYMBOL_GPL(nf_flow_dnat_port);

 @@ -507,7 +576,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
  {
  	int err;

 -	INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
 +	INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
 +	flow_block_init(&flowtable->flow_block);
 +	init_rwsem(&flowtable->flow_block_lock);

  	err = rhashtable_init(&flowtable->rhashtable,
  			      &nf_flow_offload_rhash_params);
 @@ -528,25 +599,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
  static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
  {
  	struct net_device *dev = data;
 -	struct flow_offload_entry *e;
 -
 -	e = container_of(flow, struct flow_offload_entry, flow);

  	if (!dev) {
  		flow_offload_teardown(flow);
  		return;
  	}
 -	if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
 +
 +	if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
  	    (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
  	     flow->tuplehash[1].tuple.iifidx == dev->ifindex))
 -		flow_offload_dead(flow);
 +		flow_offload_teardown(flow);
  }

 -static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
 -					  struct net_device *dev)
 +void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
 +			      struct net_device *dev)	/* renamed from nf_flow_table_iterate_cleanup() and no longer static */
  {
  	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
  	flush_delayed_work(&flowtable->gc_work);
 +	nf_flow_table_offload_flush(flowtable);	/* NOTE(review): presumably waits for queued hw offload work - confirm */
  }

  void nf_flow_table_cleanup(struct net_device *dev)
 @@ -555,7 +625,7 @@ void nf_flow_table_cleanup(struct net_device *dev)

  	mutex_lock(&flowtable_lock);
  	list_for_each_entry(flowtable, &flowtables, list)
 -		nf_flow_table_iterate_cleanup(flowtable, dev);
 +		nf_flow_table_gc_cleanup(flowtable, dev);
  	mutex_unlock(&flowtable_lock);
  }
  EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
 @@ -565,9 +635,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
  	mutex_lock(&flowtable_lock);
  	list_del(&flow_table->list);
  	mutex_unlock(&flowtable_lock);
 +
  	cancel_delayed_work_sync(&flow_table->gc_work);
  	nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
  	nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
 +	nf_flow_table_offload_flush(flow_table);
 +	if (nf_flowtable_hw_offload(flow_table))
 +		nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
 +				      flow_table);
  	rhashtable_destroy(&flow_table->rhashtable);
  }
  EXPORT_SYMBOL_GPL(nf_flow_table_free);
 @@ -591,12 +666,23 @@ static struct notifier_block flow_offload_netdev_notifier = {

  static int __init nf_flow_table_module_init(void)
  {
 -	return register_netdevice_notifier(&flow_offload_netdev_notifier);
 +	int ret;
 +
 +	ret = nf_flow_table_offload_init();	/* runs before the netdevice notifier is registered */
 +	if (ret)
 +		return ret;
 +
 +	ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
 +	if (ret)
 +		nf_flow_table_offload_exit();	/* unwind the offload init when the notifier cannot be registered */
 +
 +	return ret;
  }

  static void __exit nf_flow_table_module_exit(void)
  {
  	unregister_netdevice_notifier(&flow_offload_netdev_notifier);
 +	nf_flow_table_offload_exit();	/* reverse of the order used in nf_flow_table_module_init() */
  }

  module_init(nf_flow_table_module_init);
 @@ -604,3 +690,4 @@ module_exit(nf_flow_table_module_exit);

  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
 +MODULE_DESCRIPTION("Netfilter flow table module");
 diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
 index 397129b2..6257d87c 100644
 --- a/net/netfilter/nf_flow_table_ip.c
 +++ b/net/netfilter/nf_flow_table_ip.c
 @@ -7,11 +7,13 @@
  #include <linux/ip.h>
  #include <linux/ipv6.h>
  #include <linux/netdevice.h>
 +#include <linux/if_ether.h>
  #include <net/ip.h>
  #include <net/ipv6.h>
  #include <net/ip6_route.h>
  #include <net/neighbour.h>
  #include <net/netfilter/nf_flow_table.h>
 +#include <net/netfilter/nf_conntrack_acct.h>
  /* For layer 4 checksum field offset. */
  #include <linux/tcp.h>
  #include <linux/udp.h>
 @@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
  	if (proto != IPPROTO_TCP)
  		return 0;

 -	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
 -		return -1;
 -
  	tcph = (void *)(skb_network_header(skb) + thoff);
  	if (unlikely(tcph->fin || tcph->rst)) {
  		flow_offload_teardown(flow);
 @@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
  	return 0;
  }

 -static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
 -			      __be32 addr, __be32 new_addr)
 +static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
 +			       __be32 addr, __be32 new_addr)	/* NOTE(review): pull/writable checks dropped here; presumably done by the caller - confirm */
  {
  	struct tcphdr *tcph;

 -	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
 -	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
 -		return -1;
 -
  	tcph = (void *)(skb_network_header(skb) + thoff);
  	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
 -
 -	return 0;
  }

 -static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
 -			      __be32 addr, __be32 new_addr)
 +static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
 +			       __be32 addr, __be32 new_addr)
  {
  	struct udphdr *udph;

 -	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
 -	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
 -		return -1;
 -
  	udph = (void *)(skb_network_header(skb) + thoff);
  	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
  		inet_proto_csum_replace4(&udph->check, skb, addr,
 @@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
  		if (!udph->check)
  			udph->check = CSUM_MANGLED_0;
  	}
 -
 -	return 0;
  }

 -static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
 -				  unsigned int thoff, __be32 addr,
 -				  __be32 new_addr)
 +static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
 +				   unsigned int thoff, __be32 addr,
 +				   __be32 new_addr)	/* void now: the TCP/UDP helpers no longer return an error */
  {
  	switch (iph->protocol) {
  	case IPPROTO_TCP:
 -		if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
 -			return NF_DROP;
 +		nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
  		break;
  	case IPPROTO_UDP:
 -		if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
 -			return NF_DROP;
 +		nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);	/* any other protocol value falls through the switch untouched */
  		break;
  	}
 -
 -	return 0;
  }

 -static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
 -			   struct iphdr *iph, unsigned int thoff,
 -			   enum flow_offload_tuple_dir dir)
 +static void nf_flow_snat_ip(const struct flow_offload *flow,
 +			    struct sk_buff *skb, struct iphdr *iph,
 +			    unsigned int thoff, enum flow_offload_tuple_dir dir)
  {
  	__be32 addr, new_addr;

 @@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
  		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
  		iph->daddr = new_addr;
  		break;
 -	default:
 -		return -1;
  	}
  	csum_replace4(&iph->check, addr, new_addr);

 -	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
 +	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
  }

 -static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
 -			   struct iphdr *iph, unsigned int thoff,
 -			   enum flow_offload_tuple_dir dir)
 +static void nf_flow_dnat_ip(const struct flow_offload *flow,
 +			    struct sk_buff *skb, struct iphdr *iph,
 +			    unsigned int thoff, enum flow_offload_tuple_dir dir)
  {
  	__be32 addr, new_addr;

 @@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
  		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
  		iph->saddr = new_addr;
  		break;
 -	default:
 -		return -1;
  	}
  	csum_replace4(&iph->check, addr, new_addr);

 -	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
 +	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
  }

 -static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
 -			  unsigned int thoff, enum flow_offload_tuple_dir dir)
 +static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
 +			  unsigned int thoff, enum flow_offload_tuple_dir dir,
 +			  struct iphdr *iph)	/* header is now supplied by the caller */
  {
 -	struct iphdr *iph = ip_hdr(skb);
 -
 -	if (flow->flags & FLOW_OFFLOAD_SNAT &&
 -	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
 -	     nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
 -		return -1;
 -	if (flow->flags & FLOW_OFFLOAD_DNAT &&
 -	    (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
 -	     nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
 -		return -1;
 -
 -	return 0;
 +	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {	/* source NAT: port first, then address */
 +		nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
 +		nf_flow_snat_ip(flow, skb, iph, thoff, dir);
 +	}
 +	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {	/* destination NAT: port first, then address */
 +		nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
 +		nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
 +	}
  }

  static bool ip_has_options(unsigned int thoff)
 @@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff)
  	return thoff != sizeof(struct iphdr);
  }

 +static void nf_flow_tuple_encap(struct sk_buff *skb,
 +				struct flow_offload_tuple *tuple)
 +{
 +	struct vlan_ethhdr *veth;
 +	struct pppoe_hdr *phdr;
 +	int i = 0;	/* next free slot in tuple->encap[] */
 +
 +	if (skb_vlan_tag_present(skb)) {
 +		tuple->encap[i].id = skb_vlan_tag_get(skb);
 +		tuple->encap[i].proto = skb->vlan_proto;
 +		i++;
 +	}
 +	switch (skb->protocol) {
 +	case htons(ETH_P_8021Q):
 +		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
 +		tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
 +		tuple->encap[i].proto = skb->protocol;
 +		break;
 +	case htons(ETH_P_PPP_SES):
 +		phdr = (struct pppoe_hdr *)skb_network_header(skb);	/* PPPoE header follows the Ethernet header */
 +		tuple->encap[i].id = ntohs(phdr->sid);
 +		tuple->encap[i].proto = skb->protocol;
 +		break;
 +	}
 +}
 +
  static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
 -			    struct flow_offload_tuple *tuple)
 +			    struct flow_offload_tuple *tuple, u32 *hdrsize,
 +			    u32 offset)
  {
  	struct flow_ports *ports;
  	unsigned int thoff;
  	struct iphdr *iph;

 -	if (!pskb_may_pull(skb, sizeof(*iph)))
 +	if (!pskb_may_pull(skb, sizeof(*iph) + offset))
  		return -1;

 -	iph = ip_hdr(skb);
 -	thoff = iph->ihl * 4;
 +	iph = (struct iphdr *)(skb_network_header(skb) + offset);
 +	thoff = (iph->ihl * 4);

  	if (ip_is_fragment(iph) ||
  	    unlikely(ip_has_options(thoff)))
  		return -1;

 -	if (iph->protocol != IPPROTO_TCP &&
 -	    iph->protocol != IPPROTO_UDP)
 +	thoff += offset;
 +
 +	switch (iph->protocol) {
 +	case IPPROTO_TCP:
 +		*hdrsize = sizeof(struct tcphdr);
 +		break;
 +	case IPPROTO_UDP:
 +		*hdrsize = sizeof(struct udphdr);
 +		break;
 +	default:
  		return -1;
 +	}

  	if (iph->ttl <= 1)
  		return -1;

 -	thoff = iph->ihl * 4;
 -	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
 +	if (!pskb_may_pull(skb, thoff + *hdrsize))
  		return -1;

 -	iph = ip_hdr(skb);
 +	iph = (struct iphdr *)(skb_network_header(skb) + offset);
  	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

  	tuple->src_v4.s_addr	= iph->saddr;
 @@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
  	tuple->l3proto		= AF_INET;
  	tuple->l4proto		= iph->protocol;
  	tuple->iifidx		= dev->ifindex;
 +	nf_flow_tuple_encap(skb, tuple);

  	return 0;
  }
 @@ -225,6 +237,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
  	return NF_STOLEN;
  }

 +static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
 +				       u32 *offset)	/* *offset grows by the encap header size on a match */
 +{
 +	struct vlan_ethhdr *veth;
 +
 +	switch (skb->protocol) {
 +	case htons(ETH_P_8021Q):
 +		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
 +		if (veth->h_vlan_encapsulated_proto == proto) {	/* inner ethertype is the wanted protocol */
 +			*offset += VLAN_HLEN;
 +			return true;
 +		}
 +		break;
 +	case htons(ETH_P_PPP_SES):
 +		if (nf_flow_pppoe_proto(skb) == proto) {
 +			*offset += PPPOE_SES_HLEN;
 +			return true;
 +		}
 +		break;
 +	}
 +
 +	return false;	/* not VLAN/PPPoE, or the payload is a different protocol */
 +}
 +
 +static void nf_flow_encap_pop(struct sk_buff *skb,
 +			      struct flow_offload_tuple_rhash *tuplehash)
 +{
 +	struct vlan_hdr *vlan_hdr;
 +	int i;
 +
 +	for (i = 0; i < tuplehash->tuple.encap_num; i++) {	/* one pass per recorded encap layer */
 +		if (skb_vlan_tag_present(skb)) {
 +			__vlan_hwaccel_clear_tag(skb);	/* tag is cleared; no header bytes are pulled */
 +			continue;
 +		}
 +		switch (skb->protocol) {
 +		case htons(ETH_P_8021Q):
 +			vlan_hdr = (struct vlan_hdr *)skb->data;
 +			__skb_pull(skb, VLAN_HLEN);
 +			vlan_set_encap_proto(skb, vlan_hdr);
 +			skb_reset_network_header(skb);
 +			break;
 +		case htons(ETH_P_PPP_SES):
 +			skb->protocol = nf_flow_pppoe_proto(skb);	/* inner protocol is read before the pull */
 +			skb_pull(skb, PPPOE_SES_HLEN);
 +			skb_reset_network_header(skb);
 +			break;
 +		}
 +	}
 +}
 +
 +static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
 +				       const struct flow_offload_tuple_rhash *tuplehash,
 +				       unsigned short type)
 +{
 +	struct net_device *outdev;
 +
 +	outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
 +	if (!outdev)
 +		return NF_DROP;	/* no device for the cached output ifindex */
 +
 +	skb->dev = outdev;
 +	dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
 +			tuplehash->tuple.out.h_source, skb->len);	/* L2 header from the cached addresses */
 +	dev_queue_xmit(skb);
 +
 +	return NF_STOLEN;	/* skb was passed to dev_queue_xmit() */
 +}
 +
  unsigned int
  nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
  			const struct nf_hook_state *state)
 @@ -235,15 +316,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
  	enum flow_offload_tuple_dir dir;
  	struct flow_offload *flow;
  	struct net_device *outdev;
 +	u32 hdrsize, offset = 0;
 +	unsigned int thoff, mtu;
  	struct rtable *rt;
 -	unsigned int thoff;
  	struct iphdr *iph;
  	__be32 nexthop;
 +	int ret = NF_DROP;	/* verdict when xmit_type matches no case of the final switch */

 -	if (skb->protocol != htons(ETH_P_IP))
 +	if (skb->protocol != htons(ETH_P_IP) &&
 +	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
  		return NF_ACCEPT;

 -	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
 +	if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
  		return NF_ACCEPT;

  	tuplehash = flow_offload_lookup(flow_table, &tuple);
 @@ -252,75 +336,80 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,

  	dir = tuplehash->tuple.dir;
  	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 -	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
 -	outdev = rt->dst.dev;
 -
 -	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
 -		return NF_ACCEPT;

 -	if (skb_try_make_writable(skb, sizeof(*iph)))
 -		return NF_DROP;
 -
 -	thoff = ip_hdr(skb)->ihl * 4;
 -	if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
 +	mtu = flow->tuplehash[dir].tuple.mtu + offset;
 +	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
  		return NF_ACCEPT;

 -	if (!dst_check(&rt->dst, 0)) {
 -		flow_offload_teardown(flow);
 +	iph = (struct iphdr *)(skb_network_header(skb) + offset);
 +	thoff = (iph->ihl * 4) + offset;
 +	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
  		return NF_ACCEPT;
 -	}

 -	if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
 +	if (skb_try_make_writable(skb, thoff + hdrsize))
  		return NF_DROP;

 -	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
 +	flow_offload_refresh(flow_table, flow);
 +
 +	nf_flow_encap_pop(skb, tuplehash);
 +	thoff -= offset;
 +
  	iph = ip_hdr(skb);
 +	nf_flow_nat_ip(flow, skb, thoff, dir, iph);
 +
  	ip_decrease_ttl(iph);
  	skb->tstamp = 0;

 -	if (unlikely(dst_xfrm(&rt->dst))) {
 +	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
 +		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 +
 +	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
 +		rt = (struct rtable *)tuplehash->tuple.dst_cache;
  		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
  		IPCB(skb)->iif = skb->dev->ifindex;
  		IPCB(skb)->flags = IPSKB_FORWARDED;
  		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
  	}

 -	skb->dev = outdev;
 -	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
 -	skb_dst_set_noref(skb, &rt->dst);
 -	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
 +	switch (tuplehash->tuple.xmit_type) {
 +	case FLOW_OFFLOAD_XMIT_NEIGH:
 +		rt = (struct rtable *)tuplehash->tuple.dst_cache;
 +		outdev = rt->dst.dev;
 +		skb->dev = outdev;
 +		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
 +		skb_dst_set_noref(skb, &rt->dst);
 +		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
 +		ret = NF_STOLEN;
 +		break;
 +	case FLOW_OFFLOAD_XMIT_DIRECT:
 +		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
 +		if (ret == NF_DROP)
 +			flow_offload_teardown(flow);
 +		break;
 +	}

 -	return NF_STOLEN;
 +	return ret;
  }
  EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);

 -static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
 -				struct in6_addr *addr,
 -				struct in6_addr *new_addr)
 +static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
 +				 struct in6_addr *addr,
 +				 struct in6_addr *new_addr,
 +				 struct ipv6hdr *ip6h)	/* NOTE(review): ip6h is not used in this body */
  {
  	struct tcphdr *tcph;

 -	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
 -	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
 -		return -1;
 -
  	tcph = (void *)(skb_network_header(skb) + thoff);
  	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
  				  new_addr->s6_addr32, true);
 -
 -	return 0;
  }

 -static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
 -				struct in6_addr *addr,
 -				struct in6_addr *new_addr)
 +static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
 +				 struct in6_addr *addr,
 +				 struct in6_addr *new_addr)
  {
  	struct udphdr *udph;

 -	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
 -	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
 -		return -1;
 -
  	udph = (void *)(skb_network_header(skb) + thoff);
  	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
  		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
 @@ -328,32 +417,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
  		if (!udph->check)
  			udph->check = CSUM_MANGLED_0;
  	}
 -
 -	return 0;
  }

 -static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
 -				    unsigned int thoff, struct in6_addr *addr,
 -				    struct in6_addr *new_addr)
 +static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
 +				     unsigned int thoff, struct in6_addr *addr,
 +				     struct in6_addr *new_addr)	/* now void: no verdict is returned */
  {
  	switch (ip6h->nexthdr) {
  	case IPPROTO_TCP:
 -		if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
 -			return NF_DROP;
 +		nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);	/* result is no longer checked */
  		break;
  	case IPPROTO_UDP:
 -		if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
 -			return NF_DROP;
 +		nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);	/* result is no longer checked */
  		break;
  	}
 -
 -	return 0;
  }

 -static int nf_flow_snat_ipv6(const struct flow_offload *flow,
 -			     struct sk_buff *skb, struct ipv6hdr *ip6h,
 -			     unsigned int thoff,
 -			     enum flow_offload_tuple_dir dir)
 +static void nf_flow_snat_ipv6(const struct flow_offload *flow,
 +			      struct sk_buff *skb, struct ipv6hdr *ip6h,
 +			      unsigned int thoff,
 +			      enum flow_offload_tuple_dir dir)
  {
  	struct in6_addr addr, new_addr;

 @@ -368,17 +451,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
  		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
  		ip6h->daddr = new_addr;
  		break;
 -	default:
 -		return -1;
  	}

 -	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
 +	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
  }

 -static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
 -			     struct sk_buff *skb, struct ipv6hdr *ip6h,
 -			     unsigned int thoff,
 -			     enum flow_offload_tuple_dir dir)
 +static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
 +			      struct sk_buff *skb, struct ipv6hdr *ip6h,
 +			      unsigned int thoff,
 +			      enum flow_offload_tuple_dir dir)
  {
  	struct in6_addr addr, new_addr;

 @@ -393,56 +474,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
  		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
  		ip6h->saddr = new_addr;
  		break;
 -	default:
 -		return -1;
  	}

 -	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
 +	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
  }

 -static int nf_flow_nat_ipv6(const struct flow_offload *flow,
 -			    struct sk_buff *skb,
 -			    enum flow_offload_tuple_dir dir)
 +static void nf_flow_nat_ipv6(const struct flow_offload *flow,
 +			     struct sk_buff *skb,
 +			     enum flow_offload_tuple_dir dir,
 +			     struct ipv6hdr *ip6h)	/* header is now supplied by the caller */
  {
 -	struct ipv6hdr *ip6h = ipv6_hdr(skb);
  	unsigned int thoff = sizeof(*ip6h);

 -	if (flow->flags & FLOW_OFFLOAD_SNAT &&
 -	    (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
 -	     nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
 -		return -1;
 -	if (flow->flags & FLOW_OFFLOAD_DNAT &&
 -	    (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
 -	     nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
 -		return -1;
 -
 -	return 0;
 +	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {	/* source NAT: port first, then address */
 +		nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
 +		nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
 +	}
 +	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {	/* destination NAT: port first, then address */
 +		nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
 +		nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
 +	}
  }

  static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
 -			      struct flow_offload_tuple *tuple)
 +			      struct flow_offload_tuple *tuple, u32 *hdrsize,
 +			      u32 offset)
  {
  	struct flow_ports *ports;
  	struct ipv6hdr *ip6h;
  	unsigned int thoff;

 -	if (!pskb_may_pull(skb, sizeof(*ip6h)))
 +	thoff = sizeof(*ip6h) + offset;
 +	if (!pskb_may_pull(skb, thoff))
  		return -1;

 -	ip6h = ipv6_hdr(skb);
 +	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);

 -	if (ip6h->nexthdr != IPPROTO_TCP &&
 -	    ip6h->nexthdr != IPPROTO_UDP)
 +	switch (ip6h->nexthdr) {
 +	case IPPROTO_TCP:
 +		*hdrsize = sizeof(struct tcphdr);
 +		break;
 +	case IPPROTO_UDP:
 +		*hdrsize = sizeof(struct udphdr);
 +		break;
 +	default:
  		return -1;
 +	}

  	if (ip6h->hop_limit <= 1)
  		return -1;

 -	thoff = sizeof(*ip6h);
 -	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
 +	if (!pskb_may_pull(skb, thoff + *hdrsize))
  		return -1;

 -	ip6h = ipv6_hdr(skb);
 +	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
  	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

  	tuple->src_v6		= ip6h->saddr;
 @@ -452,6 +537,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
  	tuple->l3proto		= AF_INET6;
  	tuple->l4proto		= ip6h->nexthdr;
  	tuple->iifidx		= dev->ifindex;
 +	nf_flow_tuple_encap(skb, tuple);

  	return 0;
  }
 @@ -467,13 +553,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
  	const struct in6_addr *nexthop;
  	struct flow_offload *flow;
  	struct net_device *outdev;
 +	unsigned int thoff, mtu;
 +	u32 hdrsize, offset = 0;
  	struct ipv6hdr *ip6h;
  	struct rt6_info *rt;
 +	int ret = NF_DROP;	/* verdict when xmit_type matches no case of the final switch */

 -	if (skb->protocol != htons(ETH_P_IPV6))
 +	if (skb->protocol != htons(ETH_P_IPV6) &&
 +	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
  		return NF_ACCEPT;

 -	if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
 +	if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
  		return NF_ACCEPT;

  	tuplehash = flow_offload_lookup(flow_table, &tuple);
 @@ -482,44 +572,57 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,

  	dir = tuplehash->tuple.dir;
  	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 -	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
 -	outdev = rt->dst.dev;

 -	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
 +	mtu = flow->tuplehash[dir].tuple.mtu + offset;
 +	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
  		return NF_ACCEPT;

 -	if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
 -				sizeof(*ip6h)))
 +	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
 +	thoff = sizeof(*ip6h) + offset;
 +	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
  		return NF_ACCEPT;

 -	if (!dst_check(&rt->dst, tuplehash->tuple.dst_cookie)) {
 -		flow_offload_teardown(flow);
 -		return NF_ACCEPT;
 -	}
 -
 -	if (skb_try_make_writable(skb, sizeof(*ip6h)))
 +	if (skb_try_make_writable(skb, thoff + hdrsize))
  		return NF_DROP;

 -	if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
 -		return NF_DROP;
 +	flow_offload_refresh(flow_table, flow);
 +
 +	nf_flow_encap_pop(skb, tuplehash);

 -	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
  	ip6h = ipv6_hdr(skb);
 +	nf_flow_nat_ipv6(flow, skb, dir, ip6h);
 +
  	ip6h->hop_limit--;
  	skb->tstamp = 0;

 -	if (unlikely(dst_xfrm(&rt->dst))) {
 +	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
 +		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 +
 +	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
 +		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
  		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
  		IP6CB(skb)->iif = skb->dev->ifindex;
  		IP6CB(skb)->flags = IP6SKB_FORWARDED;
  		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
  	}

 -	skb->dev = outdev;
 -	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
 -	skb_dst_set_noref(skb, &rt->dst);
 -	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
 +	switch (tuplehash->tuple.xmit_type) {
 +	case FLOW_OFFLOAD_XMIT_NEIGH:
 +		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
 +		outdev = rt->dst.dev;
 +		skb->dev = outdev;
 +		nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
 +		skb_dst_set_noref(skb, &rt->dst);
 +		neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
 +		ret = NF_STOLEN;
 +		break;
 +	case FLOW_OFFLOAD_XMIT_DIRECT:
 +		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
 +		if (ret == NF_DROP)
 +			flow_offload_teardown(flow);
 +		break;
 +	}

 -	return NF_STOLEN;
 +	return ret;
  }
  EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
 diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
 new file mode 100644
 index 000000000..d94c6fb92
 --- /dev/null
 +++ b/net/netfilter/nf_flow_table_offload.c
 @@ -0,0 +1,1191 @@
 +#include <linux/kernel.h>
 +#include <linux/init.h>
 +#include <linux/module.h>
 +#include <linux/netfilter.h>
 +#include <linux/rhashtable.h>
 +#include <linux/netdevice.h>
 +#include <linux/tc_act/tc_csum.h>
 +#include <net/flow_offload.h>
 +#include <net/netfilter/nf_flow_table.h>
 +#include <net/netfilter/nf_tables.h>
 +#include <net/netfilter/nf_conntrack.h>
 +#include <net/netfilter/nf_conntrack_acct.h>
 +#include <net/netfilter/nf_conntrack_core.h>
 +#include <net/netfilter/nf_conntrack_tuple.h>
 +
 +static struct workqueue_struct *nf_flow_offload_add_wq;
 +static struct workqueue_struct *nf_flow_offload_del_wq;
 +static struct workqueue_struct *nf_flow_offload_stats_wq;
 +
 +struct flow_offload_work {
 +	struct list_head	list;
 +	enum flow_cls_command	cmd;	/* flow classifier command carried by this item */
 +	int			priority;
 +	struct nf_flowtable	*flowtable;
 +	struct flow_offload	*flow;
 +	struct work_struct	work;	/* presumably queued on one of the nf_flow_offload_*_wq queues - confirm */
 +};
 +
 +#define NF_FLOW_DISSECTOR(__match, __type, __field)	\
 +	(__match)->dissector.offset[__type] =		\
 +		offsetof(struct nf_flow_key, __field)	/* offset of __field inside struct nf_flow_key */
 +
 +static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
 +				   struct ip_tunnel_info *tun_info)
 +{
 +	struct nf_flow_key *mask = &match->mask;
 +	struct nf_flow_key *key = &match->key;
 +	unsigned int enc_keys;	/* dissector key bits added by this function */
 +
 +	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
 +		return;	/* only TX tunnel info produces an encap match */
 +
 +	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
 +	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
 +	key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
 +	mask->enc_key_id.keyid = 0xffffffff;
 +	enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
 +		   BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
 +
 +	if (ip_tunnel_info_af(tun_info) == AF_INET) {
 +		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
 +				  enc_ipv4);
 +		key->enc_ipv4.src = tun_info->key.u.ipv4.dst;	/* swapped: tunnel dst is matched as src */
 +		key->enc_ipv4.dst = tun_info->key.u.ipv4.src;	/* swapped: tunnel src is matched as dst */
 +		if (key->enc_ipv4.src)	/* a zero address leaves its mask untouched */
 +			mask->enc_ipv4.src = 0xffffffff;
 +		if (key->enc_ipv4.dst)
 +			mask->enc_ipv4.dst = 0xffffffff;
 +		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
 +		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
 +	} else {
 +		memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
 +		       sizeof(struct in6_addr));	/* same src/dst swap as the IPv4 branch */
 +		memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
 +		       sizeof(struct in6_addr));
 +		if (memcmp(&key->enc_ipv6.src, &in6addr_any,
 +			   sizeof(struct in6_addr)))	/* mask is set only for a non-any address */
 +			memset(&mask->enc_ipv6.src, 0xff,
 +			       sizeof(struct in6_addr));
 +		if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
 +			   sizeof(struct in6_addr)))
 +			memset(&mask->enc_ipv6.dst, 0xff,
 +			       sizeof(struct in6_addr));
 +		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
 +		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
 +	}
 +
 +	match->dissector.used_keys |= enc_keys;
 +}
 +
 +static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
 +				    struct flow_dissector_key_vlan *mask,
 +				    u16 vlan_id, __be16 proto)
 +{
 +	key->vlan_id = vlan_id;
 +	mask->vlan_id = VLAN_VID_MASK;	/* mask for the VLAN id field */
 +	key->vlan_tpid = proto;
 +	mask->vlan_tpid = 0xffff;	/* all 16 bits of the TPID */
 +}
 +
 +static int nf_flow_rule_match(struct nf_flow_match *match,
 +			      const struct flow_offload_tuple *tuple,
 +			      struct dst_entry *other_dst)
 +{
 +	struct nf_flow_key *mask = &match->mask;
 +	struct nf_flow_key *key = &match->key;
 +	struct ip_tunnel_info *tun_info;
 +	bool vlan_encap = false;	/* set once encap[0] has taken the outer VLAN key */
 +
 +	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
 +	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
 +	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
 +	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
 +	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
 +	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
 +	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
 +
 +	if (other_dst && other_dst->lwtstate) {
 +		tun_info = lwt_tun_info(other_dst->lwtstate);
 +		nf_flow_rule_lwt_match(match, tun_info);
 +	}
 +
 +	key->meta.ingress_ifindex = tuple->iifidx;
 +	mask->meta.ingress_ifindex = 0xffffffff;
 +
 +	if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
 +	    tuple->encap[0].proto == htons(ETH_P_8021Q)) {
 +		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
 +		nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
 +					tuple->encap[0].id,
 +					tuple->encap[0].proto);
 +		vlan_encap = true;
 +	}
 +
 +	if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
 +	    tuple->encap[1].proto == htons(ETH_P_8021Q)) {
 +		if (vlan_encap) {	/* outer key already used: encap[1] becomes the CVLAN key */
 +			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
 +					  cvlan);
 +			nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
 +						tuple->encap[1].id,
 +						tuple->encap[1].proto);
 +		} else {	/* encap[0] was not matched as VLAN: encap[1] takes the VLAN key */
 +			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
 +					  vlan);
 +			nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
 +						tuple->encap[1].id,
 +						tuple->encap[1].proto);
 +		}
 +	}
 +
 +	switch (tuple->l3proto) {
 +	case AF_INET:
 +		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
 +		key->basic.n_proto = htons(ETH_P_IP);
 +		key->ipv4.src = tuple->src_v4.s_addr;
 +		mask->ipv4.src = 0xffffffff;
 +		key->ipv4.dst = tuple->dst_v4.s_addr;
 +		mask->ipv4.dst = 0xffffffff;
 +		break;
 +	case AF_INET6:
 +		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
 +		key->basic.n_proto = htons(ETH_P_IPV6);
 +		key->ipv6.src = tuple->src_v6;
 +		memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
 +		key->ipv6.dst = tuple->dst_v6;
 +		memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
 +		break;
 +	default:
 +		return -EOPNOTSUPP;	/* only AF_INET and AF_INET6 tuples are supported */
 +	}
 +	mask->control.addr_type = 0xffff;
 +	match->dissector.used_keys |= BIT(key->control.addr_type);
 +	mask->basic.n_proto = 0xffff;
 +
 +	switch (tuple->l4proto) {
 +	case IPPROTO_TCP:
 +		key->tcp.flags = 0;	/* with the mask below: match only when RST and FIN are clear */
 +		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
 +		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
 +		break;
 +	case IPPROTO_UDP:
 +		break;
 +	default:
 +		return -EOPNOTSUPP;	/* only TCP and UDP tuples are supported */
 +	}
 +
 +	key->basic.ip_proto = tuple->l4proto;
 +	mask->basic.ip_proto = 0xff;
 +
 +	key->tp.src = tuple->src_port;
 +	mask->tp.src = 0xffff;
 +	key->tp.dst = tuple->dst_port;
 +	mask->tp.dst = 0xffff;
 +
 +	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
 +				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 +				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
 +				      BIT(FLOW_DISSECTOR_KEY_PORTS);
 +	return 0;
 +}
 +
 +static void flow_offload_mangle(struct flow_action_entry *entry,
 +				enum flow_action_mangle_base htype, u32 offset,
 +				const __be32 *value, const __be32 *mask)
 +{
 +	entry->id = FLOW_ACTION_MANGLE;
 +	entry->mangle.htype = htype;
 +	entry->mangle.offset = offset;
 +	memcpy(&entry->mangle.mask, mask, sizeof(u32));	/* exactly one 32-bit word is copied */
 +	memcpy(&entry->mangle.val, value, sizeof(u32));	/* exactly one 32-bit word is copied */
 +}
 +
 +static inline struct flow_action_entry *
 +flow_action_entry_next(struct nf_flow_rule *flow_rule)
 +{
 +	struct flow_action *action = &flow_rule->rule->action;
 +
 +	return &action->entries[action->num_entries++];	/* hand out the slot, then bump the count */
 +}
 +
 +static int flow_offload_eth_src(struct net *net,
 +				const struct flow_offload *flow,
 +				enum flow_offload_tuple_dir dir,
 +				struct nf_flow_rule *flow_rule)
 +{
 +	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);	/* both entries are reserved up front, */
 +	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);	/* even if an error is returned below */
 +	const struct flow_offload_tuple *other_tuple, *this_tuple;
 +	struct net_device *dev = NULL;
 +	const unsigned char *addr;
 +	u32 mask, val;
 +	u16 val16;
 +
 +	this_tuple = &flow->tuplehash[dir].tuple;
 +
 +	switch (this_tuple->xmit_type) {
 +	case FLOW_OFFLOAD_XMIT_DIRECT:
 +		addr = this_tuple->out.h_source;	/* address cached in the tuple */
 +		break;
 +	case FLOW_OFFLOAD_XMIT_NEIGH:
 +		other_tuple = &flow->tuplehash[!dir].tuple;
 +		dev = dev_get_by_index(net, other_tuple->iifidx);	/* released by dev_put() at the end */
 +		if (!dev)
 +			return -ENOENT;
 +
 +		addr = dev->dev_addr;	/* address of the opposite direction's input device */
 +		break;
 +	default:
 +		return -EOPNOTSUPP;
 +	}
 +
 +	mask = ~0xffff0000;
 +	memcpy(&val16, addr, 2);
 +	val = val16 << 16;	/* first two address bytes, shifted into the upper half */
 +	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
 +			    &val, &mask);
 +
 +	mask = ~0xffffffff;
 +	memcpy(&val, addr + 2, 4);	/* remaining four address bytes */
 +	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
 +			    &val, &mask);
 +
 +	if (dev)
 +		dev_put(dev);
 +
 +	return 0;
 +}
 +
 +static int flow_offload_eth_dst(struct net *net,
 +				const struct flow_offload *flow,
 +				enum flow_offload_tuple_dir dir,
 +				struct nf_flow_rule *flow_rule)
 +{
 +	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);	/* both entries are reserved up front, */
 +	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);	/* even if an error is returned below */
 +	const struct flow_offload_tuple *other_tuple, *this_tuple;
 +	const struct dst_entry *dst_cache;
 +	unsigned char ha[ETH_ALEN];	/* local copy of the destination address */
 +	struct neighbour *n;
 +	const void *daddr;
 +	u32 mask, val;
 +	u8 nud_state;
 +	u16 val16;
 +
 +	this_tuple = &flow->tuplehash[dir].tuple;
 +
 +	switch (this_tuple->xmit_type) {
 +	case FLOW_OFFLOAD_XMIT_DIRECT:
 +		ether_addr_copy(ha, this_tuple->out.h_dest);	/* address cached in the tuple */
 +		break;
 +	case FLOW_OFFLOAD_XMIT_NEIGH:
 +		other_tuple = &flow->tuplehash[!dir].tuple;
 +		daddr = &other_tuple->src_v4;	/* lookup key: source address of the opposite direction */
 +		dst_cache = this_tuple->dst_cache;
 +		n = dst_neigh_lookup(dst_cache, daddr);
 +		if (!n)
 +			return -ENOENT;
 +
 +		read_lock_bh(&n->lock);	/* state and address are copied under the neighbour lock */
 +		nud_state = n->nud_state;
 +		ether_addr_copy(ha, n->ha);
 +		read_unlock_bh(&n->lock);
 +		neigh_release(n);
 +
 +		if (!(nud_state & NUD_VALID))
 +			return -ENOENT;	/* the copied address is not used in this case */
 +		break;
 +	default:
 +		return -EOPNOTSUPP;
 +	}
 +
 +	mask = ~0xffffffff;
 +	memcpy(&val, ha, 4);	/* first four address bytes */
 +	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
 +			    &val, &mask);
 +
 +	mask = ~0x0000ffff;
 +	memcpy(&val16, ha + 4, 2);	/* last two address bytes */
 +	val = val16;
 +	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
 +			    &val, &mask);
 +
 +	return 0;
 +}
 +
 +static void flow_offload_ipv4_snat(struct net *net,
 +				   const struct flow_offload *flow,
 +				   enum flow_offload_tuple_dir dir,
 +				   struct nf_flow_rule *flow_rule)
 +{
 +	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
 +	u32 mask = ~htonl(0xffffffff);	/* evaluates to 0 */
 +	__be32 addr;
 +	u32 offset;
 +
 +	switch (dir) {
 +	case FLOW_OFFLOAD_DIR_ORIGINAL:
 +		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
 +		offset = offsetof(struct iphdr, saddr);	/* original direction: source address is rewritten */
 +		break;
 +	case FLOW_OFFLOAD_DIR_REPLY:
 +		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
 +		offset = offsetof(struct iphdr, daddr);	/* reply direction: destination address is rewritten */
 +		break;
 +	default:
 +		return;	/* the entry reserved above is left unfilled */
 +	}
 +
 +	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
 +			    &addr, &mask);
 +}
 +
 +static void flow_offload_ipv4_dnat(struct net *net,
 +				   const struct flow_offload *flow,
 +				   enum flow_offload_tuple_dir dir,
 +				   struct nf_flow_rule *flow_rule)
 +{
 +	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
 +	u32 mask = ~htonl(0xffffffff);	/* evaluates to 0 */
 +	__be32 addr;
 +	u32 offset;
 +
 +	switch (dir) {
 +	case FLOW_OFFLOAD_DIR_ORIGINAL:
 +		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
 +		offset = offsetof(struct iphdr, daddr);	/* original direction: destination address is rewritten */
 +		break;
 +	case FLOW_OFFLOAD_DIR_REPLY:
 +		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
 +		offset = offsetof(struct iphdr, saddr);	/* reply direction: source address is rewritten */
 +		break;
 +	default:
 +		return;	/* the entry reserved above is left unfilled */
 +	}
 +
 +	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
 +			    &addr, &mask);
 +}
 +
 +static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
 +				     unsigned int offset,
 +				     const __be32 *addr, const __be32 *mask)
 +{
 +	struct flow_action_entry *entry;
 +	int i;	/* index of the 32-bit word inside the 128-bit address */
 +
 +	for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {	/* one mangle entry per word */
 +		entry = flow_action_entry_next(flow_rule);
 +		flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
 +				    offset + i * sizeof(u32), &addr[i], mask);
 +	}
 +}
 +
 +static void flow_offload_ipv6_snat(struct net *net,
 +				   const struct flow_offload *flow,
 +				   enum flow_offload_tuple_dir dir,
 +				   struct nf_flow_rule *flow_rule)
 +{
 +	u32 mask = ~htonl(0xffffffff);	/* evaluates to 0 */
 +	const __be32 *addr;
 +	u32 offset;
 +
 +	switch (dir) {
 +	case FLOW_OFFLOAD_DIR_ORIGINAL:
 +		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
 +		offset = offsetof(struct ipv6hdr, saddr);	/* original direction: source address is rewritten */
 +		break;
 +	case FLOW_OFFLOAD_DIR_REPLY:
 +		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
 +		offset = offsetof(struct ipv6hdr, daddr);	/* reply direction: destination address is rewritten */
 +		break;
 +	default:
 +		return;
 +	}
 +
 +	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
 +}
 +
 +static void flow_offload_ipv6_dnat(struct net *net,
 +				   const struct flow_offload *flow,
 +				   enum flow_offload_tuple_dir dir,
 +				   struct nf_flow_rule *flow_rule)
 +{
 +	u32 mask = ~htonl(0xffffffff);	/* evaluates to 0 */
 +	const __be32 *addr;
 +	u32 offset;
 +
 +	switch (dir) {
 +	case FLOW_OFFLOAD_DIR_ORIGINAL:
 +		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
 +		offset = offsetof(struct ipv6hdr, daddr);	/* original direction: destination address is rewritten */
 +		break;
 +	case FLOW_OFFLOAD_DIR_REPLY:
 +		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
 +		offset = offsetof(struct ipv6hdr, saddr);	/* reply direction: source address is rewritten */
 +		break;
 +	default:
 +		return;
 +	}
 +
 +	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
 +}
 +
 +/* Map the flow's L4 protocol to the matching mangle header type.
 + *
 + * Returns FLOW_ACT_MANGLE_HDR_TYPE_TCP or FLOW_ACT_MANGLE_HDR_TYPE_UDP, and
 + * 0 for every other protocol.
 + */
 +static int flow_offload_l4proto(const struct flow_offload *flow)
 +{
 +	const struct flow_offload_tuple *orig;
 +
 +	orig = &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple;
 +	if (orig->l4proto == IPPROTO_TCP)
 +		return FLOW_ACT_MANGLE_HDR_TYPE_TCP;
 +	if (orig->l4proto == IPPROTO_UDP)
 +		return FLOW_ACT_MANGLE_HDR_TYPE_UDP;
 +
 +	/* Neither TCP nor UDP. */
 +	return 0;
 +}
 +
 +/* Port SNAT: one mangle action on the 32-bit word at L4 header offset 0,
 + * carrying the new source port (original dir) or destination port (reply dir).
 + */
 +static void flow_offload_port_snat(struct net *net,
 +				   const struct flow_offload *flow,
 +				   enum flow_offload_tuple_dir dir,
 +				   struct nf_flow_rule *flow_rule)
 +{
 +	/* The action slot is claimed even when @dir is not handled below. */
 +	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
 +	const struct flow_offload_tuple *nat_tuple;
 +	u32 port, mask;
 +
 +	if (dir == FLOW_OFFLOAD_DIR_ORIGINAL) {
 +		nat_tuple = &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple;
 +		port = htonl((u32)ntohs(nat_tuple->dst_port) << 16);
 +		mask = ~htonl(0xffff0000);
 +	} else if (dir == FLOW_OFFLOAD_DIR_REPLY) {
 +		nat_tuple = &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple;
 +		port = htonl(ntohs(nat_tuple->src_port));
 +		mask = ~htonl(0xffff);
 +	} else {
 +		return;
 +	}
 +
 +	/* Offset 0 in both cases; value and mask differ per direction. */
 +	flow_offload_mangle(entry, flow_offload_l4proto(flow), 0,
 +			    &port, &mask);
 +}
 +
 +/* Port DNAT: one mangle action on the 32-bit word at L4 header offset 0,
 + * carrying the new destination port (original dir) or source port (reply dir).
 + */
 +static void flow_offload_port_dnat(struct net *net,
 +				   const struct flow_offload *flow,
 +				   enum flow_offload_tuple_dir dir,
 +				   struct nf_flow_rule *flow_rule)
 +{
 +	/* The action slot is claimed even when @dir is not handled below. */
 +	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
 +	const struct flow_offload_tuple *nat_tuple;
 +	u32 port, mask;
 +
 +	if (dir == FLOW_OFFLOAD_DIR_ORIGINAL) {
 +		nat_tuple = &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple;
 +		port = htonl(ntohs(nat_tuple->src_port));
 +		mask = ~htonl(0xffff);
 +	} else if (dir == FLOW_OFFLOAD_DIR_REPLY) {
 +		nat_tuple = &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple;
 +		port = htonl((u32)ntohs(nat_tuple->dst_port) << 16);
 +		mask = ~htonl(0xffff0000);
 +	} else {
 +		return;
 +	}
 +
 +	/* Offset 0 in both cases; value and mask differ per direction. */
 +	flow_offload_mangle(entry, flow_offload_l4proto(flow), 0,
 +			    &port, &mask);
 +}
 +
 +/* Append a checksum-update action covering the IPv4 header and, for TCP or
 + * UDP flows, the corresponding L4 checksum.
 + */
 +static void flow_offload_ipv4_checksum(struct net *net,
 +				       const struct flow_offload *flow,
 +				       struct nf_flow_rule *flow_rule)
 +{
 +	struct flow_action_entry *csum = flow_action_entry_next(flow_rule);
 +	u8 l4proto = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
 +	u32 flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
 +
 +	if (l4proto == IPPROTO_TCP)
 +		flags |= TCA_CSUM_UPDATE_FLAG_TCP;
 +	else if (l4proto == IPPROTO_UDP)
 +		flags |= TCA_CSUM_UPDATE_FLAG_UDP;
 +
 +	csum->id = FLOW_ACTION_CSUM;
 +	csum->csum_flags = flags;
 +}
 +
 +/* Append a FLOW_ACTION_REDIRECT entry for direction @dir.
 + *
 + * DIRECT flows use the tuple's out.hw_ifidx, NEIGH flows the ingress
 + * interface of the opposite direction. Nothing is appended for other xmit
 + * types or when the interface index cannot be resolved.
 + */
 +static void flow_offload_redirect(struct net *net,
 +				  const struct flow_offload *flow,
 +				  enum flow_offload_tuple_dir dir,
 +				  struct nf_flow_rule *flow_rule)
 +{
 +	const struct flow_offload_tuple *tuple = &flow->tuplehash[dir].tuple;
 +	struct flow_action_entry *entry;
 +	struct net_device *dev;
 +	int ifindex;
 +
 +	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
 +		ifindex = tuple->out.hw_ifidx;
 +	else if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
 +		ifindex = flow->tuplehash[!dir].tuple.iifidx;
 +	else
 +		return;
 +
 +	/* The device pointer obtained here is stored in the action entry. */
 +	dev = dev_get_by_index(net, ifindex);
 +	if (!dev)
 +		return;
 +
 +	entry = flow_action_entry_next(flow_rule);
 +	entry->id = FLOW_ACTION_REDIRECT;
 +	entry->dev = dev;
 +}
 +
 +/* Append a tunnel-encap action when the cached route of direction @dir has
 + * tunnel info with IP_TUNNEL_INFO_TX set. DIRECT flows are skipped.
 + */
 +static void flow_offload_encap_tunnel(const struct flow_offload *flow,
 +				      enum flow_offload_tuple_dir dir,
 +				      struct nf_flow_rule *flow_rule)
 +{
 +	const struct flow_offload_tuple *tuple = &flow->tuplehash[dir].tuple;
 +	struct flow_action_entry *entry;
 +	struct ip_tunnel_info *tun_info;
 +
 +	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
 +		return;
 +	if (!tuple->dst_cache || !tuple->dst_cache->lwtstate)
 +		return;
 +
 +	tun_info = lwt_tun_info(tuple->dst_cache->lwtstate);
 +	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
 +		return;
 +
 +	entry = flow_action_entry_next(flow_rule);
 +	entry->id = FLOW_ACTION_TUNNEL_ENCAP;
 +	entry->tunnel = tun_info;
 +}
 +
 +/* Append a tunnel-decap action when the cached route of the opposite
 + * direction (!@dir) has tunnel info with IP_TUNNEL_INFO_TX set.
 + */
 +static void flow_offload_decap_tunnel(const struct flow_offload *flow,
 +				      enum flow_offload_tuple_dir dir,
 +				      struct nf_flow_rule *flow_rule)
 +{
 +	const struct flow_offload_tuple *rev = &flow->tuplehash[!dir].tuple;
 +	struct flow_action_entry *entry;
 +	struct ip_tunnel_info *tun_info;
 +
 +	if (rev->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
 +		return;
 +	if (!rev->dst_cache || !rev->dst_cache->lwtstate)
 +		return;
 +
 +	tun_info = lwt_tun_info(rev->dst_cache->lwtstate);
 +	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
 +		return;
 +
 +	entry = flow_action_entry_next(flow_rule);
 +	entry->id = FLOW_ACTION_TUNNEL_DECAP;
 +}
 +
 +/* Emit the actions shared by the IPv4 and IPv6 rule builders: tunnel
 + * decap/encap, Ethernet source/destination rewrite, VLAN pops for the
 + * encapsulations of direction @dir and VLAN/PPPoE pushes for those of the
 + * opposite direction.
 + *
 + * Returns 0 on success, -1 if either Ethernet rewrite helper fails.
 + */
 +static int
 +nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
 +			  enum flow_offload_tuple_dir dir,
 +			  struct nf_flow_rule *flow_rule)
 +{
 +	const struct flow_offload_tuple *tuple = &flow->tuplehash[dir].tuple;
 +	const struct flow_offload_tuple *rev = &flow->tuplehash[!dir].tuple;
 +	struct flow_action_entry *entry;
 +	int i;
 +
 +	flow_offload_decap_tunnel(flow, dir, flow_rule);
 +	flow_offload_encap_tunnel(flow, dir, flow_rule);
 +
 +	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0)
 +		return -1;
 +	if (flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
 +		return -1;
 +
 +	/* Pop each 802.1Q encap of @dir not flagged in in_vlan_ingress. */
 +	for (i = 0; i < tuple->encap_num; i++) {
 +		if ((tuple->in_vlan_ingress & BIT(i)) ||
 +		    tuple->encap[i].proto != htons(ETH_P_8021Q))
 +			continue;
 +
 +		entry = flow_action_entry_next(flow_rule);
 +		entry->id = FLOW_ACTION_VLAN_POP;
 +	}
 +
 +	/* Push the encapsulations recorded for the opposite direction. */
 +	for (i = 0; i < rev->encap_num; i++) {
 +		if (rev->in_vlan_ingress & BIT(i))
 +			continue;
 +
 +		/* A slot is consumed even if the protocol matches neither. */
 +		entry = flow_action_entry_next(flow_rule);
 +
 +		if (rev->encap[i].proto == htons(ETH_P_PPP_SES)) {
 +			entry->id = FLOW_ACTION_PPPOE_PUSH;
 +			entry->pppoe.sid = rev->encap[i].id;
 +		} else if (rev->encap[i].proto == htons(ETH_P_8021Q)) {
 +			entry->id = FLOW_ACTION_VLAN_PUSH;
 +			entry->vlan.vid = rev->encap[i].id;
 +			entry->vlan.proto = rev->encap[i].proto;
 +		}
 +	}
 +
 +	return 0;
 +}
 +
 +/* IPv4 rule builder: common actions, NAT mangles + checksum, redirect. */
 +int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
 +			    enum flow_offload_tuple_dir dir,
 +			    struct nf_flow_rule *flow_rule)
 +{
 +	const bool snat = test_bit(NF_FLOW_SNAT, &flow->flags);
 +	const bool dnat = test_bit(NF_FLOW_DNAT, &flow->flags);
 +
 +	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
 +		return -1;
 +	if (snat) {
 +		flow_offload_ipv4_snat(net, flow, dir, flow_rule);
 +		flow_offload_port_snat(net, flow, dir, flow_rule);
 +	}
 +	if (dnat) {
 +		flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
 +		flow_offload_port_dnat(net, flow, dir, flow_rule);
 +	}
 +	if (snat || dnat)
 +		flow_offload_ipv4_checksum(net, flow, flow_rule);
 +	flow_offload_redirect(net, flow, dir, flow_rule);
 +	return 0;
 +}
 +EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
 +
 +/* IPv6 rule builder: common actions, NAT address/port mangles, redirect. */
 +int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
 +			    enum flow_offload_tuple_dir dir,
 +			    struct nf_flow_rule *flow_rule)
 +{
 +	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
 +		return -1;
 +
 +	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
 +		flow_offload_ipv6_snat(net, flow, dir, flow_rule);
 +		flow_offload_port_snat(net, flow, dir, flow_rule);
 +	}
 +	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
 +		flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
 +		flow_offload_port_dnat(net, flow, dir, flow_rule);
 +	}
 +	flow_offload_redirect(net, flow, dir, flow_rule);
 +
 +	return 0;
 +}
 +EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
 +
 +#define NF_FLOW_RULE_ACTION_MAX	16
 +
 +/* Build the match and action list for direction @dir of @offload's flow.
 + * Returns the new rule, or NULL on allocation, match or action failure.
 + */
 +static struct nf_flow_rule *
 +nf_flow_offload_rule_alloc(struct net *net,
 +			   const struct flow_offload_work *offload,
 +			   enum flow_offload_tuple_dir dir)
 +{
 +	const struct flow_offload *flow = offload->flow;
 +	const struct flow_offload_tuple *rev = &flow->tuplehash[!dir].tuple;
 +	struct dst_entry *rev_dst = NULL;
 +	struct nf_flow_rule *flow_rule;
 +	struct flow_rule *rule;
 +
 +	flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
 +	if (!flow_rule)
 +		return NULL;
 +
 +	rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
 +	if (!rule)
 +		goto free_flow_rule;
 +
 +	rule->match.dissector = &flow_rule->match.dissector;
 +	rule->match.mask = &flow_rule->match.mask;
 +	rule->match.key = &flow_rule->match.key;
 +	flow_rule->rule = rule;
 +
 +	if (rev->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
 +		rev_dst = rev->dst_cache;
 +
 +	if (nf_flow_rule_match(&flow_rule->match, &flow->tuplehash[dir].tuple,
 +			       rev_dst) < 0)
 +		goto free_rule;
 +
 +	rule->action.num_entries = 0;
 +	if (offload->flowtable->type->action(net, flow, dir, flow_rule) < 0)
 +		goto free_rule;
 +
 +	return flow_rule;
 +
 +free_rule:
 +	kfree(rule);
 +free_flow_rule:
 +	kfree(flow_rule);
 +	return NULL;
 +}
 +
 +/* Free @flow_rule and its action list. The device stored in every redirect
 + * action is released with dev_put() first.
 + */
 +static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
 +{
 +	struct flow_action *action = &flow_rule->rule->action;
 +	int i;
 +
 +	for (i = 0; i < action->num_entries; i++) {
 +		if (action->entries[i].id == FLOW_ACTION_REDIRECT)
 +			dev_put(action->entries[i].dev);
 +	}
 +	kfree(flow_rule->rule);
 +	kfree(flow_rule);
 +}
 +
 +/* Destroy the rules of both directions held in @flow_rule. */
 +static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
 +{
 +	BUILD_BUG_ON(FLOW_OFFLOAD_DIR_MAX != 2);
 +	__nf_flow_offload_destroy(flow_rule[0]);
 +	__nf_flow_offload_destroy(flow_rule[1]);
 +}
 +
 +/* Allocate the rules of both directions; -ENOMEM leaves none allocated. */
 +static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
 +				 struct nf_flow_rule *flow_rule[])
 +{
 +	struct net *net = read_pnet(&offload->flowtable->net);
 +
 +	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
 +						  FLOW_OFFLOAD_DIR_ORIGINAL);
 +	if (!flow_rule[0])
 +		return -ENOMEM;
 +
 +	flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
 +						  FLOW_OFFLOAD_DIR_REPLY);
 +	if (flow_rule[1])
 +		return 0;
 +
 +	__nf_flow_offload_destroy(flow_rule[0]);
 +	return -ENOMEM;
 +}
 +
 +static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
 +				 __be16 proto, int priority,
 +				 enum flow_cls_command cmd,
 +				 const struct flow_offload_tuple *tuple,
 +				 struct netlink_ext_ack *extack)
 +{
 +	cls_flow->common.protocol = proto;	/* fills in the request sent to callbacks */
 +	cls_flow->common.prio = priority;
 +	cls_flow->common.extack = extack;
 +	cls_flow->command = cmd;
 +	cls_flow->cookie = (unsigned long)tuple;	/* the tuple's address is the cookie */
 +}
 +
 +/* Send @cmd for direction @dir to every callback on @block_cb_list.
 + * Returns how many callbacks returned no error; for FLOW_CLS_STATS the
 + * statistics left in the request are copied to @stats.
 + */
 +static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
 +				 struct flow_offload *flow,
 +				 struct nf_flow_rule *flow_rule,
 +				 enum flow_offload_tuple_dir dir,
 +				 int priority, int cmd,
 +				 struct flow_stats *stats,
 +				 struct list_head *block_cb_list)
 +{
 +	/* Zeroed so callbacks never see uninitialised extack contents. */
 +	struct netlink_ext_ack extack = {};
 +	struct flow_cls_offload cls_flow = {};
 +	struct flow_block_cb *block_cb;
 +	int accepted = 0;
 +
 +	nf_flow_offload_init(&cls_flow, ETH_P_ALL, priority, cmd,
 +			     &flow->tuplehash[dir].tuple, &extack);
 +	if (cmd == FLOW_CLS_REPLACE)
 +		cls_flow.rule = flow_rule->rule;
 +
 +	down_read(&flowtable->flow_block_lock);
 +	list_for_each_entry(block_cb, block_cb_list, list) {
 +		if (block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
 +				 block_cb->cb_priv) >= 0)
 +			accepted++;
 +	}
 +	up_read(&flowtable->flow_block_lock);
 +
 +	if (cmd == FLOW_CLS_STATS)
 +		memcpy(stats, &cls_flow.stats, sizeof(*stats));
 +	return accepted;
 +}
 +
 +static int flow_offload_tuple_add(struct flow_offload_work *offload,
 +				  struct nf_flow_rule *flow_rule,
 +				  enum flow_offload_tuple_dir dir)
 +{
 +	return nf_flow_offload_tuple(offload->flowtable, offload->flow,	/* result passed through */
 +				     flow_rule, dir, offload->priority,
 +				     FLOW_CLS_REPLACE, NULL,	/* NULL: no stats buffer */
 +				     &offload->flowtable->flow_block.cb_list);
 +}
 +
 +static void flow_offload_tuple_del(struct flow_offload_work *offload,
 +				   enum flow_offload_tuple_dir dir)
 +{
 +	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,	/* NULL: no rule; result ignored */
 +			      offload->priority, FLOW_CLS_DESTROY, NULL,	/* NULL: no stats buffer */
 +			      &offload->flowtable->flow_block.cb_list);
 +}
 +
 +/* Offer the rules of both directions to the block callbacks.
 + * Returns 0 if any callback accepted any of them, -ENOENT otherwise.
 + */
 +static int flow_offload_rule_add(struct flow_offload_work *offload,
 +				 struct nf_flow_rule *flow_rule[])
 +{
 +	int accepted;
 +
 +	accepted = flow_offload_tuple_add(offload, flow_rule[0],
 +					  FLOW_OFFLOAD_DIR_ORIGINAL);
 +	accepted += flow_offload_tuple_add(offload, flow_rule[1],
 +					   FLOW_OFFLOAD_DIR_REPLY);
 +	return accepted ? 0 : -ENOENT;
 +}
 +
 +/* FLOW_CLS_REPLACE worker: build the rules of both directions and offer
 + * them to the block callbacks. IPS_HW_OFFLOAD_BIT is set on the conntrack
 + * entry when at least one callback accepted a rule. Nothing happens when
 + * the rules cannot be allocated.
 + */
 +static void flow_offload_work_add(struct flow_offload_work *offload)
 +{
 +	struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
 +
 +	if (nf_flow_offload_alloc(offload, flow_rule) < 0)
 +		return;
 +
 +	if (flow_offload_rule_add(offload, flow_rule) >= 0)
 +		set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
 +
 +	/* Freed whether or not the callbacks accepted them. */
 +	nf_flow_offload_destroy(flow_rule);
 +}
 +
 +static void flow_offload_work_del(struct flow_offload_work *offload)
 +{
 +	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);	/* FLOW_CLS_DESTROY worker */
 +	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
 +	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
 +	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);	/* set after both removals were issued */
 +}
 +
 +static void flow_offload_tuple_stats(struct flow_offload_work *offload,
 +				     enum flow_offload_tuple_dir dir,
 +				     struct flow_stats *stats)
 +{
 +	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,	/* NULL: no rule; result ignored */
 +			      offload->priority, FLOW_CLS_STATS, stats,	/* @stats receives the counters */
 +			      &offload->flowtable->flow_block.cb_list);
 +}
 +
 +/* FLOW_CLS_STATS worker: refresh flow timeout and conntrack accounting. */
 +static void flow_offload_work_stats(struct flow_offload_work *offload)
 +{
 +	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
 +	struct flow_offload *flow = offload->flow;
 +	u64 lastused;
 +
 +	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
 +	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
 +
 +	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
 +	flow->timeout = max_t(u64, flow->timeout,
 +			      lastused + flow_offload_get_timeout(flow));
 +
 +	if (!(offload->flowtable->flags & NF_FLOWTABLE_COUNTER))
 +		return;
 +	if (stats[0].pkts)
 +		nf_ct_acct_add(flow->ct, FLOW_OFFLOAD_DIR_ORIGINAL,
 +			       stats[0].pkts, stats[0].bytes);
 +	if (stats[1].pkts)
 +		nf_ct_acct_add(flow->ct, FLOW_OFFLOAD_DIR_REPLY,
 +			       stats[1].pkts, stats[1].bytes);
 +}
 +
 +/* Work item handler: run the add, del or stats step selected by
 + * offload->cmd (warning once on anything else), then clear the flow's
 + * NF_FLOW_HW_PENDING bit and free the work item.
 + */
 +static void flow_offload_work_handler(struct work_struct *work)
 +{
 +	struct flow_offload_work *offload =
 +		container_of(work, struct flow_offload_work, work);
 +
 +	if (offload->cmd == FLOW_CLS_REPLACE)
 +		flow_offload_work_add(offload);
 +	else if (offload->cmd == FLOW_CLS_DESTROY)
 +		flow_offload_work_del(offload);
 +	else if (offload->cmd == FLOW_CLS_STATS)
 +		flow_offload_work_stats(offload);
 +	else
 +		WARN_ON_ONCE(1);
 +
 +	/* Allows the next command to be queued for this flow. */
 +	clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
 +	kfree(offload);
 +}
 +
 +/* Queue @offload on the workqueue of its command: add for REPLACE, del for
 + * DESTROY, stats for every other command.
 + */
 +static void flow_offload_queue_work(struct flow_offload_work *offload)
 +{
 +	queue_work(offload->cmd == FLOW_CLS_REPLACE ? nf_flow_offload_add_wq :
 +		   offload->cmd == FLOW_CLS_DESTROY ? nf_flow_offload_del_wq :
 +		   nf_flow_offload_stats_wq, &offload->work);
 +}
 +
 +/* Allocate a work item for @cmd unless one is already pending for @flow. */
 +static struct flow_offload_work *
 +nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
 +			   struct flow_offload *flow, unsigned int cmd)
 +{
 +	struct flow_offload_work *item;
 +
 +	if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
 +		return NULL;
 +
 +	item = kmalloc(sizeof(*item), GFP_ATOMIC);
 +	if (!item) {
 +		clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
 +		return NULL;
 +	}
 +
 +	INIT_WORK(&item->work, flow_offload_work_handler);
 +	item->flowtable = flowtable;
 +	item->priority = flowtable->priority;
 +	item->flow = flow;
 +	item->cmd = cmd;
 +	return item;
 +}
 +
 +
 +/* Queue a FLOW_CLS_REPLACE work item for @flow. */
 +void nf_flow_offload_add(struct nf_flowtable *flowtable,
 +			 struct flow_offload *flow)
 +{
 +	struct flow_offload_work *offload;
 +
 +	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
 +	/* NULL: work already pending for this flow, or out of memory. */
 +	if (offload)
 +		flow_offload_queue_work(offload);
 +}
 +
 +/* Queue a FLOW_CLS_DESTROY work item and mark @flow NF_FLOW_HW_DYING. */
 +void nf_flow_offload_del(struct nf_flowtable *flowtable,
 +			 struct flow_offload *flow)
 +{
 +	struct flow_offload_work *offload;
 +
 +	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
 +	if (offload) {
 +		set_bit(NF_FLOW_HW_DYING, &flow->flags);
 +		flow_offload_queue_work(offload);
 +	}
 +}
 +
 +/* Queue a FLOW_CLS_STATS work item for @flow, unless the value returned by
 + * nf_flow_timeout_delta() is still at least 9/10 of the flow timeout.
 + */
 +void nf_flow_offload_stats(struct nf_flowtable *flowtable,
 +			   struct flow_offload *flow)
 +{
 +	struct flow_offload_work *offload;
 +	__s32 delta = nf_flow_timeout_delta(flow->timeout);
 +
 +	if (delta >= (9 * flow_offload_get_timeout(flow)) / 10)
 +		return;
 +
 +	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
 +	if (offload)
 +		flow_offload_queue_work(offload);
 +}
 +
 +void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
 +{
 +	if (nf_flowtable_hw_offload(flowtable)) {	/* other flowtables do not wait */
 +		flush_workqueue(nf_flow_offload_add_wq);	/* the three queues are shared file-level globals */
 +		flush_workqueue(nf_flow_offload_del_wq);
 +		flush_workqueue(nf_flow_offload_stats_wq);
 +	}
 +}
 +
 +/* BIND splices @bo's callbacks into the flowtable block, UNBIND unlinks and
 + * frees them. NOTE(review): flow_block_lock is not taken here - verify.
 + */
 +static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
 +				     struct flow_block_offload *bo,
 +				     enum flow_block_command cmd)
 +{
 +	struct flow_block_cb *block_cb, *next;
 +
 +	switch (cmd) {
 +	case FLOW_BLOCK_BIND:
 +		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
 +		return 0;
 +	case FLOW_BLOCK_UNBIND:
 +		list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
 +			list_del(&block_cb->list);
 +			flow_block_cb_free(block_cb);
 +		}
 +		return 0;
 +	default:
 +		WARN_ON_ONCE(1);
 +		return -EOPNOTSUPP;
 +	}
 +}
 +
 +static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
 +					     struct net *net,
 +					     enum flow_block_command cmd,
 +					     struct nf_flowtable *flowtable,
 +					     struct netlink_ext_ack *extack)
 +{
 +	memset(bo, 0, sizeof(*bo));	/* every field not set below starts out zero */
 +	bo->net		= net;
 +	bo->block	= &flowtable->flow_block;
 +	bo->command	= cmd;
 +	bo->binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 +	bo->extack	= extack;
 +	INIT_LIST_HEAD(&bo->cb_list);	/* empty list for the callee to fill */
 +}
 +
 +/* Pass the bind/unbind request for @dev to flow_indr_block_call().
 + * Returns -EOPNOTSUPP when @bo's callback list is still empty afterwards.
 + */
 +static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
 +					  struct nf_flowtable *flowtable,
 +					  struct net_device *dev,
 +					  enum flow_block_command cmd,
 +					  struct netlink_ext_ack *extack)
 +{
 +	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
 +					 extack);
 +	flow_indr_block_call(dev, bo, cmd);
 +
 +	return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0;
 +}
 +
 +/* Send the bind/unbind request to @dev through ndo_setup_tc(TC_SETUP_FT).
 + * Returns the driver's negative error, or 0 for any other result.
 + */
 +static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
 +				     struct nf_flowtable *flowtable,
 +				     struct net_device *dev,
 +				     enum flow_block_command cmd,
 +				     struct netlink_ext_ack *extack)
 +{
 +	int err;
 +
 +	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
 +					 extack);
 +	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
 +	return err < 0 ? err : 0;
 +}
 +
 +/* Bind or unbind @flowtable on @dev, via ndo_setup_tc when the device has
 + * one and the indirect path otherwise. Returns 0 without hw offload.
 + */
 +int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
 +				struct net_device *dev,
 +				enum flow_block_command cmd)
 +{
 +	struct netlink_ext_ack extack = {};
 +	struct flow_block_offload bo;
 +	int err;
 +
 +	if (!nf_flowtable_hw_offload(flowtable))
 +		return 0;
 +
 +	err = dev->netdev_ops->ndo_setup_tc ?
 +	      nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack) :
 +	      nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd, &extack);
 +	if (err < 0)
 +		return err;
 +
 +	return nf_flow_table_block_setup(flowtable, &bo, cmd);
 +}
 +EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
 +
 +/* Create the add, del and stats workqueues. Returns 0, or -ENOMEM after
 + * destroying whatever had already been created.
 + */
 +int nf_flow_table_offload_init(void)
 +{
 +	nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
 +						 WQ_UNBOUND | WQ_SYSFS, 0);
 +	if (!nf_flow_offload_add_wq)
 +		return -ENOMEM;
 +
 +	nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
 +						 WQ_UNBOUND | WQ_SYSFS, 0);
 +	if (!nf_flow_offload_del_wq)
 +		goto destroy_add;
 +
 +	nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
 +						   WQ_UNBOUND | WQ_SYSFS, 0);
 +	if (nf_flow_offload_stats_wq)
 +		return 0;
 +
 +	destroy_workqueue(nf_flow_offload_del_wq);
 +destroy_add:
 +	destroy_workqueue(nf_flow_offload_add_wq);
 +	return -ENOMEM;
 +}
 +
 +void nf_flow_table_offload_exit(void)
 +{
 +	destroy_workqueue(nf_flow_offload_add_wq);	/* undoes nf_flow_table_offload_init() */
 +	destroy_workqueue(nf_flow_offload_del_wq);
 +	destroy_workqueue(nf_flow_offload_stats_wq);
 +}
 diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c
 new file mode 100644
 index 000000000..ae1eb2656
 --- /dev/null
 +++ b/net/netfilter/xt_FLOWOFFLOAD.c
 @@ -0,0 +1,719 @@
 +/*
 + * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License version 2 as
 + * published by the Free Software Foundation.
 + */
 +#include <linux/module.h>
 +#include <linux/init.h>
 +#include <linux/netfilter.h>
 +#include <linux/netfilter/xt_FLOWOFFLOAD.h>
 +#include <linux/if_vlan.h>
 +#include <net/ip.h>
 +#include <net/netfilter/nf_conntrack.h>
 +#include <net/netfilter/nf_conntrack_extend.h>
 +#include <net/netfilter/nf_conntrack_helper.h>
 +#include <net/netfilter/nf_flow_table.h>
 +
 +struct xt_flowoffload_hook {
 +	struct hlist_node list;	/* entry in xt_flowoffload_table.hooks */
 +	struct nf_hook_ops ops;	/* netdev ingress hook for one device */
 +	struct net *net;	/* dev_net() of ops.dev, taken at registration */
 +	bool registered;	/* set just before nf_register_net_hook() */
 +	bool used;	/* cleared by each work run, set again while referenced */
 +};
 +
 +struct xt_flowoffload_table {
 +	struct nf_flowtable ft;	/* passed to the ingress hooks as priv */
 +	struct hlist_head hooks;	/* xt_flowoffload_hook list, walked under hooks_lock */
 +	struct delayed_work work;	/* hook maintenance work, re-armed with a delay of HZ */
 +};
 +
 +struct nf_forward_info {
 +	const struct net_device *indev;	/* device of the last path visited, NULL on failure */
 +	const struct net_device *outdev;	/* first VLAN/PPPoE path device, else indev */
 +	const struct net_device *hw_outdev;	/* set to indev after the path walk */
 +	struct id {
 +		__u16	id;
 +		__be16	proto;
 +	} encap[NF_FLOW_TABLE_ENCAP_MAX];	/* encapsulations collected from the path */
 +	u8 num_encaps;	/* number of encap[] entries in use */
 +	u8 ingress_vlans;	/* bitmask indexed like encap[] */
 +	u8 h_source[ETH_ALEN];	/* dev_addr of the first path device that qualifies */
 +	u8 h_dest[ETH_ALEN];	/* neighbour address, replaced by the PPPoE h_dest if any */
 +	enum flow_offload_xmit_type xmit_type;	/* DIRECT when indev is a valid Ethernet device */
 +};
 +
 +static DEFINE_SPINLOCK(hooks_lock);	/* held around every walk/update of a table's hooks list */
 +
 +struct xt_flowoffload_table flowtable[2];	/* NOTE(review): non-static global; role of the two slots is not visible here */
 +
 +/* Netdev ingress hook. Finds the network-layer protocol, looking through a
 + * VLAN header or a PPPoE session header, and passes IPv4 and IPv6 packets
 + * to nf_flow_offload_ip_hook() or nf_flow_offload_ipv6_hook(), whose
 + * verdict is returned. All other packets get NF_ACCEPT.
 + */
 +static unsigned int
 +xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
 +			const struct nf_hook_state *state)
 +{
 +	__be16 proto = skb->protocol;
 +
 +	if (proto == htons(ETH_P_8021Q)) {
 +		struct vlan_ethhdr *veth;
 +
 +		/* Use the protocol encapsulated by the VLAN header. */
 +		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
 +		proto = veth->h_vlan_encapsulated_proto;
 +	} else if (proto == htons(ETH_P_PPP_SES)) {
 +		proto = nf_flow_pppoe_proto(skb);
 +	}
 +
 +	if (proto == htons(ETH_P_IP))
 +		return nf_flow_offload_ip_hook(priv, skb, state);
 +	if (proto == htons(ETH_P_IPV6))
 +		return nf_flow_offload_ipv6_hook(priv, skb, state);
 +
 +	/* Neither IPv4 nor IPv6: leave the packet alone. */
 +	return NF_ACCEPT;
 +}
 +
 +/* Allocate an ingress hook for @dev, link it into @table and schedule the
 + * table's delayed work without delay. Returns 0 or -ENOMEM.
 + */
 +static int
 +xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
 +			   struct net_device *dev)
 +{
 +	struct xt_flowoffload_hook *hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
 +
 +	if (!hook)
 +		return -ENOMEM;
 +
 +	hook->ops.pf = NFPROTO_NETDEV;
 +	hook->ops.hooknum = NF_NETDEV_INGRESS;
 +	hook->ops.priority = 10;
 +	hook->ops.priv = &table->ft;
 +	hook->ops.hook = xt_flowoffload_net_hook;
 +	hook->ops.dev = dev;
 +	hlist_add_head(&hook->list, &table->hooks);
 +
 +	/* The hook is not registered here; that is left to the work item. */
 +	mod_delayed_work(system_power_efficient_wq, &table->work, 0);
 +	return 0;
 +}
 +
 +/* Return @table's hook for @dev, or NULL if there is none. */
 +static struct xt_flowoffload_hook *
 +flow_offload_lookup_hook(struct xt_flowoffload_table *table,
 +			 struct net_device *dev)
 +{
 +	struct xt_flowoffload_hook *cur;
 +
 +	hlist_for_each_entry(cur, &table->hooks, list)
 +		if (cur->ops.dev == dev)
 +			return cur;
 +
 +	return NULL;
 +}
 +
 +/* Mark @dev's existing hook as used, or create a hook if it has none. */
 +static void
 +xt_flowoffload_check_device(struct xt_flowoffload_table *table,
 +			    struct net_device *dev)
 +{
 +	struct xt_flowoffload_hook *hook;
 +
 +	if (!dev)
 +		return;
 +	spin_lock_bh(&hooks_lock);
 +	hook = flow_offload_lookup_hook(table, dev);
 +	if (!hook)
 +		xt_flowoffload_create_hook(table, dev);
 +	else
 +		hook->used = true;
 +	spin_unlock_bh(&hooks_lock);
 +}
 +
 +static void
 +xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
 +{
 +	struct xt_flowoffload_hook *hook;
 +	/* Registers every hook not yet registered; hooks_lock is held on entry and exit. */
 +restart:
 +	hlist_for_each_entry(hook, &table->hooks, list) {
 +		if (hook->registered)
 +			continue;
 +		/* Flag the hook first so the restarted scan skips it. */
 +		hook->registered = true;
 +		hook->net = dev_net(hook->ops.dev);
 +		spin_unlock_bh(&hooks_lock);	/* lock is dropped around the registration calls */
 +		nf_register_net_hook(hook->net, &hook->ops);	/* NOTE(review): return value is ignored */
 +		if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
 +			table->ft.type->setup(&table->ft, hook->ops.dev,
 +					      FLOW_BLOCK_BIND);
 +		spin_lock_bh(&hooks_lock);
 +		goto restart;	/* rescan from the head after the lock was dropped */
 +	}
 +
 +}
 +
 +static bool
 +xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
 +{
 +	struct xt_flowoffload_hook *hook;
 +	bool active = false;
 +	/* Removes unused registered hooks; returns true if any hook was kept. */
 +restart:
 +	spin_lock_bh(&hooks_lock);
 +	hlist_for_each_entry(hook, &table->hooks, list) {
 +		if (hook->used || !hook->registered) {
 +			active = true;	/* stays true across restarts */
 +			continue;
 +		}
 +		/* Unlink under the lock, then unbind, unregister and free without it. */
 +		hlist_del(&hook->list);
 +		spin_unlock_bh(&hooks_lock);
 +		if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
 +			table->ft.type->setup(&table->ft, hook->ops.dev,
 +					      FLOW_BLOCK_UNBIND);
 +		nf_unregister_net_hook(hook->net, &hook->ops);
 +		kfree(hook);
 +		goto restart;	/* rescan from the head after the lock was dropped */
 +	}
 +	spin_unlock_bh(&hooks_lock);
 +
 +	return active;
 +}
 +
 +/* Flowtable walk callback: mark hooks on @flow's input devices as used. */
 +static void
 +xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
 +{
 +	const struct flow_offload_tuple *t0 = &flow->tuplehash[0].tuple;
 +	const struct flow_offload_tuple *t1 = &flow->tuplehash[1].tuple;
 +	struct xt_flowoffload_table *table = data;
 +	struct xt_flowoffload_hook *hook;
 +
 +	spin_lock_bh(&hooks_lock);
 +	hlist_for_each_entry(hook, &table->hooks, list) {
 +		/* iifidx of either tuple is enough. */
 +		if (hook->ops.dev->ifindex == t0->iifidx ||
 +		    hook->ops.dev->ifindex == t1->iifidx)
 +			hook->used = true;
 +	}
 +	spin_unlock_bh(&hooks_lock);
 +}
 +
 +/* Delayed work: register pending hooks, clear every hook's used flag, let
 + * the flowtable walk set it again, then remove unused hooks. The work is
 + * re-queued after HZ unless the cleanup reports that no hook is left active.
 + */
 +static void
 +xt_flowoffload_hook_work(struct work_struct *work)
 +{
 +	struct xt_flowoffload_table *table =
 +		container_of(work, struct xt_flowoffload_table, work.work);
 +	struct xt_flowoffload_hook *hook;
 +	int err;
 +
 +	spin_lock_bh(&hooks_lock);
 +	xt_flowoffload_register_hooks(table);
 +	hlist_for_each_entry(hook, &table->hooks, list)
 +		hook->used = false;
 +	spin_unlock_bh(&hooks_lock);
 +
 +	err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
 +				    table);
 +	/* Skip the cleanup pass when the walk failed (other than -EAGAIN). */
 +	if ((!err || err == -EAGAIN) && !xt_flowoffload_cleanup_hooks(table))
 +		return;
 +
 +	queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
 +}
 +
 +/* Returns true for packets with a security path and for IPv4 packets that
 + * carry IP options, false otherwise.
 + */
 +static bool
 +xt_flowoffload_skip(struct sk_buff *skb, int family)
 +{
 +	if (skb_sec_path(skb))
 +		return true;
 +
 +	if (family != NFPROTO_IPV4)
 +		return false;
 +
 +	/* A non-zero option length means IP options are present. */
 +	return unlikely(IPCB(skb)->opt.optlen);
 +}
 +
 +/* XMIT_XFRM when dst_xfrm() is set for @dst, XMIT_NEIGH otherwise. */
 +static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
 +{
 +	if (!dst_xfrm(dst))
 +		return FLOW_OFFLOAD_XMIT_NEIGH;
 +	return FLOW_OFFLOAD_XMIT_XFRM;
 +}
 +
 +static void nf_default_forward_path(struct nf_flow_route *route,
 +				    struct dst_entry *dst_cache,
 +				    enum ip_conntrack_dir dir,
 +				    struct net_device **dev)	/* @dev is not used */
 +{
 +	route->tuple[!dir].in.ifindex	= dst_cache->dev->ifindex;	/* route's device is the other direction's input */
 +	route->tuple[dir].dst		= dst_cache;
 +	route->tuple[dir].xmit_type	= nf_xmit_type(dst_cache);
 +}
 +
 +/* True for a non-loopback ARPHRD_ETHER device with a valid MAC address. */
 +static bool nf_is_valid_ether_device(const struct net_device *dev)
 +{
 +	/* A NULL device is never valid. */
 +	return dev && !(dev->flags & IFF_LOOPBACK) &&
 +	       dev->type == ARPHRD_ETHER && dev->addr_len == ETH_ALEN &&
 +	       is_valid_ether_addr(dev->dev_addr);
 +}
 +
 +static void nf_dev_path_info(const struct net_device_path_stack *stack,
 +			     struct nf_forward_info *info,
 +			     unsigned char *ha)
 +{
 +	const struct net_device_path *path;
 +	int i;
 +	/* Walks @stack from index 0 and records devices, encaps and MACs in @info. */
 +	memcpy(info->h_dest, ha, ETH_ALEN);
 +
 +	for (i = 0; i < stack->num_paths; i++) {
 +		path = &stack->path[i];
 +		/* indev follows the path being visited. */
 +		info->indev = path->dev;
 +
 +		switch (path->type) {
 +		case DEV_PATH_ETHERNET:
 +		case DEV_PATH_DSA:
 +		case DEV_PATH_VLAN:
 +		case DEV_PATH_PPPOE:
 +			if (is_zero_ether_addr(info->h_source))	/* only the first qualifying device sets it */
 +				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
 +
 +			if (path->type == DEV_PATH_ETHERNET)
 +				break;
 +			if (path->type == DEV_PATH_DSA) {
 +				i = stack->num_paths;	/* ends the loop after this path */
 +				break;
 +			}
 +
 +			/* DEV_PATH_VLAN and DEV_PATH_PPPOE */
 +			if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
 +				info->indev = NULL;	/* NOTE(review): overwritten again if another path follows */
 +				break;
 +			}
 +			if (!info->outdev)
 +				info->outdev = path->dev;
 +			info->encap[info->num_encaps].id = path->encap.id;
 +			info->encap[info->num_encaps].proto = path->encap.proto;
 +			info->num_encaps++;
 +			if (path->type == DEV_PATH_PPPOE)
 +				memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
 +			break;
 +		case DEV_PATH_BRIDGE:
 +			if (is_zero_ether_addr(info->h_source))
 +				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
 +			/* The bridge VLAN mode adjusts the encaps collected so far. */
 +			switch (path->bridge.vlan_mode) {
 +			case DEV_PATH_BR_VLAN_UNTAG_HW:
 +				info->ingress_vlans |= BIT(info->num_encaps - 1);	/* NOTE(review): num_encaps == 0 is not checked */
 +				break;
 +			case DEV_PATH_BR_VLAN_TAG:
 +				info->encap[info->num_encaps].id = path->bridge.vlan_id;	/* NOTE(review): no NF_FLOW_TABLE_ENCAP_MAX check here */
 +				info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
 +				info->num_encaps++;
 +				break;
 +			case DEV_PATH_BR_VLAN_UNTAG:
 +				info->num_encaps--;	/* NOTE(review): u8 wraps if num_encaps is 0 */
 +				break;
 +			case DEV_PATH_BR_VLAN_KEEP:
 +				break;
 +			}
 +			break;
 +		default:
 +			break;
 +		}
 +	}
 +	if (!info->outdev)
 +		info->outdev = info->indev;
 +
 +	info->hw_outdev = info->indev;
 +
 +	if (nf_is_valid_ether_device(info->indev))
 +		info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
 +}
 +
 +/* For a valid Ethernet output device, copy the neighbour's hardware address
 + * into @ha (-1 if there is none or it is not NUD_VALID); then fill @stack.
 + */
 +static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
 +				     const struct dst_entry *dst_cache,
 +				     const struct nf_conn *ct,
 +				     enum ip_conntrack_dir dir, u8 *ha,
 +				     struct net_device_path_stack *stack)
 +{
 +	struct net_device *dev = dst_cache->dev;
 +	struct neighbour *n;
 +	u8 nud_state;
 +
 +	if (nf_is_valid_ether_device(dev)) {
 +		n = dst_neigh_lookup(dst_cache,
 +				     &ct->tuplehash[!dir].tuple.src.u3);
 +		if (!n)
 +			return -1;
 +
 +		read_lock_bh(&n->lock);
 +		nud_state = n->nud_state;
 +		ether_addr_copy(ha, n->ha);
 +		read_unlock_bh(&n->lock);
 +		neigh_release(n);
 +		if (!(nud_state & NUD_VALID))
 +			return -1;
 +	}
 +
 +	return dev_fill_forward_path(dev, ha, stack);
 +}
 +
 +/* Fill @route from the device path below the output device of @dir. */
 +static int nf_dev_forward_path(struct nf_flow_route *route,
 +				const struct nf_conn *ct,
 +				enum ip_conntrack_dir dir,
 +				struct net_device **devs)
 +{
 +	const struct dst_entry *dst = route->tuple[dir].dst;
 +	struct net_device_path_stack stack;
 +	struct nf_forward_info info = {};
 +	unsigned char ha[ETH_ALEN] = {};	/* read even when no neighbour was looked up */
 +	int i;
 +
 +	if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
 +		nf_dev_path_info(&stack, &info, ha);
 +	if (info.num_encaps > NF_FLOW_TABLE_ENCAP_MAX)
 +		info.indev = NULL;	/* inconsistent path: never index past encap[] */
 +	devs[!dir] = (struct net_device *)info.indev;
 +	if (!info.indev)
 +		return -1;
 +
 +	route->tuple[!dir].in.ifindex = info.indev->ifindex;
 +	for (i = 0; i < info.num_encaps; i++) {
 +		route->tuple[!dir].in.encap[i].id = info.encap[i].id;
 +		route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
 +	}
 +	route->tuple[!dir].in.num_encaps = info.num_encaps;
 +	route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
 +	if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
 +		memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
 +		memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
 +		route->tuple[dir].out.ifindex = info.outdev->ifindex;
 +		route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
 +		route->tuple[dir].xmit_type = info.xmit_type;
 +	}
 +	return 0;
 +}
 +
 +/*
 + * Look up the route for one direction of @ct and record it in @route via
 + * nf_default_forward_path().
 + *
 + * The lookup key is built from the reply-side tuple of @dir (tuplehash[!dir])
 + * with @ifindex as the output interface; for IPv6 the source address is set
 + * too.
 + *
 + * Returns 0 on success, -ENOENT when nf_route() yields no dst.
 + */
 +static int
 +xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct,
 +			 enum ip_conntrack_dir dir,
 +			 const struct xt_action_param *par, int ifindex,
 +			 struct net_device **devs)
 +{
 +	const struct nf_conntrack_tuple *peer = &ct->tuplehash[!dir].tuple;
 +	u8 family = xt_family(par);
 +	struct dst_entry *rt_dst = NULL;
 +	struct flowi key;
 +
 +	memset(&key, 0, sizeof(key));
 +	if (family == NFPROTO_IPV4) {
 +		key.u.ip4.daddr = peer->src.u3.ip;
 +		key.u.ip4.flowi4_oif = ifindex;
 +	} else if (family == NFPROTO_IPV6) {
 +		key.u.ip6.saddr = peer->dst.u3.in6;
 +		key.u.ip6.daddr = peer->src.u3.in6;
 +		key.u.ip6.flowi6_oif = ifindex;
 +	}
 +
 +	/* Success is judged by rt_dst alone; the return code is not used. */
 +	nf_route(xt_net(par), &rt_dst, &key, false, family);
 +	if (!rt_dst)
 +		return -ENOENT;
 +
 +	nf_default_forward_path(route, rt_dst, dir, devs);
 +
 +	return 0;
 +}
 +
 +/*
 + * Fill @route for both directions of @ct.
 + *
 + * Each direction is first routed through xt_flowoffload_route_dir() using
 + * the ifindex of the matching entry in @devs. Only if both directions end up
 + * with FLOW_OFFLOAD_XMIT_NEIGH is the device forward path resolved as well.
 + *
 + * Returns 0 on success, the error from xt_flowoffload_route_dir(), or -1 if
 + * a forward path lookup fails. Nothing is released here on failure.
 + * NOTE(review): presumably the caller owns whatever dst entries were stored
 + * in @route by then - confirm against nf_default_forward_path().
 + */
 +static int
 +xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct,
 +		     const struct xt_action_param *par,
 +		     struct nf_flow_route *route, enum ip_conntrack_dir dir,
 +		     struct net_device **devs)
 +{
 +	int err;
 +
 +	err = xt_flowoffload_route_dir(route, ct, dir, par,
 +				       devs[dir]->ifindex, devs);
 +	if (err)
 +		return err;
 +
 +	err = xt_flowoffload_route_dir(route, ct, !dir, par,
 +				       devs[!dir]->ifindex, devs);
 +	if (err)
 +		return err;
 +
 +	/* The device path is only resolved for neighbour-based transmit. */
 +	if (route->tuple[dir].xmit_type != FLOW_OFFLOAD_XMIT_NEIGH ||
 +	    route->tuple[!dir].xmit_type != FLOW_OFFLOAD_XMIT_NEIGH)
 +		return 0;
 +
 +	if (nf_dev_forward_path(route, ct, dir, devs) ||
 +	    nf_dev_forward_path(route, ct, !dir, devs))
 +		return -1;
 +
 +	return 0;
 +}
 +
 +/*
 + * FLOWOFFLOAD target: try to move the connection of @skb into a flowtable.
 + *
 + * Only confirmed TCP (ESTABLISHED, packet without FIN/RST) and UDP
 + * connections without a helper extension and without IPS_SEQ_ADJUST are
 + * considered. IPS_OFFLOAD_BIT guards against offloading a connection twice
 + * and is cleared again if setting up the flow fails.
 + *
 + * The table is picked by XT_FLOWOFFLOAD_HW in the target flags
 + * (flowtable[1] when set, flowtable[0] otherwise).
 + *
 + * Always returns XT_CONTINUE; failures leave the packet on the normal path.
 + */
 +static unsigned int
 +flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
 +{
 +	struct xt_flowoffload_table *table;
 +	const struct xt_flowoffload_target_info *info = par->targinfo;
 +	struct tcphdr _tcph, *tcph = NULL;
 +	enum ip_conntrack_info ctinfo;
 +	enum ip_conntrack_dir dir;
 +	struct nf_flow_route route = {};
 +	struct flow_offload *flow = NULL;
 +	struct net_device *devs[2] = {};
 +	struct nf_conn *ct;
 +	struct net *net;
 +
 +	if (xt_flowoffload_skip(skb, xt_family(par)))
 +		return XT_CONTINUE;
 +
 +	ct = nf_ct_get(skb, &ctinfo);
 +	if (ct == NULL)
 +		return XT_CONTINUE;
 +
 +	switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
 +	case IPPROTO_TCP:
 +		if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
 +			return XT_CONTINUE;
 +
 +		tcph = skb_header_pointer(skb, par->thoff,
 +					  sizeof(_tcph), &_tcph);
 +		if (unlikely(!tcph || tcph->fin || tcph->rst))
 +			return XT_CONTINUE;
 +		break;
 +	case IPPROTO_UDP:
 +		break;
 +	default:
 +		return XT_CONTINUE;
 +	}
 +
 +	if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
 +	    ct->status & IPS_SEQ_ADJUST)
 +		return XT_CONTINUE;
 +
 +	if (!nf_ct_is_confirmed(ct))
 +		return XT_CONTINUE;
 +
 +	/* dir indexes devs[] below, so it has to be resolved first. */
 +	dir = CTINFO2DIR(ctinfo);
 +
 +	devs[dir] = xt_out(par);
 +	devs[!dir] = xt_in(par);
 +
 +	if (!devs[dir] || !devs[!dir])
 +		return XT_CONTINUE;
 +
 +	if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
 +		return XT_CONTINUE;
 +
 +	if (xt_flowoffload_route(skb, ct, par, &route, dir, devs) < 0)
 +		goto err_flow_route;
 +
 +	flow = flow_offload_alloc(ct);
 +	if (!flow)
 +		goto err_flow_route;
 +
 +	if (flow_offload_route_init(flow, &route) < 0)
 +		goto err_flow_add;
 +
 +	if (tcph) {
 +		ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
 +		ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
 +	}
 +
 +	table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
 +
 +	/* Bind the table to the first namespace that uses it. */
 +	net = read_pnet(&table->ft.net);
 +	if (!net)
 +		write_pnet(&table->ft.net, xt_net(par));
 +
 +	if (flow_offload_add(&table->ft, flow) < 0)
 +		goto err_flow_add;
 +
 +	xt_flowoffload_check_device(table, devs[0]);
 +	xt_flowoffload_check_device(table, devs[1]);
 +
 +	/*
 +	 * Both directions were looked up through nf_route(), so both route
 +	 * references are ours to drop.
 +	 * NOTE(review): assumes the flow holds its own dst references after
 +	 * flow_offload_route_init() - confirm against nf_flow_table_core.c.
 +	 */
 +	dst_release(route.tuple[dir].dst);
 +	dst_release(route.tuple[!dir].dst);
 +
 +	return XT_CONTINUE;
 +
 +err_flow_add:
 +	flow_offload_free(flow);
 +err_flow_route:
 +	/*
 +	 * route is zero-initialised and dst_release() ignores NULL, so this
 +	 * also covers a routing failure that left only one (or no) dst set.
 +	 */
 +	dst_release(route.tuple[dir].dst);
 +	dst_release(route.tuple[!dir].dst);
 +	clear_bit(IPS_OFFLOAD_BIT, &ct->status);
 +
 +	return XT_CONTINUE;
 +}
 +
 +/*
 + * checkentry hook: reject rules whose target flags contain bits outside
 + * XT_FLOWOFFLOAD_MASK. Returns 0 if the flags are acceptable, -EINVAL
 + * otherwise.
 + */
 +static int flowoffload_chk(const struct xt_tgchk_param *par)
 +{
 +	const struct xt_flowoffload_target_info *tginfo = par->targinfo;
 +	bool has_unknown_flags = tginfo->flags & ~XT_FLOWOFFLOAD_MASK;
 +
 +	return has_unknown_flags ? -EINVAL : 0;
 +}
 +
 +/*
 + * xtables registration for the "FLOWOFFLOAD" target (revision 0).
 + * Registered for NFPROTO_UNSPEC; flowoffload_chk() validates rule flags and
 + * flowoffload_tg() handles packets.
 + */
 +static struct xt_target offload_tg_reg __read_mostly = {
 +	.family		= NFPROTO_UNSPEC,
 +	.name		= "FLOWOFFLOAD",
 +	.revision	= 0,
 +	.targetsize	= sizeof(struct xt_flowoffload_target_info),
 +	.usersize	= sizeof(struct xt_flowoffload_target_info),
 +	.checkentry	= flowoffload_chk,
 +	.target		= flowoffload_tg,
 +	.me		= THIS_MODULE,
 +};
 +
 +/*
 + * Netdevice notifier callback. Only NETDEV_UNREGISTER is handled: the hook
 + * entries for the device are unlinked from both flowtables under
 + * hooks_lock, then unregistered and freed outside the lock, and finally
 + * nf_flow_table_cleanup() is called for the device.
 + *
 + * Always returns NOTIFY_DONE.
 + */
 +static int flow_offload_netdev_event(struct notifier_block *this,
 +				     unsigned long event, void *ptr)
 +{
 +	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 +	struct xt_flowoffload_hook *found[2];
 +	int t;
 +
 +	if (event != NETDEV_UNREGISTER)
 +		return NOTIFY_DONE;
 +
 +	/* Unlink under the lock; unregistering is done after dropping it. */
 +	spin_lock_bh(&hooks_lock);
 +	for (t = 0; t < 2; t++) {
 +		found[t] = flow_offload_lookup_hook(&flowtable[t], dev);
 +		if (found[t])
 +			hlist_del(&found[t]->list);
 +	}
 +	spin_unlock_bh(&hooks_lock);
 +
 +	for (t = 0; t < 2; t++) {
 +		if (!found[t])
 +			continue;
 +
 +		nf_unregister_net_hook(found[t]->net, &found[t]->ops);
 +		kfree(found[t]);
 +	}
 +
 +	nf_flow_table_cleanup(dev);
 +
 +	return NOTIFY_DONE;
 +}
 +
 +/*
 + * Notifier block that routes netdevice events to
 + * flow_offload_netdev_event(); registered in xt_flowoffload_tg_init().
 + */
 +static struct notifier_block flow_offload_netdev_notifier = {
 +	.notifier_call	= flow_offload_netdev_event,
 +};
 +
 +/*
 + * Flowtable .action callback: dispatch to the IPv4 or IPv6 rule builder
 + * according to the l3proto of the flow tuple for @dir.
 + *
 + * Returns the result of the selected builder, or -1 for any other l3proto.
 + */
 +static int nf_flow_rule_route_inet(struct net *net,
 +				   const struct flow_offload *flow,
 +				   enum flow_offload_tuple_dir dir,
 +				   struct nf_flow_rule *flow_rule)
 +{
 +	switch (flow->tuplehash[dir].tuple.l3proto) {
 +	case NFPROTO_IPV4:
 +		return nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
 +	case NFPROTO_IPV6:
 +		return nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
 +	default:
 +		return -1;
 +	}
 +}
 +
 +/*
 + * Flowtable type (NFPROTO_INET) assigned to every table by
 + * init_flowtable(); rule construction goes through
 + * nf_flow_rule_route_inet() and packets through xt_flowoffload_net_hook().
 + */
 +static struct nf_flowtable_type flowtable_inet = {
 +	.family		= NFPROTO_INET,
 +	.init		= nf_flow_table_init,
 +	.setup		= nf_flow_table_offload_setup,
 +	.action		= nf_flow_rule_route_inet,
 +	.free		= nf_flow_table_free,
 +	.hook		= xt_flowoffload_net_hook,
 +	.owner		= THIS_MODULE,
 +};
 +
 +/*
 + * Prepare one xt_flowoffload table: set up its delayed hook work, attach the
 + * flowtable_inet type and initialise the embedded flowtable.
 + *
 + * Returns the result of nf_flow_table_init().
 + */
 +static int init_flowtable(struct xt_flowoffload_table *tbl)
 +{
 +	struct nf_flowtable *ft = &tbl->ft;
 +
 +	INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
 +	ft->type = &flowtable_inet;
 +
 +	return nf_flow_table_init(ft);
 +}
 +
 +/*
 + * Module init: register the netdevice notifier, initialise both flowtables
 + * (flowtable[1] is flagged NF_FLOWTABLE_HW_OFFLOAD) and register the
 + * FLOWOFFLOAD target.
 + *
 + * Every failure path undoes the steps completed so far, including the
 + * notifier registration, so a failed load leaves no callback pointing into
 + * the module.
 + *
 + * Returns 0 on success or the error code of the step that failed.
 + */
 +static int __init xt_flowoffload_tg_init(void)
 +{
 +	int ret;
 +
 +	ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
 +	if (ret)
 +		return ret;
 +
 +	ret = init_flowtable(&flowtable[0]);
 +	if (ret)
 +		goto err_notifier;
 +
 +	ret = init_flowtable(&flowtable[1]);
 +	if (ret)
 +		goto err_table0;
 +
 +	flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
 +
 +	ret = xt_register_target(&offload_tg_reg);
 +	if (ret)
 +		goto err_table1;
 +
 +	return 0;
 +
 +err_table1:
 +	nf_flow_table_free(&flowtable[1].ft);
 +err_table0:
 +	nf_flow_table_free(&flowtable[0].ft);
 +err_notifier:
 +	unregister_netdevice_notifier(&flow_offload_netdev_notifier);
 +	return ret;
 +}
 +
 +/*
 + * Module exit: unregister the target and the netdevice notifier, then free
 + * both flowtables.
 + */
 +static void __exit xt_flowoffload_tg_exit(void)
 +{
 +	/* Remove the target before anything it relies on is torn down. */
 +	xt_unregister_target(&offload_tg_reg);
 +	unregister_netdevice_notifier(&flow_offload_netdev_notifier);
 +	nf_flow_table_free(&flowtable[0].ft);
 +	nf_flow_table_free(&flowtable[1].ft);
 +}
 +
 +/* Module license and init/exit entry points. */
 +MODULE_LICENSE("GPL");
 +module_init(xt_flowoffload_tg_init);
 +module_exit(xt_flowoffload_tg_exit);
 --
 2.18.0