blob: 6b40b56d2a2d556d1d49929a4894fad1e5118ca7 [file] [log] [blame]
From 6ad9bd65769003ab526e504577e0f747eba14287 Mon Sep 17 00:00:00 2001
From: Bo Jiao <Bo.Jiao@mediatek.com>
Date: Wed, 22 Jun 2022 09:42:19 +0800
Subject: [PATCH 1/8]
9990-mt7622-backport-nf-hw-offload-framework-and-upstream-hnat-plus-xt-FLOWOFFLOAD-update-v2
---
drivers/net/ethernet/mediatek/Makefile | 3 +-
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 28 +-
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 20 +-
drivers/net/ethernet/mediatek/mtk_ppe.c | 509 +++++++
drivers/net/ethernet/mediatek/mtk_ppe.h | 288 ++++
.../net/ethernet/mediatek/mtk_ppe_debugfs.c | 214 +++
.../net/ethernet/mediatek/mtk_ppe_offload.c | 526 ++++++++
drivers/net/ethernet/mediatek/mtk_ppe_regs.h | 144 ++
drivers/net/ppp/ppp_generic.c | 22 +
drivers/net/ppp/pppoe.c | 24 +
include/linux/netdevice.h | 60 +
include/linux/ppp_channel.h | 3 +
include/net/dsa.h | 10 +
include/net/flow_offload.h | 4 +
include/net/ip6_route.h | 5 +-
.../net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 -
include/net/netfilter/nf_conntrack.h | 12 +
include/net/netfilter/nf_conntrack_acct.h | 11 +
include/net/netfilter/nf_flow_table.h | 264 +++-
include/net/netns/conntrack.h | 6 +
.../linux/netfilter/nf_conntrack_common.h | 9 +-
include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h | 17 +
net/8021q/vlan_dev.c | 21 +
net/bridge/br_device.c | 49 +
net/bridge/br_private.h | 20 +
net/bridge/br_vlan.c | 55 +
net/core/dev.c | 46 +
net/dsa/dsa.c | 9 +
net/dsa/slave.c | 41 +-
net/ipv4/netfilter/Kconfig | 4 +-
net/ipv6/ip6_output.c | 2 +-
net/ipv6/netfilter/Kconfig | 3 +-
net/ipv6/route.c | 22 +-
net/netfilter/Kconfig | 14 +-
net/netfilter/Makefile | 4 +-
net/netfilter/nf_conntrack_core.c | 20 +-
net/netfilter/nf_conntrack_proto_tcp.c | 4 +
net/netfilter/nf_conntrack_proto_udp.c | 4 +
net/netfilter/nf_conntrack_standalone.c | 34 +-
net/netfilter/nf_flow_table_core.c | 446 +++---
net/netfilter/nf_flow_table_ip.c | 455 ++++---
net/netfilter/nf_flow_table_offload.c | 1191 +++++++++++++++++
net/netfilter/xt_FLOWOFFLOAD.c | 719 ++++++++++
43 files changed, 4913 insertions(+), 432 deletions(-)
create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h
create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
create mode 100644 net/netfilter/nf_flow_table_offload.c
create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c
diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
index 13c5b4e8f..0a6af99f1 100755
--- a/drivers/net/ethernet/mediatek/Makefile
+++ b/drivers/net/ethernet/mediatek/Makefile
@@ -4,5 +4,6 @@
#
obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o
+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o \
+ mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
obj-$(CONFIG_NET_MEDIATEK_HNAT) += mtk_hnat/
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 2b21f7ed0..819d8a0be 100755
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2654,12 +2654,17 @@ static int mtk_open(struct net_device *dev)
/* we run 2 netdevs on the same dma ring so we only bring it up once */
if (!refcount_read(&eth->dma_refcnt)) {
- int err = mtk_start_dma(eth);
+ u32 gdm_config = MTK_GDMA_TO_PDMA;
+ int err;
+ err = mtk_start_dma(eth);
if (err)
return err;
- mtk_gdm_config(eth, MTK_GDMA_TO_PDMA);
+ if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
+ gdm_config = MTK_GDMA_TO_PPE;
+
+ mtk_gdm_config(eth, gdm_config);
/* Indicates CDM to parse the MTK special tag from CPU */
if (netdev_uses_dsa(dev)) {
@@ -2772,6 +2777,9 @@ static int mtk_stop(struct net_device *dev)
mtk_dma_free(eth);
+ if (eth->soc->offload_version)
+ mtk_ppe_stop(&eth->ppe);
+
return 0;
}
@@ -3391,6 +3399,7 @@ static const struct net_device_ops mtk_netdev_ops = {
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = mtk_poll_controller,
#endif
+ .ndo_setup_tc = mtk_eth_setup_tc,
};
static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
@@ -3682,6 +3691,17 @@ static int mtk_probe(struct platform_device *pdev)
goto err_free_dev;
}
+ if (eth->soc->offload_version) {
+ err = mtk_ppe_init(&eth->ppe, eth->dev,
+ eth->base + MTK_ETH_PPE_BASE, 2);
+ if (err)
+ goto err_free_dev;
+
+ err = mtk_eth_offload_init(eth);
+ if (err)
+ goto err_free_dev;
+ }
+
for (i = 0; i < MTK_MAX_DEVS; i++) {
if (!eth->netdev[i])
continue;
@@ -3781,6 +3801,7 @@ static const struct mtk_soc_data mt2701_data = {
.required_clks = MT7623_CLKS_BITMAP,
.required_pctl = true,
.has_sram = false,
+ .offload_version = 2,
};
static const struct mtk_soc_data mt7621_data = {
@@ -3789,6 +3810,7 @@ static const struct mtk_soc_data mt7621_data = {
.required_clks = MT7621_CLKS_BITMAP,
.required_pctl = false,
.has_sram = false,
+ .offload_version = 2,
};
static const struct mtk_soc_data mt7622_data = {
@@ -3798,6 +3820,7 @@ static const struct mtk_soc_data mt7622_data = {
.required_clks = MT7622_CLKS_BITMAP,
.required_pctl = false,
.has_sram = false,
+ .offload_version = 2,
};
static const struct mtk_soc_data mt7623_data = {
@@ -3806,6 +3829,7 @@ static const struct mtk_soc_data mt7623_data = {
.required_clks = MT7623_CLKS_BITMAP,
.required_pctl = true,
.has_sram = false,
+ .offload_version = 2,
};
static const struct mtk_soc_data mt7629_data = {
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index b6380ffeb..349f98503 100755
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -15,6 +15,8 @@
#include <linux/u64_stats_sync.h>
#include <linux/refcount.h>
#include <linux/phylink.h>
+#include <linux/rhashtable.h>
+#include "mtk_ppe.h"
#define MTK_QDMA_PAGE_SIZE 2048
#define MTK_MAX_RX_LENGTH 1536
@@ -37,7 +39,8 @@
NETIF_F_HW_VLAN_CTAG_TX | \
NETIF_F_SG | NETIF_F_TSO | \
NETIF_F_TSO6 | \
- NETIF_F_IPV6_CSUM)
+ NETIF_F_IPV6_CSUM |\
+ NETIF_F_HW_TC)
#define MTK_SET_FEATURES (NETIF_F_LRO | \
NETIF_F_HW_VLAN_CTAG_RX)
#define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
@@ -107,6 +110,7 @@
#define MTK_GDMA_TCS_EN BIT(21)
#define MTK_GDMA_UCS_EN BIT(20)
#define MTK_GDMA_TO_PDMA 0x0
+#define MTK_GDMA_TO_PPE 0x4444
#define MTK_GDMA_DROP_ALL 0x7777
/* Unicast Filter MAC Address Register - Low */
@@ -547,6 +551,12 @@
#define RX_DMA_TCI(_x) ((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK))
#define RX_DMA_VPID(_x) (((_x) >> 16) & 0xffff)
+/* QDMA descriptor rxd4 */
+#define MTK_RXD4_FOE_ENTRY GENMASK(13, 0)
+#define MTK_RXD4_PPE_CPU_REASON GENMASK(18, 14)
+#define MTK_RXD4_SRC_PORT GENMASK(21, 19)
+#define MTK_RXD4_ALG GENMASK(31, 22)
+
/* QDMA descriptor rxd4 */
#define RX_DMA_L4_VALID BIT(24)
#define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
@@ -1158,6 +1168,7 @@ struct mtk_soc_data {
u32 caps;
u32 required_clks;
bool required_pctl;
+ u8 offload_version;
netdev_features_t hw_features;
bool has_sram;
};
@@ -1271,6 +1282,9 @@ struct mtk_eth {
int ip_align;
spinlock_t syscfg0_lock;
struct timer_list mtk_dma_monitor_timer;
+
+ struct mtk_ppe ppe;
+ struct rhashtable flow_table;
};
/* struct mtk_mac - the structure that holds the info about the MACs of the
@@ -1319,4 +1333,8 @@ int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
void mtk_gdm_config(struct mtk_eth *eth, u32 config);
void ethsys_reset(struct mtk_eth *eth, u32 reset_bits);
+int mtk_eth_offload_init(struct mtk_eth *eth);
+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data);
+
#endif /* MTK_ETH_H */
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
new file mode 100644
index 000000000..66298e223
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -0,0 +1,509 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include "mtk_ppe.h"
+#include "mtk_ppe_regs.h"
+
+static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+ writel(val, ppe->base + reg);
+}
+
+static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
+{
+ return readl(ppe->base + reg);
+}
+
+static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
+{
+ u32 val;
+
+ val = ppe_r32(ppe, reg);
+ val &= ~mask;
+ val |= set;
+ ppe_w32(ppe, reg, val);
+
+ return val;
+}
+
+static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+ return ppe_m32(ppe, reg, 0, val);
+}
+
+static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+ return ppe_m32(ppe, reg, val, 0);
+}
+
+static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
+{
+ int ret;
+ u32 val;
+
+ ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
+ !(val & MTK_PPE_GLO_CFG_BUSY),
+ 20, MTK_PPE_WAIT_TIMEOUT_US);
+
+ if (ret)
+ dev_err(ppe->dev, "PPE table busy");
+
+ return ret;
+}
+
+static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
+{
+ ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
+ ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
+}
+
+static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
+{
+ mtk_ppe_cache_clear(ppe);
+
+ ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
+ enable * MTK_PPE_CACHE_CTL_EN);
+}
+
+static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
+{
+ u32 hv1, hv2, hv3;
+ u32 hash;
+
+ switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
+ case MTK_PPE_PKT_TYPE_BRIDGE:
+ hv1 = e->bridge.src_mac_lo;
+ hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
+ hv2 = e->bridge.src_mac_hi >> 16;
+ hv2 ^= e->bridge.dest_mac_lo;
+ hv3 = e->bridge.dest_mac_hi;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+ hv1 = e->ipv4.orig.ports;
+ hv2 = e->ipv4.orig.dest_ip;
+ hv3 = e->ipv4.orig.src_ip;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+ hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
+ hv1 ^= e->ipv6.ports;
+
+ hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
+ hv2 ^= e->ipv6.dest_ip[0];
+
+ hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
+ hv3 ^= e->ipv6.src_ip[0];
+ break;
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
+ default:
+ WARN_ON_ONCE(1);
+ return MTK_PPE_HASH_MASK;
+ }
+
+ hash = (hv1 & hv2) | ((~hv1) & hv3);
+ hash = (hash >> 24) | ((hash & 0xffffff) << 8);
+ hash ^= hv1 ^ hv2 ^ hv3;
+ hash ^= hash >> 16;
+ hash <<= 1;
+ hash &= MTK_PPE_ENTRIES - 1;
+
+ return hash;
+}
+
+static inline struct mtk_foe_mac_info *
+mtk_foe_entry_l2(struct mtk_foe_entry *entry)
+{
+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+ return &entry->ipv6.l2;
+
+ return &entry->ipv4.l2;
+}
+
+static inline u32 *
+mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
+{
+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+ return &entry->ipv6.ib2;
+
+ return &entry->ipv4.ib2;
+}
+
+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
+ u8 pse_port, u8 *src_mac, u8 *dest_mac)
+{
+ struct mtk_foe_mac_info *l2;
+ u32 ports_pad, val;
+
+ memset(entry, 0, sizeof(*entry));
+
+ val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
+ FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
+ FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
+ MTK_FOE_IB1_BIND_TTL |
+ MTK_FOE_IB1_BIND_CACHE;
+ entry->ib1 = val;
+
+ val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
+ FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
+ FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
+
+ if (is_multicast_ether_addr(dest_mac))
+ val |= MTK_FOE_IB2_MULTICAST;
+
+ ports_pad = 0xa5a5a500 | (l4proto & 0xff);
+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
+ entry->ipv4.orig.ports = ports_pad;
+ if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
+ entry->ipv6.ports = ports_pad;
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
+ entry->ipv6.ib2 = val;
+ l2 = &entry->ipv6.l2;
+ } else {
+ entry->ipv4.ib2 = val;
+ l2 = &entry->ipv4.l2;
+ }
+
+ l2->dest_mac_hi = get_unaligned_be32(dest_mac);
+ l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
+ l2->src_mac_hi = get_unaligned_be32(src_mac);
+ l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
+ l2->etype = ETH_P_IPV6;
+ else
+ l2->etype = ETH_P_IP;
+
+ return 0;
+}
+
+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
+{
+ u32 *ib2 = mtk_foe_entry_ib2(entry);
+ u32 val;
+
+ val = *ib2;
+ val &= ~MTK_FOE_IB2_DEST_PORT;
+ val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
+ *ib2 = val;
+
+ return 0;
+}
+
+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
+ __be32 src_addr, __be16 src_port,
+ __be32 dest_addr, __be16 dest_port)
+{
+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ struct mtk_ipv4_tuple *t;
+
+ switch (type) {
+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+ if (egress) {
+ t = &entry->ipv4.new;
+ break;
+ }
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+ t = &entry->ipv4.orig;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
+ entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
+ entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
+ return 0;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ t->src_ip = be32_to_cpu(src_addr);
+ t->dest_ip = be32_to_cpu(dest_addr);
+
+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
+ return 0;
+
+ t->src_port = be16_to_cpu(src_port);
+ t->dest_port = be16_to_cpu(dest_port);
+
+ return 0;
+}
+
+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
+ __be32 *src_addr, __be16 src_port,
+ __be32 *dest_addr, __be16 dest_port)
+{
+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ u32 *src, *dest;
+ int i;
+
+ switch (type) {
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ src = entry->dslite.tunnel_src_ip;
+ dest = entry->dslite.tunnel_dest_ip;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
+ entry->ipv6.src_port = be16_to_cpu(src_port);
+ entry->ipv6.dest_port = be16_to_cpu(dest_port);
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+ src = entry->ipv6.src_ip;
+ dest = entry->ipv6.dest_ip;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < 4; i++)
+ src[i] = be32_to_cpu(src_addr[i]);
+ for (i = 0; i < 4; i++)
+ dest[i] = be32_to_cpu(dest_addr[i]);
+
+ return 0;
+}
+
+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
+{
+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+ l2->etype = BIT(port);
+
+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+ else
+ l2->etype |= BIT(8);
+
+ entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
+
+ return 0;
+}
+
+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
+{
+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+ switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
+ case 0:
+ entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
+ FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+ l2->vlan1 = vid;
+ return 0;
+ case 1:
+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
+ l2->vlan1 = vid;
+ l2->etype |= BIT(8);
+ } else {
+ l2->vlan2 = vid;
+ entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+ }
+ return 0;
+ default:
+ return -ENOSPC;
+ }
+}
+
+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
+{
+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
+ (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
+ l2->etype = ETH_P_PPP_SES;
+
+ entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
+ l2->pppoe_id = sid;
+
+ return 0;
+}
+
+static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
+{
+ return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
+ FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
+}
+
+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+ u16 timestamp)
+{
+ struct mtk_foe_entry *hwe;
+ u32 hash;
+
+ timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
+ entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
+
+ hash = mtk_ppe_hash_entry(entry);
+ hwe = &ppe->foe_table[hash];
+ if (!mtk_foe_entry_usable(hwe)) {
+ hwe++;
+ hash++;
+
+ if (!mtk_foe_entry_usable(hwe))
+ return -ENOSPC;
+ }
+
+ memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
+ wmb();
+ hwe->ib1 = entry->ib1;
+
+ dma_wmb();
+
+ mtk_ppe_cache_clear(ppe);
+
+ return hash;
+}
+
+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
+ int version)
+{
+ struct mtk_foe_entry *foe;
+
+ /* need to allocate a separate device, since it PPE DMA access is
+ * not coherent.
+ */
+ ppe->base = base;
+ ppe->dev = dev;
+ ppe->version = version;
+
+ foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
+ &ppe->foe_phys, GFP_KERNEL);
+ if (!foe)
+ return -ENOMEM;
+
+ ppe->foe_table = foe;
+
+ mtk_ppe_debugfs_init(ppe);
+
+ return 0;
+}
+
+static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
+{
+ static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
+ int i, k;
+
+ memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
+
+ if (!IS_ENABLED(CONFIG_SOC_MT7621))
+ return;
+
+ /* skip all entries that cross the 1024 byte boundary */
+ for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
+ for (k = 0; k < ARRAY_SIZE(skip); k++)
+ ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
+}
+
+int mtk_ppe_start(struct mtk_ppe *ppe)
+{
+ u32 val;
+
+ mtk_ppe_init_foe_table(ppe);
+ ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
+
+ val = MTK_PPE_TB_CFG_ENTRY_80B |
+ MTK_PPE_TB_CFG_AGE_NON_L4 |
+ MTK_PPE_TB_CFG_AGE_UNBIND |
+ MTK_PPE_TB_CFG_AGE_TCP |
+ MTK_PPE_TB_CFG_AGE_UDP |
+ MTK_PPE_TB_CFG_AGE_TCP_FIN |
+ FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
+ FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
+ MTK_PPE_KEEPALIVE_DISABLE) |
+ FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
+ FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
+ FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
+ MTK_PPE_ENTRIES_SHIFT);
+ ppe_w32(ppe, MTK_PPE_TB_CFG, val);
+
+ ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
+ MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
+
+ mtk_ppe_cache_enable(ppe, true);
+
+ val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
+ MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
+ MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
+ MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
+ MTK_PPE_FLOW_CFG_IP6_6RD |
+ MTK_PPE_FLOW_CFG_IP4_NAT |
+ MTK_PPE_FLOW_CFG_IP4_NAPT |
+ MTK_PPE_FLOW_CFG_IP4_DSLITE |
+ MTK_PPE_FLOW_CFG_L2_BRIDGE |
+ MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
+
+ val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
+ FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
+ ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
+
+ val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 12) |
+ FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
+ ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
+
+ val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
+ FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 7);
+ ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
+
+ val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
+
+ val = MTK_PPE_BIND_LIMIT1_FULL |
+ FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
+
+ val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
+ FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
+ ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
+
+ /* enable PPE */
+ val = MTK_PPE_GLO_CFG_EN |
+ MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
+ MTK_PPE_GLO_CFG_IP4_CS_DROP |
+ MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
+ ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
+
+ ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
+
+ return 0;
+}
+
+int mtk_ppe_stop(struct mtk_ppe *ppe)
+{
+ u32 val;
+ int i;
+
+ for (i = 0; i < MTK_PPE_ENTRIES; i++)
+ ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
+ MTK_FOE_STATE_INVALID);
+
+ mtk_ppe_cache_enable(ppe, false);
+
+ /* disable offload engine */
+ ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
+
+ /* disable aging */
+ val = MTK_PPE_TB_CFG_AGE_NON_L4 |
+ MTK_PPE_TB_CFG_AGE_UNBIND |
+ MTK_PPE_TB_CFG_AGE_TCP |
+ MTK_PPE_TB_CFG_AGE_UDP |
+ MTK_PPE_TB_CFG_AGE_TCP_FIN;
+ ppe_clear(ppe, MTK_PPE_TB_CFG, val);
+
+ return mtk_ppe_wait_busy(ppe);
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
new file mode 100644
index 000000000..242fb8f2a
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#ifndef __MTK_PPE_H
+#define __MTK_PPE_H
+
+#include <linux/kernel.h>
+#include <linux/bitfield.h>
+
+#define MTK_ETH_PPE_BASE 0xc00
+
+#define MTK_PPE_ENTRIES_SHIFT 3
+#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
+#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
+#define MTK_PPE_WAIT_TIMEOUT_US 1000000
+
+#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0)
+#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8)
+#define MTK_FOE_IB1_UNBIND_PREBIND BIT(24)
+
+#define MTK_FOE_IB1_BIND_TIMESTAMP GENMASK(14, 0)
+#define MTK_FOE_IB1_BIND_KEEPALIVE BIT(15)
+#define MTK_FOE_IB1_BIND_VLAN_LAYER GENMASK(18, 16)
+#define MTK_FOE_IB1_BIND_PPPOE BIT(19)
+#define MTK_FOE_IB1_BIND_VLAN_TAG BIT(20)
+#define MTK_FOE_IB1_BIND_PKT_SAMPLE BIT(21)
+#define MTK_FOE_IB1_BIND_CACHE BIT(22)
+#define MTK_FOE_IB1_BIND_TUNNEL_DECAP BIT(23)
+#define MTK_FOE_IB1_BIND_TTL BIT(24)
+
+#define MTK_FOE_IB1_PACKET_TYPE GENMASK(27, 25)
+#define MTK_FOE_IB1_STATE GENMASK(29, 28)
+#define MTK_FOE_IB1_UDP BIT(30)
+#define MTK_FOE_IB1_STATIC BIT(31)
+
+enum {
+ MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
+ MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
+ MTK_PPE_PKT_TYPE_BRIDGE = 2,
+ MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
+ MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
+};
+
+#define MTK_FOE_IB2_QID GENMASK(3, 0)
+#define MTK_FOE_IB2_PSE_QOS BIT(4)
+#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5)
+#define MTK_FOE_IB2_MULTICAST BIT(8)
+
+#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12)
+#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16)
+#define MTK_FOE_IB2_WHNAT_NAT BIT(17)
+
+#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12)
+
+#define MTK_FOE_IB2_PORT_AG GENMASK(23, 18)
+
+#define MTK_FOE_IB2_DSCP GENMASK(31, 24)
+
+#define MTK_FOE_VLAN2_WHNAT_BSS GEMMASK(5, 0)
+#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6)
+#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14)
+
+enum {
+ MTK_FOE_STATE_INVALID,
+ MTK_FOE_STATE_UNBIND,
+ MTK_FOE_STATE_BIND,
+ MTK_FOE_STATE_FIN
+};
+
+struct mtk_foe_mac_info {
+ u16 vlan1;
+ u16 etype;
+
+ u32 dest_mac_hi;
+
+ u16 vlan2;
+ u16 dest_mac_lo;
+
+ u32 src_mac_hi;
+
+ u16 pppoe_id;
+ u16 src_mac_lo;
+};
+
+struct mtk_foe_bridge {
+ u32 dest_mac_hi;
+
+ u16 src_mac_lo;
+ u16 dest_mac_lo;
+
+ u32 src_mac_hi;
+
+ u32 ib2;
+
+ u32 _rsv[5];
+
+ u32 udf_tsid;
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_ipv4_tuple {
+ u32 src_ip;
+ u32 dest_ip;
+ union {
+ struct {
+ u16 dest_port;
+ u16 src_port;
+ };
+ struct {
+ u8 protocol;
+ u8 _pad[3]; /* fill with 0xa5a5a5 */
+ };
+ u32 ports;
+ };
+};
+
+struct mtk_foe_ipv4 {
+ struct mtk_ipv4_tuple orig;
+
+ u32 ib2;
+
+ struct mtk_ipv4_tuple new;
+
+ u16 timestamp;
+ u16 _rsv0[3];
+
+ u32 udf_tsid;
+
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv4_dslite {
+ struct mtk_ipv4_tuple ip4;
+
+ u32 tunnel_src_ip[4];
+ u32 tunnel_dest_ip[4];
+
+ u8 flow_label[3];
+ u8 priority;
+
+ u32 udf_tsid;
+
+ u32 ib2;
+
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv6 {
+ u32 src_ip[4];
+ u32 dest_ip[4];
+
+ union {
+ struct {
+ u8 protocol;
+ u8 _pad[3]; /* fill with 0xa5a5a5 */
+ }; /* 3-tuple */
+ struct {
+ u16 dest_port;
+ u16 src_port;
+ }; /* 5-tuple */
+ u32 ports;
+ };
+
+ u32 _rsv[3];
+
+ u32 udf;
+
+ u32 ib2;
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv6_6rd {
+ u32 src_ip[4];
+ u32 dest_ip[4];
+ u16 dest_port;
+ u16 src_port;
+
+ u32 tunnel_src_ip;
+ u32 tunnel_dest_ip;
+
+ u16 hdr_csum;
+ u8 dscp;
+ u8 ttl;
+
+ u8 flag;
+ u8 pad;
+ u8 per_flow_6rd_id;
+ u8 pad2;
+
+ u32 ib2;
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_entry {
+ u32 ib1;
+
+ union {
+ struct mtk_foe_bridge bridge;
+ struct mtk_foe_ipv4 ipv4;
+ struct mtk_foe_ipv4_dslite dslite;
+ struct mtk_foe_ipv6 ipv6;
+ struct mtk_foe_ipv6_6rd ipv6_6rd;
+ u32 data[19];
+ };
+};
+
+enum {
+ MTK_PPE_CPU_REASON_TTL_EXCEEDED = 0x02,
+ MTK_PPE_CPU_REASON_OPTION_HEADER = 0x03,
+ MTK_PPE_CPU_REASON_NO_FLOW = 0x07,
+ MTK_PPE_CPU_REASON_IPV4_FRAG = 0x08,
+ MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG = 0x09,
+ MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP = 0x0a,
+ MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP = 0x0b,
+ MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST = 0x0c,
+ MTK_PPE_CPU_REASON_UN_HIT = 0x0d,
+ MTK_PPE_CPU_REASON_HIT_UNBIND = 0x0e,
+ MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f,
+ MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN = 0x10,
+ MTK_PPE_CPU_REASON_HIT_TTL_1 = 0x11,
+ MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION = 0x12,
+ MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR = 0x13,
+ MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR = 0x14,
+ MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR = 0x15,
+ MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU = 0x16,
+ MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER = 0x17,
+ MTK_PPE_CPU_REASON_MULTICAST_TO_CPU = 0x18,
+ MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU = 0x19,
+ MTK_PPE_CPU_REASON_HIT_PRE_BIND = 0x1a,
+ MTK_PPE_CPU_REASON_PACKET_SAMPLING = 0x1b,
+ MTK_PPE_CPU_REASON_EXCEED_MTU = 0x1c,
+ MTK_PPE_CPU_REASON_PPE_BYPASS = 0x1e,
+ MTK_PPE_CPU_REASON_INVALID = 0x1f,
+};
+
+struct mtk_ppe {
+ struct device *dev;
+ void __iomem *base;
+ int version;
+
+ struct mtk_foe_entry *foe_table;
+ dma_addr_t foe_phys;
+
+ void *acct_table;
+};
+
+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
+ int version);
+int mtk_ppe_start(struct mtk_ppe *ppe);
+int mtk_ppe_stop(struct mtk_ppe *ppe);
+
+static inline void
+mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
+{
+ ppe->foe_table[hash].ib1 = 0;
+ dma_wmb();
+}
+
+static inline int
+mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
+{
+ u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
+
+ if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
+ return -1;
+
+ return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
+}
+
+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
+ u8 pse_port, u8 *src_mac, u8 *dest_mac);
+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool orig,
+ __be32 src_addr, __be16 src_port,
+ __be32 dest_addr, __be16 dest_port);
+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
+ __be32 *src_addr, __be16 src_port,
+ __be32 *dest_addr, __be16 dest_port);
+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+ u16 timestamp);
+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
+
+#endif
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
new file mode 100644
index 000000000..d4b482340
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include "mtk_eth_soc.h"
+
+struct mtk_flow_addr_info
+{
+ void *src, *dest;
+ u16 *src_port, *dest_port;
+ bool ipv6;
+};
+
+static const char *mtk_foe_entry_state_str(int state)
+{
+ static const char * const state_str[] = {
+ [MTK_FOE_STATE_INVALID] = "INV",
+ [MTK_FOE_STATE_UNBIND] = "UNB",
+ [MTK_FOE_STATE_BIND] = "BND",
+ [MTK_FOE_STATE_FIN] = "FIN",
+ };
+
+ if (state >= ARRAY_SIZE(state_str) || !state_str[state])
+ return "UNK";
+
+ return state_str[state];
+}
+
+static const char *mtk_foe_pkt_type_str(int type)
+{
+ static const char * const type_str[] = {
+ [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
+ [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
+ [MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
+ [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
+ [MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
+ };
+
+ if (type >= ARRAY_SIZE(type_str) || !type_str[type])
+ return "UNKNOWN";
+
+ return type_str[type];
+}
+
+static void
+mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
+{
+ u32 n_addr[4];
+ int i;
+
+ if (!ipv6) {
+ seq_printf(m, "%pI4h", addr);
+ return;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(n_addr); i++)
+ n_addr[i] = htonl(addr[i]);
+ seq_printf(m, "%pI6", n_addr);
+}
+
+static void
+mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
+{
+ mtk_print_addr(m, ai->src, ai->ipv6);
+ if (ai->src_port)
+ seq_printf(m, ":%d", *ai->src_port);
+ seq_printf(m, "->");
+ mtk_print_addr(m, ai->dest, ai->ipv6);
+ if (ai->dest_port)
+ seq_printf(m, ":%d", *ai->dest_port);
+}
+
+static int
+mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
+{
+ struct mtk_ppe *ppe = m->private;
+ int i;
+
+ for (i = 0; i < MTK_PPE_ENTRIES; i++) {
+ struct mtk_foe_entry *entry = &ppe->foe_table[i];
+ struct mtk_foe_mac_info *l2;
+ struct mtk_flow_addr_info ai = {};
+ unsigned char h_source[ETH_ALEN];
+ unsigned char h_dest[ETH_ALEN];
+ int type, state;
+ u32 ib2;
+
+
+ state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
+ if (!state)
+ continue;
+
+ if (bind && state != MTK_FOE_STATE_BIND)
+ continue;
+
+ type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ seq_printf(m, "%05x %s %7s", i,
+ mtk_foe_entry_state_str(state),
+ mtk_foe_pkt_type_str(type));
+
+ switch (type) {
+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ ai.src_port = &entry->ipv4.orig.src_port;
+ ai.dest_port = &entry->ipv4.orig.dest_port;
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+ ai.src = &entry->ipv4.orig.src_ip;
+ ai.dest = &entry->ipv4.orig.dest_ip;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+ ai.src_port = &entry->ipv6.src_port;
+ ai.dest_port = &entry->ipv6.dest_port;
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
+ ai.src = &entry->ipv6.src_ip;
+ ai.dest = &entry->ipv6.dest_ip;
+ ai.ipv6 = true;
+ break;
+ }
+
+ seq_printf(m, " orig=");
+ mtk_print_addr_info(m, &ai);
+
+ switch (type) {
+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ ai.src_port = &entry->ipv4.new.src_port;
+ ai.dest_port = &entry->ipv4.new.dest_port;
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+ ai.src = &entry->ipv4.new.src_ip;
+ ai.dest = &entry->ipv4.new.dest_ip;
+ seq_printf(m, " new=");
+ mtk_print_addr_info(m, &ai);
+ break;
+ }
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
+ l2 = &entry->ipv6.l2;
+ ib2 = entry->ipv6.ib2;
+ } else {
+ l2 = &entry->ipv4.l2;
+ ib2 = entry->ipv4.ib2;
+ }
+
+ *((__be32 *)h_source) = htonl(l2->src_mac_hi);
+ *((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
+ *((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
+ *((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
+
+ seq_printf(m, " eth=%pM->%pM etype=%04x"
+ " vlan=%d,%d ib1=%08x ib2=%08x\n",
+ h_source, h_dest, ntohs(l2->etype),
+ l2->vlan1, l2->vlan2, entry->ib1, ib2);
+ }
+
+ return 0;
+}
+
+static int
+mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
+{
+ return mtk_ppe_debugfs_foe_show(m, private, false);
+}
+
+static int
+mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
+{
+ return mtk_ppe_debugfs_foe_show(m, private, true);
+}
+
+static int
+mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
+{
+ return single_open(file, mtk_ppe_debugfs_foe_show_all,
+ inode->i_private);
+}
+
+static int
+mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
+{
+ return single_open(file, mtk_ppe_debugfs_foe_show_bind,
+ inode->i_private);
+}
+
+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
+{
+ static const struct file_operations fops_all = {
+ .open = mtk_ppe_debugfs_foe_open_all,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ };
+
+ static const struct file_operations fops_bind = {
+ .open = mtk_ppe_debugfs_foe_open_bind,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ };
+
+ struct dentry *root;
+
+ root = debugfs_create_dir("mtk_ppe", NULL);
+ debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
+ debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
new file mode 100644
index 000000000..4294f0c74
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -0,0 +1,526 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
+ */
+
+#include <linux/if_ether.h>
+#include <linux/rhashtable.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/flow_offload.h>
+#include <net/pkt_cls.h>
+#include <net/dsa.h>
+#include "mtk_eth_soc.h"
+
+struct mtk_flow_data {
+ struct ethhdr eth;
+
+ union {
+ struct {
+ __be32 src_addr;
+ __be32 dst_addr;
+ } v4;
+
+ struct {
+ struct in6_addr src_addr;
+ struct in6_addr dst_addr;
+ } v6;
+ };
+
+ __be16 src_port;
+ __be16 dst_port;
+
+ struct {
+ u16 id;
+ __be16 proto;
+ u8 num;
+ } vlan;
+ struct {
+ u16 sid;
+ u8 num;
+ } pppoe;
+};
+
+struct mtk_flow_entry {
+ struct rhash_head node;
+ unsigned long cookie;
+ u16 hash;
+};
+
+static const struct rhashtable_params mtk_flow_ht_params = {
+ .head_offset = offsetof(struct mtk_flow_entry, node),
+ .key_offset = offsetof(struct mtk_flow_entry, cookie),
+ .key_len = sizeof(unsigned long),
+ .automatic_shrinking = true,
+};
+
+static u32
+mtk_eth_timestamp(struct mtk_eth *eth)
+{
+ return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
+}
+
+static int
+mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
+ bool egress)
+{
+ return mtk_foe_entry_set_ipv4_tuple(foe, egress,
+ data->v4.src_addr, data->src_port,
+ data->v4.dst_addr, data->dst_port);
+}
+
+static int
+mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data)
+{
+ return mtk_foe_entry_set_ipv6_tuple(foe,
+ data->v6.src_addr.s6_addr32, data->src_port,
+ data->v6.dst_addr.s6_addr32, data->dst_port);
+}
+
+static void
+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
+{
+ void *dest = eth + act->mangle.offset;
+ const void *src = &act->mangle.val;
+
+ if (act->mangle.offset > 8)
+ return;
+
+ if (act->mangle.mask == 0xffff) {
+ src += 2;
+ dest += 2;
+ }
+
+ memcpy(dest, src, act->mangle.mask ? 2 : 4);
+}
+
+
+static int
+mtk_flow_mangle_ports(const struct flow_action_entry *act,
+ struct mtk_flow_data *data)
+{
+ u32 val = ntohl(act->mangle.val);
+
+ switch (act->mangle.offset) {
+ case 0:
+ if (act->mangle.mask == ~htonl(0xffff))
+ data->dst_port = cpu_to_be16(val);
+ else
+ data->src_port = cpu_to_be16(val >> 16);
+ break;
+ case 2:
+ data->dst_port = cpu_to_be16(val);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
+ struct mtk_flow_data *data)
+{
+ __be32 *dest;
+
+ switch (act->mangle.offset) {
+ case offsetof(struct iphdr, saddr):
+ dest = &data->v4.src_addr;
+ break;
+ case offsetof(struct iphdr, daddr):
+ dest = &data->v4.dst_addr;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ memcpy(dest, &act->mangle.val, sizeof(u32));
+
+ return 0;
+}
+
+static int
+mtk_flow_get_dsa_port(struct net_device **dev)
+{
+#if IS_ENABLED(CONFIG_NET_DSA)
+ struct dsa_port *dp;
+
+ dp = dsa_port_from_netdev(*dev);
+ if (IS_ERR(dp))
+ return -ENODEV;
+
+ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
+ return -ENODEV;
+
+ *dev = dp->cpu_dp->master;
+
+ return dp->index;
+#else
+ return -ENODEV;
+#endif
+}
+
+static int
+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
+ struct net_device *dev)
+{
+ int pse_port, dsa_port;
+
+ dsa_port = mtk_flow_get_dsa_port(&dev);
+ if (dsa_port >= 0)
+ mtk_foe_entry_set_dsa(foe, dsa_port);
+
+ if (dev == eth->netdev[0])
+ pse_port = 1;
+ else if (dev == eth->netdev[1])
+ pse_port = 2;
+ else
+ return -EOPNOTSUPP;
+
+ mtk_foe_entry_set_pse_port(foe, pse_port);
+
+ return 0;
+}
+
+static int
+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_action_entry *act;
+ struct mtk_flow_data data = {};
+ struct mtk_foe_entry foe;
+ struct net_device *odev = NULL;
+ struct mtk_flow_entry *entry;
+ int offload_type = 0;
+ u16 addr_type = 0;
+ u32 timestamp;
+ u8 l4proto = 0;
+ int err = 0;
+ int hash;
+ int i;
+
+ if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
+ return -EEXIST;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+ struct flow_match_meta match;
+
+ flow_rule_match_meta(rule, &match);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_match_control match;
+
+ flow_rule_match_control(rule, &match);
+ addr_type = match.key->addr_type;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
+ l4proto = match.key->ip_proto;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ flow_action_for_each(i, act, &rule->action) {
+ switch (act->id) {
+ case FLOW_ACTION_MANGLE:
+ if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
+ mtk_flow_offload_mangle_eth(act, &data.eth);
+ break;
+ case FLOW_ACTION_REDIRECT:
+ odev = act->dev;
+ break;
+ case FLOW_ACTION_CSUM:
+ break;
+ case FLOW_ACTION_VLAN_PUSH:
+ if (data.vlan.num == 1 ||
+ act->vlan.proto != htons(ETH_P_8021Q))
+ return -EOPNOTSUPP;
+
+ data.vlan.id = act->vlan.vid;
+ data.vlan.proto = act->vlan.proto;
+ data.vlan.num++;
+ break;
+ case FLOW_ACTION_VLAN_POP:
+ break;
+ case FLOW_ACTION_PPPOE_PUSH:
+ if (data.pppoe.num == 1)
+ return -EOPNOTSUPP;
+
+ data.pppoe.sid = act->pppoe.sid;
+ data.pppoe.num++;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ switch (addr_type) {
+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
+ break;
+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+ offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (!is_valid_ether_addr(data.eth.h_source) ||
+ !is_valid_ether_addr(data.eth.h_dest))
+ return -EINVAL;
+
+ err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
+ data.eth.h_source,
+ data.eth.h_dest);
+ if (err)
+ return err;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports ports;
+
+ flow_rule_match_ports(rule, &ports);
+ data.src_port = ports.key->src;
+ data.dst_port = ports.key->dst;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs addrs;
+
+ flow_rule_match_ipv4_addrs(rule, &addrs);
+
+ data.v4.src_addr = addrs.key->src;
+ data.v4.dst_addr = addrs.key->dst;
+
+ mtk_flow_set_ipv4_addr(&foe, &data, false);
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs addrs;
+
+ flow_rule_match_ipv6_addrs(rule, &addrs);
+
+ data.v6.src_addr = addrs.key->src;
+ data.v6.dst_addr = addrs.key->dst;
+
+ mtk_flow_set_ipv6_addr(&foe, &data);
+ }
+
+ flow_action_for_each(i, act, &rule->action) {
+ if (act->id != FLOW_ACTION_MANGLE)
+ continue;
+
+ switch (act->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+ err = mtk_flow_mangle_ports(act, &data);
+ break;
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ err = mtk_flow_mangle_ipv4(act, &data);
+ break;
+ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
+ /* handled earlier */
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (err)
+ return err;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ err = mtk_flow_set_ipv4_addr(&foe, &data, true);
+ if (err)
+ return err;
+ }
+
+ if (data.vlan.num == 1) {
+ if (data.vlan.proto != htons(ETH_P_8021Q))
+ return -EOPNOTSUPP;
+
+ mtk_foe_entry_set_vlan(&foe, data.vlan.id);
+ }
+ if (data.pppoe.num == 1)
+ mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
+
+ err = mtk_flow_set_output_device(eth, &foe, odev);
+ if (err)
+ return err;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->cookie = f->cookie;
+ timestamp = mtk_eth_timestamp(eth);
+ hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
+ if (hash < 0) {
+ err = hash;
+ goto free;
+ }
+
+ entry->hash = hash;
+ err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
+ mtk_flow_ht_params);
+ if (err < 0)
+ goto clear_flow;
+
+ return 0;
+clear_flow:
+ mtk_foe_entry_clear(&eth->ppe, hash);
+free:
+ kfree(entry);
+ return err;
+}
+
+static int
+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
+{
+ struct mtk_flow_entry *entry;
+
+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
+ mtk_flow_ht_params);
+ if (!entry)
+ return -ENOENT;
+
+ mtk_foe_entry_clear(&eth->ppe, entry->hash);
+ rhashtable_remove_fast(&eth->flow_table, &entry->node,
+ mtk_flow_ht_params);
+ kfree(entry);
+
+ return 0;
+}
+
+static int
+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
+{
+ struct mtk_flow_entry *entry;
+ int timestamp;
+ u32 idle;
+
+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
+ mtk_flow_ht_params);
+ if (!entry)
+ return -ENOENT;
+
+ timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
+ if (timestamp < 0)
+ return -ETIMEDOUT;
+
+ idle = mtk_eth_timestamp(eth) - timestamp;
+ f->stats.lastused = jiffies - idle * HZ;
+
+ return 0;
+}
+
+static DEFINE_MUTEX(mtk_flow_offload_mutex);
+
+static int
+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{
+ struct flow_cls_offload *cls = type_data;
+ struct net_device *dev = cb_priv;
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+ int err;
+
+ if (!tc_can_offload(dev))
+ return -EOPNOTSUPP;
+
+ if (type != TC_SETUP_CLSFLOWER)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&mtk_flow_offload_mutex);
+ switch (cls->command) {
+ case FLOW_CLS_REPLACE:
+ err = mtk_flow_offload_replace(eth, cls);
+ break;
+ case FLOW_CLS_DESTROY:
+ err = mtk_flow_offload_destroy(eth, cls);
+ break;
+ case FLOW_CLS_STATS:
+ err = mtk_flow_offload_stats(eth, cls);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+ mutex_unlock(&mtk_flow_offload_mutex);
+
+ return err;
+}
+
+static int
+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+ static LIST_HEAD(block_cb_list);
+ struct flow_block_cb *block_cb;
+ flow_setup_cb_t *cb;
+
+ if (!eth->ppe.foe_table)
+ return -EOPNOTSUPP;
+
+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ cb = mtk_eth_setup_tc_block_cb;
+ f->driver_block_list = &block_cb_list;
+
+ switch (f->command) {
+ case FLOW_BLOCK_BIND:
+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
+ if (block_cb) {
+ flow_block_cb_incref(block_cb);
+ return 0;
+ }
+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
+ if (IS_ERR(block_cb))
+ return PTR_ERR(block_cb);
+
+ flow_block_cb_add(block_cb, f);
+ list_add_tail(&block_cb->driver_list, &block_cb_list);
+ return 0;
+ case FLOW_BLOCK_UNBIND:
+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
+ if (!block_cb)
+ return -ENOENT;
+
+ if (flow_block_cb_decref(block_cb)) {
+ flow_block_cb_remove(block_cb, f);
+ list_del(&block_cb->driver_list);
+ }
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ if (type == TC_SETUP_FT)
+ return mtk_eth_setup_tc_block(dev, type_data);
+
+ return -EOPNOTSUPP;
+}
+
+int mtk_eth_offload_init(struct mtk_eth *eth)
+{
+ if (!eth->ppe.foe_table)
+ return 0;
+
+ return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
new file mode 100644
index 000000000..0c45ea090
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#ifndef __MTK_PPE_REGS_H
+#define __MTK_PPE_REGS_H
+
+#define MTK_PPE_GLO_CFG 0x200
+#define MTK_PPE_GLO_CFG_EN BIT(0)
+#define MTK_PPE_GLO_CFG_TSID_EN BIT(1)
+#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP BIT(2)
+#define MTK_PPE_GLO_CFG_IP4_CS_DROP BIT(3)
+#define MTK_PPE_GLO_CFG_TTL0_DROP BIT(4)
+#define MTK_PPE_GLO_CFG_PPE_BSWAP BIT(5)
+#define MTK_PPE_GLO_CFG_PSE_HASH_OFS BIT(6)
+#define MTK_PPE_GLO_CFG_MCAST_TB_EN BIT(7)
+#define MTK_PPE_GLO_CFG_FLOW_DROP_KA BIT(8)
+#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE BIT(9)
+#define MTK_PPE_GLO_CFG_UDP_LITE_EN BIT(10)
+#define MTK_PPE_GLO_CFG_UDP_LEN_DROP BIT(11)
+#define MTK_PPE_GLO_CFG_MCAST_ENTRIES GNEMASK(13, 12)
+#define MTK_PPE_GLO_CFG_BUSY BIT(31)
+
+#define MTK_PPE_FLOW_CFG 0x204
+#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG BIT(6)
+#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG BIT(7)
+#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE BIT(8)
+#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE BIT(9)
+#define MTK_PPE_FLOW_CFG_IP6_6RD BIT(10)
+#define MTK_PPE_FLOW_CFG_IP4_NAT BIT(12)
+#define MTK_PPE_FLOW_CFG_IP4_NAPT BIT(13)
+#define MTK_PPE_FLOW_CFG_IP4_DSLITE BIT(14)
+#define MTK_PPE_FLOW_CFG_L2_BRIDGE BIT(15)
+#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST BIT(16)
+#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG BIT(17)
+#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL BIT(18)
+#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY BIT(19)
+#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY BIT(20)
+
+#define MTK_PPE_IP_PROTO_CHK 0x208
+#define MTK_PPE_IP_PROTO_CHK_IPV4 GENMASK(15, 0)
+#define MTK_PPE_IP_PROTO_CHK_IPV6 GENMASK(31, 16)
+
+#define MTK_PPE_TB_CFG 0x21c
+#define MTK_PPE_TB_CFG_ENTRY_NUM GENMASK(2, 0)
+#define MTK_PPE_TB_CFG_ENTRY_80B BIT(3)
+#define MTK_PPE_TB_CFG_SEARCH_MISS GENMASK(5, 4)
+#define MTK_PPE_TB_CFG_AGE_PREBIND BIT(6)
+#define MTK_PPE_TB_CFG_AGE_NON_L4 BIT(7)
+#define MTK_PPE_TB_CFG_AGE_UNBIND BIT(8)
+#define MTK_PPE_TB_CFG_AGE_TCP BIT(9)
+#define MTK_PPE_TB_CFG_AGE_UDP BIT(10)
+#define MTK_PPE_TB_CFG_AGE_TCP_FIN BIT(11)
+#define MTK_PPE_TB_CFG_KEEPALIVE GENMASK(13, 12)
+#define MTK_PPE_TB_CFG_HASH_MODE GENMASK(15, 14)
+#define MTK_PPE_TB_CFG_SCAN_MODE GENMASK(17, 16)
+#define MTK_PPE_TB_CFG_HASH_DEBUG GENMASK(19, 18)
+
+enum {
+ MTK_PPE_SCAN_MODE_DISABLED,
+ MTK_PPE_SCAN_MODE_CHECK_AGE,
+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
+};
+
+enum {
+ MTK_PPE_KEEPALIVE_DISABLE,
+ MTK_PPE_KEEPALIVE_UNICAST_CPU,
+ MTK_PPE_KEEPALIVE_DUP_CPU = 3,
+};
+
+enum {
+ MTK_PPE_SEARCH_MISS_ACTION_DROP,
+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
+};
+
+#define MTK_PPE_TB_BASE 0x220
+
+#define MTK_PPE_TB_USED 0x224
+#define MTK_PPE_TB_USED_NUM GENMASK(13, 0)
+
+#define MTK_PPE_BIND_RATE 0x228
+#define MTK_PPE_BIND_RATE_BIND GENMASK(15, 0)
+#define MTK_PPE_BIND_RATE_PREBIND GENMASK(31, 16)
+
+#define MTK_PPE_BIND_LIMIT0 0x22c
+#define MTK_PPE_BIND_LIMIT0_QUARTER GENMASK(13, 0)
+#define MTK_PPE_BIND_LIMIT0_HALF GENMASK(29, 16)
+
+#define MTK_PPE_BIND_LIMIT1 0x230
+#define MTK_PPE_BIND_LIMIT1_FULL GENMASK(13, 0)
+#define MTK_PPE_BIND_LIMIT1_NON_L4 GENMASK(23, 16)
+
+#define MTK_PPE_KEEPALIVE 0x234
+#define MTK_PPE_KEEPALIVE_TIME GENMASK(15, 0)
+#define MTK_PPE_KEEPALIVE_TIME_TCP GENMASK(23, 16)
+#define MTK_PPE_KEEPALIVE_TIME_UDP GENMASK(31, 24)
+
+#define MTK_PPE_UNBIND_AGE 0x238
+#define MTK_PPE_UNBIND_AGE_MIN_PACKETS GENMASK(31, 16)
+#define MTK_PPE_UNBIND_AGE_DELTA GENMASK(7, 0)
+
+#define MTK_PPE_BIND_AGE0 0x23c
+#define MTK_PPE_BIND_AGE0_DELTA_NON_L4 GENMASK(30, 16)
+#define MTK_PPE_BIND_AGE0_DELTA_UDP GENMASK(14, 0)
+
+#define MTK_PPE_BIND_AGE1 0x240
+#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN GENMASK(30, 16)
+#define MTK_PPE_BIND_AGE1_DELTA_TCP GENMASK(14, 0)
+
+#define MTK_PPE_HASH_SEED 0x244
+
+#define MTK_PPE_DEFAULT_CPU_PORT 0x248
+#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n) (GENMASK(2, 0) << ((_n) * 4))
+
+#define MTK_PPE_MTU_DROP 0x308
+
+#define MTK_PPE_VLAN_MTU0 0x30c
+#define MTK_PPE_VLAN_MTU0_NONE GENMASK(13, 0)
+#define MTK_PPE_VLAN_MTU0_1TAG GENMASK(29, 16)
+
+#define MTK_PPE_VLAN_MTU1 0x310
+#define MTK_PPE_VLAN_MTU1_2TAG GENMASK(13, 0)
+#define MTK_PPE_VLAN_MTU1_3TAG GENMASK(29, 16)
+
+#define MTK_PPE_VPM_TPID 0x318
+
+#define MTK_PPE_CACHE_CTL 0x320
+#define MTK_PPE_CACHE_CTL_EN BIT(0)
+#define MTK_PPE_CACHE_CTL_LOCK_CLR BIT(4)
+#define MTK_PPE_CACHE_CTL_REQ BIT(8)
+#define MTK_PPE_CACHE_CTL_CLEAR BIT(9)
+#define MTK_PPE_CACHE_CTL_CMD GENMASK(13, 12)
+
+#define MTK_PPE_MIB_CFG 0x334
+#define MTK_PPE_MIB_CFG_EN BIT(0)
+#define MTK_PPE_MIB_CFG_RD_CLR BIT(1)
+
+#define MTK_PPE_MIB_TB_BASE 0x338
+
+#define MTK_PPE_MIB_CACHE_CTL 0x350
+#define MTK_PPE_MIB_CACHE_CTL_EN BIT(0)
+#define MTK_PPE_MIB_CACHE_CTL_FLUSH BIT(2)
+
+#endif
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index a085213dc..813e30495 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
ppp_destroy_interface(ppp);
}
+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
+ struct net_device_path *path)
+{
+ struct ppp *ppp = netdev_priv(ctx->dev);
+ struct ppp_channel *chan;
+ struct channel *pch;
+
+ if (ppp->flags & SC_MULTILINK)
+ return -EOPNOTSUPP;
+
+ if (list_empty(&ppp->channels))
+ return -ENODEV;
+
+ pch = list_first_entry(&ppp->channels, struct channel, clist);
+ chan = pch->chan;
+ if (!chan->ops->fill_forward_path)
+ return -EOPNOTSUPP;
+
+ return chan->ops->fill_forward_path(ctx, path, chan);
+}
+
static const struct net_device_ops ppp_netdev_ops = {
.ndo_init = ppp_dev_init,
.ndo_uninit = ppp_dev_uninit,
.ndo_start_xmit = ppp_start_xmit,
.ndo_do_ioctl = ppp_net_ioctl,
.ndo_get_stats64 = ppp_get_stats64,
+ .ndo_fill_forward_path = ppp_fill_forward_path,
};
static struct device_type ppp_type = {
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 087b01684..7a8c246ab 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
return __pppoe_xmit(sk, skb);
}
+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
+ struct net_device_path *path,
+ const struct ppp_channel *chan)
+{
+ struct sock *sk = (struct sock *)chan->private;
+ struct pppox_sock *po = pppox_sk(sk);
+ struct net_device *dev = po->pppoe_dev;
+
+ if (sock_flag(sk, SOCK_DEAD) ||
+ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
+ return -1;
+
+ path->type = DEV_PATH_PPPOE;
+ path->encap.proto = htons(ETH_P_PPP_SES);
+ path->encap.id = be16_to_cpu(po->num);
+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
+ memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
+ path->dev = ctx->dev;
+ ctx->dev = dev;
+
+ return 0;
+}
+
static const struct ppp_channel_ops pppoe_chan_ops = {
.start_xmit = pppoe_xmit,
+ .fill_forward_path = pppoe_fill_forward_path,
};
static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 38af42bf8..9f64504ac 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -829,6 +829,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
struct sk_buff *skb,
struct net_device *sb_dev);
+enum net_device_path_type {
+ DEV_PATH_ETHERNET = 0,
+ DEV_PATH_VLAN,
+ DEV_PATH_BRIDGE,
+ DEV_PATH_PPPOE,
+ DEV_PATH_DSA,
+};
+
+struct net_device_path {
+ enum net_device_path_type type;
+ const struct net_device *dev;
+ union {
+ struct {
+ u16 id;
+ __be16 proto;
+ u8 h_dest[ETH_ALEN];
+ } encap;
+ struct {
+ enum {
+ DEV_PATH_BR_VLAN_KEEP,
+ DEV_PATH_BR_VLAN_TAG,
+ DEV_PATH_BR_VLAN_UNTAG,
+ DEV_PATH_BR_VLAN_UNTAG_HW,
+ } vlan_mode;
+ u16 vlan_id;
+ __be16 vlan_proto;
+ } bridge;
+ struct {
+ int port;
+ u16 proto;
+ } dsa;
+ };
+};
+
+#define NET_DEVICE_PATH_STACK_MAX 5
+#define NET_DEVICE_PATH_VLAN_MAX 2
+
+struct net_device_path_stack {
+ int num_paths;
+ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
+};
+
+struct net_device_path_ctx {
+ const struct net_device *dev;
+ u8 daddr[ETH_ALEN];
+
+ int num_vlans;
+ struct {
+ u16 id;
+ __be16 proto;
+ } vlan[NET_DEVICE_PATH_VLAN_MAX];
+};
+
enum tc_setup_type {
TC_SETUP_QDISC_MQPRIO,
TC_SETUP_CLSU32,
@@ -844,6 +897,7 @@ enum tc_setup_type {
TC_SETUP_ROOT_QDISC,
TC_SETUP_QDISC_GRED,
TC_SETUP_QDISC_TAPRIO,
+ TC_SETUP_FT,
};
/* These structures hold the attributes of bpf state that are being passed
@@ -1239,6 +1293,8 @@ struct tlsdev_ops;
* Get devlink port instance associated with a given netdev.
* Called with a reference on the netdevice and devlink locks only,
* rtnl_lock is not held.
+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
+ * Get the forwarding path to reach the real device from the HW destination address
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1436,6 +1492,8 @@ struct net_device_ops {
int (*ndo_xsk_wakeup)(struct net_device *dev,
u32 queue_id, u32 flags);
struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
+ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
+ struct net_device_path *path);
};
/**
@@ -2661,6 +2719,8 @@ void dev_remove_offload(struct packet_offload *po);
int dev_get_iflink(const struct net_device *dev);
int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
+ struct net_device_path_stack *stack);
struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
unsigned short mask);
struct net_device *dev_get_by_name(struct net *net, const char *name);
diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
index 98966064e..91f9a9283 100644
--- a/include/linux/ppp_channel.h
+++ b/include/linux/ppp_channel.h
@@ -28,6 +28,9 @@ struct ppp_channel_ops {
int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
/* Handle an ioctl call that has come in via /dev/ppp. */
int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
+ int (*fill_forward_path)(struct net_device_path_ctx *,
+ struct net_device_path *,
+ const struct ppp_channel *);
};
struct ppp_channel {
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 05f66d487..cafc74218 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -561,6 +561,8 @@ struct dsa_switch_ops {
struct sk_buff *skb);
};
+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
+
struct dsa_switch_driver {
struct list_head list;
const struct dsa_switch_ops *ops;
@@ -653,6 +655,14 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
#define BRCM_TAG_GET_PORT(v) ((v) >> 8)
#define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
+#if IS_ENABLED(CONFIG_NET_DSA)
+bool dsa_slave_dev_check(const struct net_device *dev);
+#else
+static inline bool dsa_slave_dev_check(const struct net_device *dev)
+{
+ return false;
+}
+#endif
netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index c6f7bd22d..59b873653 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -138,6 +138,7 @@ enum flow_action_id {
FLOW_ACTION_MPLS_PUSH,
FLOW_ACTION_MPLS_POP,
FLOW_ACTION_MPLS_MANGLE,
+ FLOW_ACTION_PPPOE_PUSH,
NUM_FLOW_ACTIONS,
};
@@ -213,6 +214,9 @@ struct flow_action_entry {
u8 bos;
u8 ttl;
} mpls_mangle;
+ struct { /* FLOW_ACTION_PPPOE_PUSH */
+ u16 sid;
+ } pppoe;
};
};
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 2c739fc75..89ab8f180 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
!lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
}
-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
+ bool forwarding)
{
struct inet6_dev *idev;
unsigned int mtu;
- if (dst_metric_locked(dst, RTAX_MTU)) {
+ if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
goto out;
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 7b3c873f8..e95483192 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -4,7 +4,4 @@
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
-#include <linux/sysctl.h>
-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
-
#endif /* _NF_CONNTRACK_IPV6_H*/
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 90690e37a..ce0bc3e62 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
!nf_ct_is_dying(ct);
}
+#define NF_CT_DAY (86400 * HZ)
+
+/* Set an arbitrary timeout large enough not to ever expire, this save
+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
+ * nf_ct_is_expired().
+ */
+static inline void nf_ct_offload_timeout(struct nf_conn *ct)
+{
+ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
+ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
+}
+
struct kernel_param;
int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index f7a060c6e..7f44a7715 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
#endif
}
+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
+ unsigned int bytes);
+
+static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
+ unsigned int bytes)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ nf_ct_acct_add(ct, dir, 1, bytes);
+#endif
+}
+
void nf_conntrack_acct_pernet_init(struct net *net);
int nf_conntrack_acct_init(void);
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 68d7fc92..7cf89767 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -8,31 +8,99 @@
#include <linux/rcupdate.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <net/flow_offload.h>
#include <net/dst.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
struct nf_flowtable;
+struct nf_flow_rule;
+struct flow_offload;
+enum flow_offload_tuple_dir;
+
+struct nf_flow_key {
+ struct flow_dissector_key_meta meta;
+ struct flow_dissector_key_control control;
+ struct flow_dissector_key_control enc_control;
+ struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_vlan vlan;
+ struct flow_dissector_key_vlan cvlan;
+ union {
+ struct flow_dissector_key_ipv4_addrs ipv4;
+ struct flow_dissector_key_ipv6_addrs ipv6;
+ };
+ struct flow_dissector_key_keyid enc_key_id;
+ union {
+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
+ };
+ struct flow_dissector_key_tcp tcp;
+ struct flow_dissector_key_ports tp;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct nf_flow_match {
+ struct flow_dissector dissector;
+ struct nf_flow_key key;
+ struct nf_flow_key mask;
+};
+
+struct nf_flow_rule {
+ struct nf_flow_match match;
+ struct flow_rule *rule;
+};
struct nf_flowtable_type {
struct list_head list;
int family;
int (*init)(struct nf_flowtable *ft);
+ int (*setup)(struct nf_flowtable *ft,
+ struct net_device *dev,
+ enum flow_block_command cmd);
+ int (*action)(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule);
void (*free)(struct nf_flowtable *ft);
nf_hookfn *hook;
struct module *owner;
};
+enum nf_flowtable_flags {
+ NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */
+ NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */
+};
+
struct nf_flowtable {
struct list_head list;
struct rhashtable rhashtable;
+ int priority;
const struct nf_flowtable_type *type;
struct delayed_work gc_work;
+ unsigned int flags;
+ struct flow_block flow_block;
+ struct rw_semaphore flow_block_lock; /* Guards flow_block */
+ possible_net_t net;
};
+static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
+{
+ return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
+}
+
enum flow_offload_tuple_dir {
FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
};
+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
+
+enum flow_offload_xmit_type {
+ FLOW_OFFLOAD_XMIT_UNSPEC = 0,
+ FLOW_OFFLOAD_XMIT_NEIGH,
+ FLOW_OFFLOAD_XMIT_XFRM,
+ FLOW_OFFLOAD_XMIT_DIRECT,
+};
+
+#define NF_FLOW_TABLE_ENCAP_MAX 2
struct flow_offload_tuple {
union {
@@ -52,13 +120,30 @@ struct flow_offload_tuple {
u8 l3proto;
u8 l4proto;
- u8 dir;
+ struct {
+ u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
- u16 mtu;
+ /* All members above are keys for lookups, see flow_offload_hash(). */
+ struct { } __hash;
- struct {
- struct dst_entry *dst_cache;
- u32 dst_cookie;
+ u8 dir:2,
+ xmit_type:2,
+ encap_num:2,
+ in_vlan_ingress:2;
+ u16 mtu;
+ union {
+ struct {
+ struct dst_entry *dst_cache;
+ u32 dst_cookie;
+ };
+ struct {
+ u32 ifidx;
+ u32 hw_ifidx;
+ u8 h_source[ETH_ALEN];
+ u8 h_dest[ETH_ALEN];
+ } out;
};
};
@@ -67,52 +152,139 @@ struct flow_offload_tuple_rhash {
struct flow_offload_tuple tuple;
};
-#define FLOW_OFFLOAD_SNAT 0x1
-#define FLOW_OFFLOAD_DNAT 0x2
-#define FLOW_OFFLOAD_DYING 0x4
-#define FLOW_OFFLOAD_TEARDOWN 0x8
+enum nf_flow_flags {
+ NF_FLOW_SNAT,
+ NF_FLOW_DNAT,
+ NF_FLOW_TEARDOWN,
+ NF_FLOW_HW,
+ NF_FLOW_HW_DYING,
+ NF_FLOW_HW_DEAD,
+ NF_FLOW_HW_PENDING,
+};
+
+enum flow_offload_type {
+ NF_FLOW_OFFLOAD_UNSPEC = 0,
+ NF_FLOW_OFFLOAD_ROUTE,
+};
struct flow_offload {
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
- u32 flags;
- union {
- /* Your private driver data here. */
- u32 timeout;
- };
+ struct nf_conn *ct;
+ unsigned long flags;
+ u16 type;
+ u32 timeout;
+ struct rcu_head rcu_head;
};
#define NF_FLOW_TIMEOUT (30 * HZ)
+#define nf_flowtable_time_stamp (u32)jiffies
+
+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
+
+static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
+{
+ return (__s32)(timeout - nf_flowtable_time_stamp);
+}
struct nf_flow_route {
struct {
- struct dst_entry *dst;
+ struct dst_entry *dst;
+ struct {
+ u32 ifindex;
+ struct {
+ u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
+ u8 num_encaps:2,
+ ingress_vlans:2;
+ } in;
+ struct {
+ u32 ifindex;
+ u32 hw_ifindex;
+ u8 h_source[ETH_ALEN];
+ u8 h_dest[ETH_ALEN];
+ } out;
+ enum flow_offload_xmit_type xmit_type;
} tuple[FLOW_OFFLOAD_DIR_MAX];
};
-struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
- struct nf_flow_route *route);
+struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
void flow_offload_free(struct flow_offload *flow);
+static inline int
+nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
+ flow_setup_cb_t *cb, void *cb_priv)
+{
+ struct flow_block *block = &flow_table->flow_block;
+ struct flow_block_cb *block_cb;
+ int err = 0;
+
+ down_write(&flow_table->flow_block_lock);
+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
+ if (block_cb) {
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
+ if (IS_ERR(block_cb)) {
+ err = PTR_ERR(block_cb);
+ goto unlock;
+ }
+
+ list_add_tail(&block_cb->list, &block->cb_list);
+
+unlock:
+ up_write(&flow_table->flow_block_lock);
+ return err;
+}
+
+static inline void
+nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
+ flow_setup_cb_t *cb, void *cb_priv)
+{
+ struct flow_block *block = &flow_table->flow_block;
+ struct flow_block_cb *block_cb;
+
+ down_write(&flow_table->flow_block_lock);
+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
+ if (block_cb) {
+ list_del(&block_cb->list);
+ flow_block_cb_free(block_cb);
+ } else {
+ WARN_ON(true);
+ }
+ up_write(&flow_table->flow_block_lock);
+}
+
+int flow_offload_route_init(struct flow_offload *flow,
+ const struct nf_flow_route *route);
+
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
+void flow_offload_refresh(struct nf_flowtable *flow_table,
+ struct flow_offload *flow);
+
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple);
+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
+ struct net_device *dev);
void nf_flow_table_cleanup(struct net_device *dev);
int nf_flow_table_init(struct nf_flowtable *flow_table);
void nf_flow_table_free(struct nf_flowtable *flow_table);
void flow_offload_teardown(struct flow_offload *flow);
-static inline void flow_offload_dead(struct flow_offload *flow)
-{
- flow->flags |= FLOW_OFFLOAD_DYING;
-}
-int nf_flow_snat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir);
-int nf_flow_dnat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir);
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ void (*iter)(struct flow_offload *flow, void *data),
+ void *data);
+
+void nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
+void nf_flow_dnat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
struct flow_ports {
__be16 source, dest;
@@ -126,4 +298,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
#define MODULE_ALIAS_NF_FLOWTABLE(family) \
MODULE_ALIAS("nf-flowtable-" __stringify(family))
+void nf_flow_offload_add(struct nf_flowtable *flowtable,
+ struct flow_offload *flow);
+void nf_flow_offload_del(struct nf_flowtable *flowtable,
+ struct flow_offload *flow);
+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
+ struct flow_offload *flow);
+
+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd);
+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule);
+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule);
+
+int nf_flow_table_offload_init(void);
+void nf_flow_table_offload_exit(void);
+
+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+{
+ __be16 proto;
+
+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
+ sizeof(struct pppoe_hdr)));
+ switch (proto) {
+ case htons(PPP_IP):
+ return htons(ETH_P_IP);
+ case htons(PPP_IPV6):
+ return htons(ETH_P_IPV6);
+ }
+
+ return 0;
+}
+
#endif /* _NF_FLOW_TABLE_H */
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 806454e76..9e3963c8f 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -27,6 +27,9 @@ struct nf_tcp_net {
int tcp_loose;
int tcp_be_liberal;
int tcp_max_retrans;
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ unsigned int offload_timeout;
+#endif
};
enum udp_conntrack {
@@ -37,6 +40,9 @@ enum udp_conntrack {
struct nf_udp_net {
unsigned int timeouts[UDP_CT_MAX];
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ unsigned int offload_timeout;
+#endif
};
struct nf_icmp_net {
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 336014bf8..ae698d11c 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -105,14 +105,19 @@ enum ip_conntrack_status {
IPS_OFFLOAD_BIT = 14,
IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
+ /* Conntrack has been offloaded to hardware. */
+ IPS_HW_OFFLOAD_BIT = 15,
+ IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
+
/* Be careful here, modifying these bits can make things messy,
* so don't let users modify them directly.
*/
IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
+ IPS_SEQ_ADJUST | IPS_TEMPLATE |
+ IPS_OFFLOAD | IPS_HW_OFFLOAD),
- __IPS_MAX_BIT = 15,
+ __IPS_MAX_BIT = 16,
};
/* Connection tracking event types */
diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
new file mode 100644
index 000000000..5841bbe0e
--- /dev/null
+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _XT_FLOWOFFLOAD_H
+#define _XT_FLOWOFFLOAD_H
+
+#include <linux/types.h>
+
+enum {
+ XT_FLOWOFFLOAD_HW = 1 << 0,
+
+ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
+};
+
+struct xt_flowoffload_target_info {
+ __u32 flags;
+};
+
+#endif /* _XT_FLOWOFFLOAD_H */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 589615ec4..444ab5fae 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
return real_dev->ifindex;
}
+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
+ struct net_device_path *path)
+{
+ struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
+
+ path->type = DEV_PATH_VLAN;
+ path->encap.id = vlan->vlan_id;
+ path->encap.proto = vlan->vlan_proto;
+ path->dev = ctx->dev;
+ ctx->dev = vlan->real_dev;
+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
+ return -ENOSPC;
+
+ ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
+ ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
+ ctx->num_vlans++;
+
+ return 0;
+}
+
static const struct ethtool_ops vlan_ethtool_ops = {
.get_link_ksettings = vlan_ethtool_get_link_ksettings,
.get_drvinfo = vlan_ethtool_get_drvinfo,
@@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = {
#endif
.ndo_fix_features = vlan_dev_fix_features,
.ndo_get_iflink = vlan_dev_get_iflink,
+ .ndo_fill_forward_path = vlan_dev_fill_forward_path,
};
static void vlan_dev_free(struct net_device *dev)
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 501f77f0f..0940b44cd 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
return br_del_if(br, slave_dev);
}
+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
+ struct net_device_path *path)
+{
+ struct net_bridge_fdb_entry *f;
+ struct net_bridge_port *dst;
+ struct net_bridge *br;
+
+ if (netif_is_bridge_port(ctx->dev))
+ return -1;
+
+ br = netdev_priv(ctx->dev);
+
+ br_vlan_fill_forward_path_pvid(br, ctx, path);
+
+ f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
+ if (!f || !f->dst)
+ return -1;
+
+ dst = READ_ONCE(f->dst);
+ if (!dst)
+ return -1;
+
+ if (br_vlan_fill_forward_path_mode(br, dst, path))
+ return -1;
+
+ path->type = DEV_PATH_BRIDGE;
+ path->dev = dst->br->dev;
+ ctx->dev = dst->dev;
+
+ switch (path->bridge.vlan_mode) {
+ case DEV_PATH_BR_VLAN_TAG:
+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
+ return -ENOSPC;
+ ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
+ ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
+ ctx->num_vlans++;
+ break;
+ case DEV_PATH_BR_VLAN_UNTAG_HW:
+ case DEV_PATH_BR_VLAN_UNTAG:
+ ctx->num_vlans--;
+ break;
+ case DEV_PATH_BR_VLAN_KEEP:
+ break;
+ }
+
+ return 0;
+}
+
static const struct ethtool_ops br_ethtool_ops = {
.get_drvinfo = br_getinfo,
.get_link = ethtool_op_get_link,
@@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_bridge_setlink = br_setlink,
.ndo_bridge_dellink = br_dellink,
.ndo_features_check = passthru_features_check,
+ .ndo_fill_forward_path = br_fill_forward_path,
};
static struct device_type br_type = {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a736be8a1..4bd9e9b57 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
void *ptr);
+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
+ struct net_device_path_ctx *ctx,
+ struct net_device_path *path);
+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
+ struct net_bridge_port *dst,
+ struct net_device_path *path);
+
static inline struct net_bridge_vlan_group *br_vlan_group(
const struct net_bridge *br)
{
@@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
return 0;
}
+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
+ struct net_device_path_ctx *ctx,
+ struct net_device_path *path)
+{
+}
+
+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
+ struct net_bridge_port *dst,
+ struct net_device_path *path)
+{
+ return 0;
+}
+
static inline struct net_bridge_vlan_group *br_vlan_group(
const struct net_bridge *br)
{
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 9257292bd..bcfd16924 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
}
EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
+ struct net_device_path_ctx *ctx,
+ struct net_device_path *path)
+{
+ struct net_bridge_vlan_group *vg;
+ int idx = ctx->num_vlans - 1;
+ u16 vid;
+
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
+
+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
+ return;
+
+ vg = br_vlan_group(br);
+
+ if (idx >= 0 &&
+ ctx->vlan[idx].proto == br->vlan_proto) {
+ vid = ctx->vlan[idx].id;
+ } else {
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
+ vid = br_get_pvid(vg);
+ }
+
+ path->bridge.vlan_id = vid;
+ path->bridge.vlan_proto = br->vlan_proto;
+}
+
+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
+ struct net_bridge_port *dst,
+ struct net_device_path *path)
+{
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
+
+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
+ return 0;
+
+ vg = nbp_vlan_group_rcu(dst);
+ v = br_vlan_find(vg, path->bridge.vlan_id);
+ if (!v || !br_vlan_should_use(v))
+ return -EINVAL;
+
+ if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
+ return 0;
+
+ if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
+ else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
+ else
+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
+
+ return 0;
+}
+
int br_vlan_get_info(const struct net_device *dev, u16 vid,
struct bridge_vlan_info *p_vinfo)
{
diff --git a/net/core/dev.c b/net/core/dev.c
index fe2c856b9..4f0edb218 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
+{
+ int k = stack->num_paths++;
+
+ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
+ return NULL;
+
+ return &stack->path[k];
+}
+
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
+ struct net_device_path_stack *stack)
+{
+ const struct net_device *last_dev;
+ struct net_device_path_ctx ctx = {
+ .dev = dev,
+ };
+ struct net_device_path *path;
+ int ret = 0;
+
+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
+ stack->num_paths = 0;
+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
+ last_dev = ctx.dev;
+ path = dev_fwd_path(stack);
+ if (!path)
+ return -1;
+
+ memset(path, 0, sizeof(struct net_device_path));
+ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
+ if (ret < 0)
+ return -1;
+
+ if (WARN_ON_ONCE(last_dev == ctx.dev))
+ return -1;
+ }
+ path = dev_fwd_path(stack);
+ if (!path)
+ return -1;
+ path->type = DEV_PATH_ETHERNET;
+ path->dev = ctx.dev;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
+
/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index ca80f8699..35a1249a9 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -329,6 +329,15 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
}
EXPORT_SYMBOL_GPL(call_dsa_notifiers);
+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
+{
+ if (!netdev || !dsa_slave_dev_check(netdev))
+ return ERR_PTR(-ENODEV);
+
+ return dsa_slave_to_port(netdev);
+}
+EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
+
static int __init dsa_init_module(void)
{
int rc;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 036fda317..2dfaa1eac 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -22,8 +22,6 @@
#include "dsa_priv.h"
-static bool dsa_slave_dev_check(const struct net_device *dev);
-
/* slave mii_bus handling ***************************************************/
static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
{
@@ -1033,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
}
}
+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
+ void *type_data)
+{
+ struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
+ struct net_device *master = cpu_dp->master;
+
+ if (!master->netdev_ops->ndo_setup_tc)
+ return -EOPNOTSUPP;
+
+ return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
+}
+
static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_switch *ds = dp->ds;
- if (type == TC_SETUP_BLOCK)
+ switch (type) {
+ case TC_SETUP_BLOCK:
return dsa_slave_setup_tc_block(dev, type_data);
+ case TC_SETUP_FT:
+ return dsa_slave_setup_ft_block(ds, dp->index, type_data);
+ default:
+ break;
+ }
if (!ds->ops->port_setup_tc)
return -EOPNOTSUPP;
@@ -1226,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
return dp->ds->devlink ? &dp->devlink_port : NULL;
}
+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
+ struct net_device_path *path)
+{
+ struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+
+ path->dev = ctx->dev;
+ path->type = DEV_PATH_DSA;
+ path->dsa.proto = cpu_dp->tag_ops->proto;
+ path->dsa.port = dp->index;
+ ctx->dev = cpu_dp->master;
+
+ return 0;
+}
+
static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_open = dsa_slave_open,
.ndo_stop = dsa_slave_close,
@@ -1250,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
.ndo_get_devlink_port = dsa_slave_get_devlink_port,
+ .ndo_fill_forward_path = dsa_slave_fill_forward_path,
};
static struct device_type dsa_type = {
@@ -1497,10 +1529,11 @@ void dsa_slave_destroy(struct net_device *slave_dev)
free_netdev(slave_dev);
}
-static bool dsa_slave_dev_check(const struct net_device *dev)
+bool dsa_slave_dev_check(const struct net_device *dev)
{
return dev->netdev_ops == &dsa_slave_netdev_ops;
}
+EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
static int dsa_slave_changeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index f17b40211..803b92e4c 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -56,8 +56,6 @@ config NF_TABLES_ARP
help
This option enables the ARP support for nf_tables.
-endif # NF_TABLES
-
config NF_FLOW_TABLE_IPV4
tristate "Netfilter flow table IPv4 module"
depends on NF_FLOW_TABLE
@@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4
To compile it as a module, choose M here.
+endif # NF_TABLES
+
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5585e3a94..bb76f6061 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
}
}
- mtu = ip6_dst_mtu_forward(dst);
+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 69443e9a3..0b481d236 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -45,7 +45,6 @@ config NFT_FIB_IPV6
multicast or blackhole.
endif # NF_TABLES_IPV6
-endif # NF_TABLES
config NF_FLOW_TABLE_IPV6
tristate "Netfilter flow table IPv6 module"
@@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6
To compile it as a module, choose M here.
+endif # NF_TABLES
+
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 98aaf0b79..2b357ac71 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -83,7 +83,7 @@ enum rt6_nud_state {
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
-static unsigned int ip6_mtu(const struct dst_entry *dst);
+static unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
@@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
- struct inet6_dev *idev;
- unsigned int mtu;
-
- mtu = dst_metric_raw(dst, RTAX_MTU);
- if (mtu)
- goto out;
-
- mtu = IPV6_MIN_MTU;
-
- rcu_read_lock();
- idev = __in6_dev_get(dst->dev);
- if (idev)
- mtu = idev->cnf.mtu6;
- rcu_read_unlock();
-
-out:
- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
-
- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
+ return ip6_dst_mtu_maybe_forward(dst, false);
}
/* MTU selection:
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index b967763f5..c040e713a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -690,8 +690,6 @@ config NFT_FIB_NETDEV
endif # NF_TABLES_NETDEV
-endif # NF_TABLES
-
config NF_FLOW_TABLE_INET
tristate "Netfilter flow table mixed IPv4/IPv6 module"
depends on NF_FLOW_TABLE
@@ -700,11 +698,12 @@ config NF_FLOW_TABLE_INET
To compile it as a module, choose M here.
+endif # NF_TABLES
+
config NF_FLOW_TABLE
tristate "Netfilter flow table module"
depends on NETFILTER_INGRESS
depends on NF_CONNTRACK
- depends on NF_TABLES
help
This option adds the flow table core infrastructure.
@@ -984,6 +983,15 @@ config NETFILTER_XT_TARGET_NOTRACK
depends on NETFILTER_ADVANCED
select NETFILTER_XT_TARGET_CT
+config NETFILTER_XT_TARGET_FLOWOFFLOAD
+ tristate '"FLOWOFFLOAD" target support'
+ depends on NF_FLOW_TABLE
+ depends on NETFILTER_INGRESS
+ help
+ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
+ module to speed up processing of packets by bypassing the usual
+ netfilter chains
+
config NETFILTER_XT_TARGET_RATEEST
tristate '"RATEEST" target support'
depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 4fc075b61..d93a121bc 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
# flow table infrastructure
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
+ nf_flow_table_offload.o
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
@@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f6ab6f484..f689e19d8 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -864,9 +864,8 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
-static inline void nf_ct_acct_update(struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int len)
+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
+ unsigned int bytes)
{
struct nf_conn_acct *acct;
@@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
if (acct) {
struct nf_conn_counter *counter = acct->counter;
- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
- atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
+ atomic64_add(packets, &counter[dir].packets);
+ atomic64_add(bytes, &counter[dir].bytes);
}
}
+EXPORT_SYMBOL_GPL(nf_ct_acct_add);
static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
const struct nf_conn *loser_ct)
@@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
/* u32 should be fine since we must have seen one packet. */
bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
- nf_ct_acct_update(ct, ctinfo, bytes);
+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
}
}
@@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work)
tmp = nf_ct_tuplehash_to_ctrack(h);
- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
+ nf_ct_offload_timeout(tmp);
continue;
+ }
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
@@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
WRITE_ONCE(ct->timeout, extra_jiffies);
acct:
if (do_acct)
- nf_ct_acct_update(ct, ctinfo, skb->len);
+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
@@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb)
{
- nf_ct_acct_update(ct, ctinfo, skb->len);
+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
return nf_ct_delete(ct, 0, 0);
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 7204f0366..3742bae21 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1453,6 +1453,10 @@ void nf_conntrack_tcp_init_net(struct net *net)
tn->tcp_loose = nf_ct_tcp_loose;
tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
+
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ tn->offload_timeout = 30 * HZ;
+#endif
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index e3a2d018f..a1579d6c3 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net)
for (i = 0; i < UDP_CT_MAX; i++)
un->timeouts[i] = udp_timeouts[i];
+
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ un->offload_timeout = 30 * HZ;
+#endif
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9c6259c28..10d9f93ce 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
goto release;
- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
+ seq_puts(s, "[HW_OFFLOAD] ");
+ else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
seq_puts(s, "[OFFLOAD] ");
else if (test_bit(IPS_ASSURED_BIT, &ct->status))
seq_puts(s, "[ASSURED] ");
@@ -620,11 +622,17 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
+#endif
NF_SYSCTL_CT_PROTO_TCP_LOOSE,
NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
+#endif
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
#ifdef CONFIG_NF_CT_PROTO_SCTP
@@ -812,6 +820,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
+ .procname = "nf_flowtable_tcp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#endif
[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
.procname = "nf_conntrack_tcp_loose",
.maxlen = sizeof(int),
@@ -846,6 +862,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
+ .procname = "nf_flowtable_udp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#endif
[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
.procname = "nf_conntrack_icmp_timeout",
.maxlen = sizeof(unsigned int),
@@ -1028,6 +1052,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
XASSIGN(LIBERAL, &tn->tcp_be_liberal);
XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
#undef XASSIGN
+
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
+#endif
+
}
static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
@@ -1115,6 +1144,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
+#endif
nf_conntrack_standalone_init_tcp_sysctl(net, table);
nf_conntrack_standalone_init_sctp_sysctl(net, table);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index f212cec0..10365581 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -7,43 +7,21 @@
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ip6_route.h>
-#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
-struct flow_offload_entry {
- struct flow_offload flow;
- struct nf_conn *ct;
- struct rcu_head rcu_head;
-};
-
static DEFINE_MUTEX(flowtable_lock);
static LIST_HEAD(flowtables);
-static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
-{
- const struct rt6_info *rt;
-
- if (flow_tuple->l3proto == NFPROTO_IPV6) {
- rt = (const struct rt6_info *)flow_tuple->dst_cache;
- return rt6_get_cookie(rt);
- }
-
- return 0;
-}
-
static void
-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
- struct nf_flow_route *route,
+flow_offload_fill_dir(struct flow_offload *flow,
enum flow_offload_tuple_dir dir)
{
struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
- struct dst_entry *other_dst = route->tuple[!dir].dst;
- struct dst_entry *dst = route->tuple[dir].dst;
+ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
ft->dir = dir;
@@ -51,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
case NFPROTO_IPV4:
ft->src_v4 = ctt->src.u3.in;
ft->dst_v4 = ctt->dst.u3.in;
- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
break;
case NFPROTO_IPV6:
ft->src_v6 = ctt->src.u3.in6;
ft->dst_v6 = ctt->dst.u3.in6;
- ft->mtu = ip6_dst_mtu_forward(dst);
break;
}
@@ -64,50 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
ft->l4proto = ctt->dst.protonum;
ft->src_port = ctt->src.u.tcp.port;
ft->dst_port = ctt->dst.u.tcp.port;
-
- ft->iifidx = other_dst->dev->ifindex;
- ft->dst_cache = dst;
- ft->dst_cookie = flow_offload_dst_cookie(ft);
}
-struct flow_offload *
-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
+struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
{
- struct flow_offload_entry *entry;
struct flow_offload *flow;
if (unlikely(nf_ct_is_dying(ct) ||
!atomic_inc_not_zero(&ct->ct_general.use)))
return NULL;
- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
- if (!entry)
+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
+ if (!flow)
goto err_ct_refcnt;
- flow = &entry->flow;
-
- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
- goto err_dst_cache_original;
-
- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
- goto err_dst_cache_reply;
+ flow->ct = ct;
- entry->ct = ct;
-
- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
if (ct->status & IPS_SRC_NAT)
- flow->flags |= FLOW_OFFLOAD_SNAT;
+ __set_bit(NF_FLOW_SNAT, &flow->flags);
if (ct->status & IPS_DST_NAT)
- flow->flags |= FLOW_OFFLOAD_DNAT;
+ __set_bit(NF_FLOW_DNAT, &flow->flags);
return flow;
-err_dst_cache_reply:
- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
-err_dst_cache_original:
- kfree(entry);
err_ct_refcnt:
nf_ct_put(ct);
@@ -115,40 +73,135 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);
-static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
{
- tcp->state = TCP_CONNTRACK_ESTABLISHED;
- tcp->seen[0].td_maxwin = 0;
- tcp->seen[1].td_maxwin = 0;
+ const struct rt6_info *rt;
+
+ if (flow_tuple->l3proto == NFPROTO_IPV6) {
+ rt = (const struct rt6_info *)flow_tuple->dst_cache;
+ return rt6_get_cookie(rt);
+ }
+
+ return 0;
}
-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
+static int flow_offload_fill_route(struct flow_offload *flow,
+ const struct nf_flow_route *route,
+ enum flow_offload_tuple_dir dir)
+{
+ struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
+ struct dst_entry *dst = route->tuple[dir].dst;
+ int i, j = 0;
+
+ switch (flow_tuple->l3proto) {
+ case NFPROTO_IPV4:
+ flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
+ break;
+ case NFPROTO_IPV6:
+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
+ break;
+ }
+
+ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
+ if (route->tuple[dir].in.ingress_vlans & BIT(i))
+ flow_tuple->in_vlan_ingress |= BIT(j);
+ j++;
+ }
+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
+
+ switch (route->tuple[dir].xmit_type) {
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
+ ETH_ALEN);
+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
+ ETH_ALEN);
+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
+ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
+ break;
+ case FLOW_OFFLOAD_XMIT_XFRM:
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ if (!dst_hold_safe(route->tuple[dir].dst))
+ return -1;
+
+ flow_tuple->dst_cache = dst;
+ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
+
+ return 0;
+}
+
+static void nft_flow_dst_release(struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir)
+{
+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
+}
+
+int flow_offload_route_init(struct flow_offload *flow,
+ const struct nf_flow_route *route)
+{
+ int err;
+
+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+ if (err < 0)
+ return err;
+
+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
+ if (err < 0)
+ goto err_route_reply;
+
+ flow->type = NF_FLOW_OFFLOAD_ROUTE;
+
+ return 0;
+
+err_route_reply:
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(flow_offload_route_init);
-static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{
- return (__s32)(timeout - (u32)jiffies);
+ tcp->state = TCP_CONNTRACK_ESTABLISHED;
+ tcp->seen[0].td_maxwin = 0;
+ tcp->seen[1].td_maxwin = 0;
}
static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
{
- const struct nf_conntrack_l4proto *l4proto;
+ struct net *net = nf_ct_net(ct);
int l4num = nf_ct_protonum(ct);
- unsigned int timeout;
+ s32 timeout;
- l4proto = nf_ct_l4proto_find(l4num);
- if (!l4proto)
- return;
+ if (l4num == IPPROTO_TCP) {
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
- if (l4num == IPPROTO_TCP)
- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
- else if (l4num == IPPROTO_UDP)
- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
- else
+ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+ timeout -= tn->offload_timeout;
+ } else if (l4num == IPPROTO_UDP) {
+ struct nf_udp_net *tn = nf_udp_pernet(net);
+
+ timeout = tn->timeouts[UDP_CT_REPLIED];
+ timeout -= tn->offload_timeout;
+ } else {
return;
+ }
+
+ if (timeout < 0)
+ timeout = 0;
- if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
- ct->timeout = nfct_time_stamp + timeout;
+ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
+ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
}
static void flow_offload_fixup_ct_state(struct nf_conn *ct)
@@ -163,17 +216,23 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
flow_offload_fixup_ct_timeout(ct);
}
-void flow_offload_free(struct flow_offload *flow)
+static void flow_offload_route_release(struct flow_offload *flow)
{
- struct flow_offload_entry *e;
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
+}
- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
- e = container_of(flow, struct flow_offload_entry, flow);
- if (flow->flags & FLOW_OFFLOAD_DYING)
- nf_ct_delete(e->ct, 0, 0);
- nf_ct_put(e->ct);
- kfree_rcu(e, rcu_head);
+void flow_offload_free(struct flow_offload *flow)
+{
+ switch (flow->type) {
+ case NF_FLOW_OFFLOAD_ROUTE:
+ flow_offload_route_release(flow);
+ break;
+ default:
+ break;
+ }
+ nf_ct_put(flow->ct);
+ kfree_rcu(flow, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);
@@ -181,14 +240,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
const struct flow_offload_tuple *tuple = data;
- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
}
static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
const struct flow_offload_tuple_rhash *tuplehash = data;
- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
}
static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -197,7 +256,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
const struct flow_offload_tuple *tuple = arg->key;
const struct flow_offload_tuple_rhash *x = ptr;
- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
return 1;
return 0;
@@ -211,30 +270,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
.automatic_shrinking = true,
};
-#define DAY (86400 * HZ)
-
-/* Set an arbitrary timeout large enough not to ever expire, this save
- * us a check for the IPS_OFFLOAD_BIT from the packet path via
- * nf_ct_is_expired().
- */
-static void nf_ct_offload_timeout(struct flow_offload *flow)
+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
{
- struct flow_offload_entry *entry;
- struct nf_conn *ct;
+ unsigned long timeout = NF_FLOW_TIMEOUT;
+ struct net *net = nf_ct_net(flow->ct);
+ int l4num = nf_ct_protonum(flow->ct);
- entry = container_of(flow, struct flow_offload_entry, flow);
- ct = entry->ct;
+ if (l4num == IPPROTO_TCP) {
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
- if (nf_ct_expires(ct) < DAY / 2)
- ct->timeout = nfct_time_stamp + DAY;
+ timeout = tn->offload_timeout;
+ } else if (l4num == IPPROTO_UDP) {
+ struct nf_udp_net *tn = nf_udp_pernet(net);
+
+ timeout = tn->offload_timeout;
+ }
+
+ return timeout;
}
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
int err;
- nf_ct_offload_timeout(flow);
- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
err = rhashtable_insert_fast(&flow_table->rhashtable,
&flow->tuplehash[0].node,
@@ -252,10 +311,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
return err;
}
+ nf_ct_offload_timeout(flow->ct);
+
+ if (nf_flowtable_hw_offload(flow_table)) {
+ __set_bit(NF_FLOW_HW, &flow->flags);
+ nf_flow_offload_add(flow_table, flow);
+ }
+
return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);
+void flow_offload_refresh(struct nf_flowtable *flow_table,
+ struct flow_offload *flow)
+{
+ u32 timeout;
+
+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+ if (timeout - READ_ONCE(flow->timeout) > HZ)
+ WRITE_ONCE(flow->timeout, timeout);
+ else
+ return;
+
+ if (likely(!nf_flowtable_hw_offload(flow_table)))
+ return;
+
+ nf_flow_offload_add(flow_table, flow);
+}
+EXPORT_SYMBOL_GPL(flow_offload_refresh);
+
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
return nf_flow_timeout_delta(flow->timeout) <= 0;
@@ -264,8 +348,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
static void flow_offload_del(struct nf_flowtable *flow_table,
struct flow_offload *flow)
{
- struct flow_offload_entry *e;
-
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
nf_flow_offload_rhash_params);
@@ -273,28 +355,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
nf_flow_offload_rhash_params);
- e = container_of(flow, struct flow_offload_entry, flow);
- clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
if (nf_flow_has_expired(flow))
- flow_offload_fixup_ct(e->ct);
- else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
- flow_offload_fixup_ct_timeout(e->ct);
-
- if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
- flow_offload_fixup_ct_state(e->ct);
+ flow_offload_fixup_ct(flow->ct);
+ else
+ flow_offload_fixup_ct_timeout(flow->ct);
flow_offload_free(flow);
}
void flow_offload_teardown(struct flow_offload *flow)
{
- struct flow_offload_entry *e;
-
- flow->flags |= FLOW_OFFLOAD_TEARDOWN;
+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
- e = container_of(flow, struct flow_offload_entry, flow);
- flow_offload_fixup_ct_state(e->ct);
+ flow_offload_fixup_ct_state(flow->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);
@@ -304,7 +379,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
{
struct flow_offload_tuple_rhash *tuplehash;
struct flow_offload *flow;
- struct flow_offload_entry *e;
int dir;
tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
@@ -314,19 +388,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
return NULL;
- e = container_of(flow, struct flow_offload_entry, flow);
- if (unlikely(nf_ct_is_dying(e->ct)))
+ if (unlikely(nf_ct_is_dying(flow->ct)))
return NULL;
return tuplehash;
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
-static int
-nf_flow_table_iterate(struct nf_flowtable *flow_table,
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
void (*iter)(struct flow_offload *flow, void *data),
void *data)
{
@@ -339,7 +411,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
rhashtable_walk_start(&hti);
while ((tuplehash = rhashtable_walk_next(&hti))) {
-
if (IS_ERR(tuplehash)) {
if (PTR_ERR(tuplehash) != -EAGAIN) {
err = PTR_ERR(tuplehash);
@@ -359,23 +430,49 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
return err;
}
+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
-static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
+static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
{
- struct nf_flowtable *flow_table = data;
- struct flow_offload_entry *e;
- bool teardown;
+ struct dst_entry *dst;
- e = container_of(flow, struct flow_offload_entry, flow);
+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+ tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
+ dst = tuple->dst_cache;
+ if (!dst_check(dst, tuple->dst_cookie))
+ return true;
+ }
- teardown = flow->flags & (FLOW_OFFLOAD_DYING |
- FLOW_OFFLOAD_TEARDOWN);
+ return false;
+}
- if (!teardown)
- nf_ct_offload_timeout(flow);
+static bool nf_flow_has_stale_dst(struct flow_offload *flow)
+{
+ return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
+ flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
+}
- if (nf_flow_has_expired(flow) || teardown)
- flow_offload_del(flow_table, flow);
+static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
+{
+ struct nf_flowtable *flow_table = data;
+
+ if (nf_flow_has_expired(flow) ||
+ nf_ct_is_dying(flow->ct) ||
+ nf_flow_has_stale_dst(flow))
+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+
+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
+ if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
+ nf_flow_offload_del(flow_table, flow);
+ else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
+ flow_offload_del(flow_table, flow);
+ } else {
+ flow_offload_del(flow_table, flow);
+ }
+ } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
+ nf_flow_offload_stats(flow_table, flow);
+ }
}
static void nf_flow_offload_work_gc(struct work_struct *work)
@@ -387,30 +484,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
- __be16 port, __be16 new_port)
+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
{
struct tcphdr *tcph;
- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
- return -1;
-
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
-
- return 0;
}
-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
- __be16 port, __be16 new_port)
+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
{
struct udphdr *udph;
- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
- skb_try_make_writable(skb, thoff + sizeof(*udph)))
- return -1;
-
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace2(&udph->check, skb, port,
@@ -418,38 +505,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
-
- return 0;
}
-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
- u8 protocol, __be16 port, __be16 new_port)
+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, __be16 port, __be16 new_port)
{
switch (protocol) {
case IPPROTO_TCP:
- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
- return NF_DROP;
+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
break;
case IPPROTO_UDP:
- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
- return NF_DROP;
+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
break;
}
-
- return 0;
}
-int nf_flow_snat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir)
+void nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir)
{
struct flow_ports *hdr;
__be16 port, new_port;
- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
- return -1;
-
hdr = (void *)(skb_network_header(skb) + thoff);
switch (dir) {
@@ -463,25 +540,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
hdr->dest = new_port;
break;
- default:
- return -1;
}
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);
-int nf_flow_dnat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir)
+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
+ unsigned int thoff, u8 protocol,
+ enum flow_offload_tuple_dir dir)
{
struct flow_ports *hdr;
__be16 port, new_port;
- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
- return -1;
-
hdr = (void *)(skb_network_header(skb) + thoff);
switch (dir) {
@@ -495,11 +566,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
hdr->source = new_port;
break;
- default:
- return -1;
}
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
@@ -507,7 +576,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
{
int err;
- INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
+ INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
+ flow_block_init(&flowtable->flow_block);
+ init_rwsem(&flowtable->flow_block_lock);
err = rhashtable_init(&flowtable->rhashtable,
&nf_flow_offload_rhash_params);
@@ -528,25 +599,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
{
struct net_device *dev = data;
- struct flow_offload_entry *e;
-
- e = container_of(flow, struct flow_offload_entry, flow);
if (!dev) {
flow_offload_teardown(flow);
return;
}
- if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
+
+ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
(flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
flow->tuplehash[1].tuple.iifidx == dev->ifindex))
- flow_offload_dead(flow);
+ flow_offload_teardown(flow);
}
-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
- struct net_device *dev)
+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
+ struct net_device *dev)
{
nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
flush_delayed_work(&flowtable->gc_work);
+ nf_flow_table_offload_flush(flowtable);
}
void nf_flow_table_cleanup(struct net_device *dev)
@@ -555,7 +625,7 @@ void nf_flow_table_cleanup(struct net_device *dev)
mutex_lock(&flowtable_lock);
list_for_each_entry(flowtable, &flowtables, list)
- nf_flow_table_iterate_cleanup(flowtable, dev);
+ nf_flow_table_gc_cleanup(flowtable, dev);
mutex_unlock(&flowtable_lock);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
@@ -565,9 +635,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
mutex_lock(&flowtable_lock);
list_del(&flow_table->list);
mutex_unlock(&flowtable_lock);
+
cancel_delayed_work_sync(&flow_table->gc_work);
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
+ nf_flow_table_offload_flush(flow_table);
+ if (nf_flowtable_hw_offload(flow_table))
+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
+ flow_table);
rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
@@ -591,12 +666,23 @@ static struct notifier_block flow_offload_netdev_notifier = {
static int __init nf_flow_table_module_init(void)
{
- return register_netdevice_notifier(&flow_offload_netdev_notifier);
+ int ret;
+
+ ret = nf_flow_table_offload_init();
+ if (ret)
+ return ret;
+
+ ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
+ if (ret)
+ nf_flow_table_offload_exit();
+
+ return ret;
}
static void __exit nf_flow_table_module_exit(void)
{
unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+ nf_flow_table_offload_exit();
}
module_init(nf_flow_table_module_init);
@@ -604,3 +690,4 @@ module_exit(nf_flow_table_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter flow table module");
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 397129b2..6257d87c 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -7,11 +7,13 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
+#include <linux/if_ether.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_conntrack_acct.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>
@@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
if (proto != IPPROTO_TCP)
return 0;
- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
- return -1;
-
tcph = (void *)(skb_network_header(skb) + thoff);
if (unlikely(tcph->fin || tcph->rst)) {
flow_offload_teardown(flow);
@@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
return 0;
}
-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
- __be32 addr, __be32 new_addr)
+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
{
struct tcphdr *tcph;
- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
- return -1;
-
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
-
- return 0;
}
-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
- __be32 addr, __be32 new_addr)
+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
{
struct udphdr *udph;
- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
- skb_try_make_writable(skb, thoff + sizeof(*udph)))
- return -1;
-
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace4(&udph->check, skb, addr,
@@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
-
- return 0;
}
-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
- unsigned int thoff, __be32 addr,
- __be32 new_addr)
+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, __be32 addr,
+ __be32 new_addr)
{
switch (iph->protocol) {
case IPPROTO_TCP:
- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
- return NF_DROP;
+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
break;
case IPPROTO_UDP:
- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
- return NF_DROP;
+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
break;
}
-
- return 0;
}
-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
- struct iphdr *iph, unsigned int thoff,
- enum flow_offload_tuple_dir dir)
+static void nf_flow_snat_ip(const struct flow_offload *flow,
+ struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, enum flow_offload_tuple_dir dir)
{
__be32 addr, new_addr;
@@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
iph->daddr = new_addr;
break;
- default:
- return -1;
}
csum_replace4(&iph->check, addr, new_addr);
- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
- struct iphdr *iph, unsigned int thoff,
- enum flow_offload_tuple_dir dir)
+static void nf_flow_dnat_ip(const struct flow_offload *flow,
+ struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, enum flow_offload_tuple_dir dir)
{
__be32 addr, new_addr;
@@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
iph->saddr = new_addr;
break;
- default:
- return -1;
}
csum_replace4(&iph->check, addr, new_addr);
- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
- unsigned int thoff, enum flow_offload_tuple_dir dir)
+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ unsigned int thoff, enum flow_offload_tuple_dir dir,
+ struct iphdr *iph)
{
- struct iphdr *iph = ip_hdr(skb);
-
- if (flow->flags & FLOW_OFFLOAD_SNAT &&
- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
- return -1;
- if (flow->flags & FLOW_OFFLOAD_DNAT &&
- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
- return -1;
-
- return 0;
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
+ }
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
+ }
}
static bool ip_has_options(unsigned int thoff)
@@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff)
return thoff != sizeof(struct iphdr);
}
+static void nf_flow_tuple_encap(struct sk_buff *skb,
+ struct flow_offload_tuple *tuple)
+{
+ struct vlan_ethhdr *veth;
+ struct pppoe_hdr *phdr;
+ int i = 0;
+
+ if (skb_vlan_tag_present(skb)) {
+ tuple->encap[i].id = skb_vlan_tag_get(skb);
+ tuple->encap[i].proto = skb->vlan_proto;
+ i++;
+ }
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
+ tuple->encap[i].proto = skb->protocol;
+ break;
+ case htons(ETH_P_PPP_SES):
+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+ tuple->encap[i].id = ntohs(phdr->sid);
+ tuple->encap[i].proto = skb->protocol;
+ break;
+ }
+}
+
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple)
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
+ u32 offset)
{
struct flow_ports *ports;
unsigned int thoff;
struct iphdr *iph;
- if (!pskb_may_pull(skb, sizeof(*iph)))
+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
return -1;
- iph = ip_hdr(skb);
- thoff = iph->ihl * 4;
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ thoff = (iph->ihl * 4);
if (ip_is_fragment(iph) ||
unlikely(ip_has_options(thoff)))
return -1;
- if (iph->protocol != IPPROTO_TCP &&
- iph->protocol != IPPROTO_UDP)
+ thoff += offset;
+
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ *hdrsize = sizeof(struct tcphdr);
+ break;
+ case IPPROTO_UDP:
+ *hdrsize = sizeof(struct udphdr);
+ break;
+ default:
return -1;
+ }
if (iph->ttl <= 1)
return -1;
- thoff = iph->ihl * 4;
- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ if (!pskb_may_pull(skb, thoff + *hdrsize))
return -1;
- iph = ip_hdr(skb);
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v4.s_addr = iph->saddr;
@@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
tuple->l3proto = AF_INET;
tuple->l4proto = iph->protocol;
tuple->iifidx = dev->ifindex;
+ nf_flow_tuple_encap(skb, tuple);
return 0;
}
@@ -225,6 +237,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
return NF_STOLEN;
}
+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+ u32 *offset)
+{
+ struct vlan_ethhdr *veth;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+ if (veth->h_vlan_encapsulated_proto == proto) {
+ *offset += VLAN_HLEN;
+ return true;
+ }
+ break;
+ case htons(ETH_P_PPP_SES):
+ if (nf_flow_pppoe_proto(skb) == proto) {
+ *offset += PPPOE_SES_HLEN;
+ return true;
+ }
+ break;
+ }
+
+ return false;
+}
+
+static void nf_flow_encap_pop(struct sk_buff *skb,
+ struct flow_offload_tuple_rhash *tuplehash)
+{
+ struct vlan_hdr *vlan_hdr;
+ int i;
+
+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
+ if (skb_vlan_tag_present(skb)) {
+ __vlan_hwaccel_clear_tag(skb);
+ continue;
+ }
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ vlan_hdr = (struct vlan_hdr *)skb->data;
+ __skb_pull(skb, VLAN_HLEN);
+ vlan_set_encap_proto(skb, vlan_hdr);
+ skb_reset_network_header(skb);
+ break;
+ case htons(ETH_P_PPP_SES):
+ skb->protocol = nf_flow_pppoe_proto(skb);
+ skb_pull(skb, PPPOE_SES_HLEN);
+ skb_reset_network_header(skb);
+ break;
+ }
+ }
+}
+
+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
+ const struct flow_offload_tuple_rhash *tuplehash,
+ unsigned short type)
+{
+ struct net_device *outdev;
+
+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
+ if (!outdev)
+ return NF_DROP;
+
+ skb->dev = outdev;
+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
+ tuplehash->tuple.out.h_source, skb->len);
+ dev_queue_xmit(skb);
+
+ return NF_STOLEN;
+}
+
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -235,15 +316,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
struct net_device *outdev;
+ u32 hdrsize, offset = 0;
+ unsigned int thoff, mtu;
struct rtable *rt;
- unsigned int thoff;
struct iphdr *iph;
__be32 nexthop;
+ int ret;
- if (skb->protocol != htons(ETH_P_IP))
+ if (skb->protocol != htons(ETH_P_IP) &&
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
return NF_ACCEPT;
- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
return NF_ACCEPT;
tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -252,75 +336,80 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
- outdev = rt->dst.dev;
-
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
- return NF_ACCEPT;
- if (skb_try_make_writable(skb, sizeof(*iph)))
- return NF_DROP;
-
- thoff = ip_hdr(skb)->ihl * 4;
- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
return NF_ACCEPT;
- if (!dst_check(&rt->dst, 0)) {
- flow_offload_teardown(flow);
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ thoff = (iph->ihl * 4) + offset;
+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
return NF_ACCEPT;
- }
- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
+ if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ flow_offload_refresh(flow_table, flow);
+
+ nf_flow_encap_pop(skb, tuplehash);
+ thoff -= offset;
+
iph = ip_hdr(skb);
+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
+
ip_decrease_ttl(iph);
skb->tstamp = 0;
- if (unlikely(dst_xfrm(&rt->dst))) {
+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
+
+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
IPCB(skb)->iif = skb->dev->ifindex;
IPCB(skb)->flags = IPSKB_FORWARDED;
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
- skb->dev = outdev;
- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
- skb_dst_set_noref(skb, &rt->dst);
- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+ switch (tuplehash->tuple.xmit_type) {
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ outdev = rt->dst.dev;
+ skb->dev = outdev;
+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+ skb_dst_set_noref(skb, &rt->dst);
+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+ ret = NF_STOLEN;
+ break;
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
+ if (ret == NF_DROP)
+ flow_offload_teardown(flow);
+ break;
+ }
- return NF_STOLEN;
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
- struct in6_addr *addr,
- struct in6_addr *new_addr)
+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr,
+ struct ipv6hdr *ip6h)
{
struct tcphdr *tcph;
- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
- return -1;
-
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
new_addr->s6_addr32, true);
-
- return 0;
}
-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
- struct in6_addr *addr,
- struct in6_addr *new_addr)
+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
{
struct udphdr *udph;
- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
- skb_try_make_writable(skb, thoff + sizeof(*udph)))
- return -1;
-
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
@@ -328,32 +417,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
-
- return 0;
}
-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
- unsigned int thoff, struct in6_addr *addr,
- struct in6_addr *new_addr)
+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff, struct in6_addr *addr,
+ struct in6_addr *new_addr)
{
switch (ip6h->nexthdr) {
case IPPROTO_TCP:
- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
- return NF_DROP;
+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
break;
case IPPROTO_UDP:
- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
- return NF_DROP;
+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
break;
}
-
- return 0;
}
-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
- struct sk_buff *skb, struct ipv6hdr *ip6h,
- unsigned int thoff,
- enum flow_offload_tuple_dir dir)
+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
{
struct in6_addr addr, new_addr;
@@ -368,17 +451,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
ip6h->daddr = new_addr;
break;
- default:
- return -1;
}
- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}
-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
- struct sk_buff *skb, struct ipv6hdr *ip6h,
- unsigned int thoff,
- enum flow_offload_tuple_dir dir)
+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
{
struct in6_addr addr, new_addr;
@@ -393,56 +474,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
ip6h->saddr = new_addr;
break;
- default:
- return -1;
}
- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}
-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
- struct sk_buff *skb,
- enum flow_offload_tuple_dir dir)
+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir,
+ struct ipv6hdr *ip6h)
{
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
unsigned int thoff = sizeof(*ip6h);
- if (flow->flags & FLOW_OFFLOAD_SNAT &&
- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
- return -1;
- if (flow->flags & FLOW_OFFLOAD_DNAT &&
- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
- return -1;
-
- return 0;
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
+ }
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
+ }
}
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple)
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
+ u32 offset)
{
struct flow_ports *ports;
struct ipv6hdr *ip6h;
unsigned int thoff;
- if (!pskb_may_pull(skb, sizeof(*ip6h)))
+ thoff = sizeof(*ip6h) + offset;
+ if (!pskb_may_pull(skb, thoff))
return -1;
- ip6h = ipv6_hdr(skb);
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
- if (ip6h->nexthdr != IPPROTO_TCP &&
- ip6h->nexthdr != IPPROTO_UDP)
+ switch (ip6h->nexthdr) {
+ case IPPROTO_TCP:
+ *hdrsize = sizeof(struct tcphdr);
+ break;
+ case IPPROTO_UDP:
+ *hdrsize = sizeof(struct udphdr);
+ break;
+ default:
return -1;
+ }
if (ip6h->hop_limit <= 1)
return -1;
- thoff = sizeof(*ip6h);
- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ if (!pskb_may_pull(skb, thoff + *hdrsize))
return -1;
- ip6h = ipv6_hdr(skb);
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v6 = ip6h->saddr;
@@ -452,6 +537,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
tuple->l3proto = AF_INET6;
tuple->l4proto = ip6h->nexthdr;
tuple->iifidx = dev->ifindex;
+ nf_flow_tuple_encap(skb, tuple);
return 0;
}
@@ -467,13 +553,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct in6_addr *nexthop;
struct flow_offload *flow;
struct net_device *outdev;
+ unsigned int thoff, mtu;
+ u32 hdrsize, offset = 0;
struct ipv6hdr *ip6h;
struct rt6_info *rt;
+ int ret;
- if (skb->protocol != htons(ETH_P_IPV6))
+ if (skb->protocol != htons(ETH_P_IPV6) &&
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
return NF_ACCEPT;
- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
return NF_ACCEPT;
tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -482,44 +572,57 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
- outdev = rt->dst.dev;
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
return NF_ACCEPT;
- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
- sizeof(*ip6h)))
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ thoff = sizeof(*ip6h) + offset;
+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
return NF_ACCEPT;
- if (!dst_check(&rt->dst, tuplehash->tuple.dst_cookie)) {
- flow_offload_teardown(flow);
- return NF_ACCEPT;
- }
-
- if (skb_try_make_writable(skb, sizeof(*ip6h)))
+ if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
- return NF_DROP;
+ flow_offload_refresh(flow_table, flow);
+
+ nf_flow_encap_pop(skb, tuplehash);
- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
ip6h = ipv6_hdr(skb);
+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
+
ip6h->hop_limit--;
skb->tstamp = 0;
- if (unlikely(dst_xfrm(&rt->dst))) {
+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
+
+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
IP6CB(skb)->iif = skb->dev->ifindex;
IP6CB(skb)->flags = IP6SKB_FORWARDED;
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
- skb->dev = outdev;
- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
- skb_dst_set_noref(skb, &rt->dst);
- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+ switch (tuplehash->tuple.xmit_type) {
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ outdev = rt->dst.dev;
+ skb->dev = outdev;
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+ skb_dst_set_noref(skb, &rt->dst);
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+ ret = NF_STOLEN;
+ break;
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
+ if (ret == NF_DROP)
+ flow_offload_teardown(flow);
+ break;
+ }
- return NF_STOLEN;
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
new file mode 100644
index 000000000..d94c6fb92
--- /dev/null
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -0,0 +1,1191 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <linux/tc_act/tc_csum.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+static struct workqueue_struct *nf_flow_offload_add_wq;
+static struct workqueue_struct *nf_flow_offload_del_wq;
+static struct workqueue_struct *nf_flow_offload_stats_wq;
+
+struct flow_offload_work {
+ struct list_head list;
+ enum flow_cls_command cmd;
+ int priority;
+ struct nf_flowtable *flowtable;
+ struct flow_offload *flow;
+ struct work_struct work;
+};
+
+#define NF_FLOW_DISSECTOR(__match, __type, __field) \
+ (__match)->dissector.offset[__type] = \
+ offsetof(struct nf_flow_key, __field)
+
+static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
+ struct ip_tunnel_info *tun_info)
+{
+ struct nf_flow_key *mask = &match->mask;
+ struct nf_flow_key *key = &match->key;
+ unsigned int enc_keys;
+
+ if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
+ return;
+
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
+ key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
+ mask->enc_key_id.keyid = 0xffffffff;
+ enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
+
+ if (ip_tunnel_info_af(tun_info) == AF_INET) {
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ enc_ipv4);
+ key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
+ key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
+ if (key->enc_ipv4.src)
+ mask->enc_ipv4.src = 0xffffffff;
+ if (key->enc_ipv4.dst)
+ mask->enc_ipv4.dst = 0xffffffff;
+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ } else {
+ memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
+ sizeof(struct in6_addr));
+ memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
+ sizeof(struct in6_addr));
+ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
+ sizeof(struct in6_addr)))
+ memset(&mask->enc_ipv6.src, 0xff,
+ sizeof(struct in6_addr));
+ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
+ sizeof(struct in6_addr)))
+ memset(&mask->enc_ipv6.dst, 0xff,
+ sizeof(struct in6_addr));
+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ }
+
+ match->dissector.used_keys |= enc_keys;
+}
+
+static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
+ struct flow_dissector_key_vlan *mask,
+ u16 vlan_id, __be16 proto)
+{
+ key->vlan_id = vlan_id;
+ mask->vlan_id = VLAN_VID_MASK;
+ key->vlan_tpid = proto;
+ mask->vlan_tpid = 0xffff;
+}
+
+static int nf_flow_rule_match(struct nf_flow_match *match,
+ const struct flow_offload_tuple *tuple,
+ struct dst_entry *other_dst)
+{
+ struct nf_flow_key *mask = &match->mask;
+ struct nf_flow_key *key = &match->key;
+ struct ip_tunnel_info *tun_info;
+ bool vlan_encap = false;
+
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
+
+ if (other_dst && other_dst->lwtstate) {
+ tun_info = lwt_tun_info(other_dst->lwtstate);
+ nf_flow_rule_lwt_match(match, tun_info);
+ }
+
+ key->meta.ingress_ifindex = tuple->iifidx;
+ mask->meta.ingress_ifindex = 0xffffffff;
+
+ if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
+ tuple->encap[0].proto == htons(ETH_P_8021Q)) {
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
+ tuple->encap[0].id,
+ tuple->encap[0].proto);
+ vlan_encap = true;
+ }
+
+ if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
+ tuple->encap[1].proto == htons(ETH_P_8021Q)) {
+ if (vlan_encap) {
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
+ cvlan);
+ nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
+ tuple->encap[1].id,
+ tuple->encap[1].proto);
+ } else {
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
+ vlan);
+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
+ tuple->encap[1].id,
+ tuple->encap[1].proto);
+ }
+ }
+
+ switch (tuple->l3proto) {
+ case AF_INET:
+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ key->basic.n_proto = htons(ETH_P_IP);
+ key->ipv4.src = tuple->src_v4.s_addr;
+ mask->ipv4.src = 0xffffffff;
+ key->ipv4.dst = tuple->dst_v4.s_addr;
+ mask->ipv4.dst = 0xffffffff;
+ break;
+ case AF_INET6:
+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ key->basic.n_proto = htons(ETH_P_IPV6);
+ key->ipv6.src = tuple->src_v6;
+ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
+ key->ipv6.dst = tuple->dst_v6;
+ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ mask->control.addr_type = 0xffff;
+ match->dissector.used_keys |= BIT(key->control.addr_type);
+ mask->basic.n_proto = 0xffff;
+
+ switch (tuple->l4proto) {
+ case IPPROTO_TCP:
+ key->tcp.flags = 0;
+ mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
+ break;
+ case IPPROTO_UDP:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ key->basic.ip_proto = tuple->l4proto;
+ mask->basic.ip_proto = 0xff;
+
+ key->tp.src = tuple->src_port;
+ mask->tp.src = 0xffff;
+ key->tp.dst = tuple->dst_port;
+ mask->tp.dst = 0xffff;
+
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS);
+ return 0;
+}
+
+static void flow_offload_mangle(struct flow_action_entry *entry,
+ enum flow_action_mangle_base htype, u32 offset,
+ const __be32 *value, const __be32 *mask)
+{
+ entry->id = FLOW_ACTION_MANGLE;
+ entry->mangle.htype = htype;
+ entry->mangle.offset = offset;
+ memcpy(&entry->mangle.mask, mask, sizeof(u32));
+ memcpy(&entry->mangle.val, value, sizeof(u32));
+}
+
+static inline struct flow_action_entry *
+flow_action_entry_next(struct nf_flow_rule *flow_rule)
+{
+ int i = flow_rule->rule->action.num_entries++;
+
+ return &flow_rule->rule->action.entries[i];
+}
+
+static int flow_offload_eth_src(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
+ const struct flow_offload_tuple *other_tuple, *this_tuple;
+ struct net_device *dev = NULL;
+ const unsigned char *addr;
+ u32 mask, val;
+ u16 val16;
+
+ this_tuple = &flow->tuplehash[dir].tuple;
+
+ switch (this_tuple->xmit_type) {
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ addr = this_tuple->out.h_source;
+ break;
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ dev = dev_get_by_index(net, other_tuple->iifidx);
+ if (!dev)
+ return -ENOENT;
+
+ addr = dev->dev_addr;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ mask = ~0xffff0000;
+ memcpy(&val16, addr, 2);
+ val = val16 << 16;
+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
+ &val, &mask);
+
+ mask = ~0xffffffff;
+ memcpy(&val, addr + 2, 4);
+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
+ &val, &mask);
+
+ if (dev)
+ dev_put(dev);
+
+ return 0;
+}
+
+static int flow_offload_eth_dst(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
+ const struct flow_offload_tuple *other_tuple, *this_tuple;
+ const struct dst_entry *dst_cache;
+ unsigned char ha[ETH_ALEN];
+ struct neighbour *n;
+ const void *daddr;
+ u32 mask, val;
+ u8 nud_state;
+ u16 val16;
+
+ this_tuple = &flow->tuplehash[dir].tuple;
+
+ switch (this_tuple->xmit_type) {
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ ether_addr_copy(ha, this_tuple->out.h_dest);
+ break;
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ daddr = &other_tuple->src_v4;
+ dst_cache = this_tuple->dst_cache;
+ n = dst_neigh_lookup(dst_cache, daddr);
+ if (!n)
+ return -ENOENT;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(ha, n->ha);
+ read_unlock_bh(&n->lock);
+ neigh_release(n);
+
+ if (!(nud_state & NUD_VALID))
+ return -ENOENT;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ mask = ~0xffffffff;
+ memcpy(&val, ha, 4);
+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
+ &val, &mask);
+
+ mask = ~0x0000ffff;
+ memcpy(&val16, ha + 4, 2);
+ val = val16;
+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
+ &val, &mask);
+
+ return 0;
+}
+
+static void flow_offload_ipv4_snat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ u32 mask = ~htonl(0xffffffff);
+ __be32 addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
+ offset = offsetof(struct iphdr, saddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+ offset = offsetof(struct iphdr, daddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
+ &addr, &mask);
+}
+
+static void flow_offload_ipv4_dnat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ u32 mask = ~htonl(0xffffffff);
+ __be32 addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
+ offset = offsetof(struct iphdr, daddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+ offset = offsetof(struct iphdr, saddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
+ &addr, &mask);
+}
+
+static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
+ unsigned int offset,
+ const __be32 *addr, const __be32 *mask)
+{
+ struct flow_action_entry *entry;
+ int i, j;
+
+ for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
+ entry = flow_action_entry_next(flow_rule);
+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
+ offset + i, &addr[j], mask);
+ }
+}
+
+static void flow_offload_ipv6_snat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ u32 mask = ~htonl(0xffffffff);
+ const __be32 *addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
+ offset = offsetof(struct ipv6hdr, saddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
+ offset = offsetof(struct ipv6hdr, daddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
+}
+
+static void flow_offload_ipv6_dnat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ u32 mask = ~htonl(0xffffffff);
+ const __be32 *addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
+ offset = offsetof(struct ipv6hdr, daddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
+ offset = offsetof(struct ipv6hdr, saddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
+}
+
+static int flow_offload_l4proto(const struct flow_offload *flow)
+{
+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
+ u8 type = 0;
+
+ switch (protonum) {
+ case IPPROTO_TCP:
+ type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
+ break;
+ case IPPROTO_UDP:
+ type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
+ break;
+ default:
+ break;
+ }
+
+ return type;
+}
+
+static void flow_offload_port_snat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ u32 mask, port;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
+ offset = 0; /* offsetof(struct tcphdr, source); */
+ port = htonl(port << 16);
+ mask = ~htonl(0xffff0000);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
+ offset = 0; /* offsetof(struct tcphdr, dest); */
+ port = htonl(port);
+ mask = ~htonl(0xffff);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
+ &port, &mask);
+}
+
+static void flow_offload_port_dnat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ u32 mask, port;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
+ offset = 0; /* offsetof(struct tcphdr, dest); */
+ port = htonl(port);
+ mask = ~htonl(0xffff);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
+ offset = 0; /* offsetof(struct tcphdr, source); */
+ port = htonl(port << 16);
+ mask = ~htonl(0xffff0000);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
+ &port, &mask);
+}
+
+static void flow_offload_ipv4_checksum(struct net *net,
+ const struct flow_offload *flow,
+ struct nf_flow_rule *flow_rule)
+{
+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+
+ entry->id = FLOW_ACTION_CSUM;
+ entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
+
+ switch (protonum) {
+ case IPPROTO_TCP:
+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
+ break;
+ case IPPROTO_UDP:
+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
+ break;
+ }
+}
+
+static void flow_offload_redirect(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *this_tuple, *other_tuple;
+ struct flow_action_entry *entry;
+ struct net_device *dev;
+ int ifindex;
+
+ this_tuple = &flow->tuplehash[dir].tuple;
+ switch (this_tuple->xmit_type) {
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ this_tuple = &flow->tuplehash[dir].tuple;
+ ifindex = this_tuple->out.hw_ifidx;
+ break;
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ ifindex = other_tuple->iifidx;
+ break;
+ default:
+ return;
+ }
+
+ dev = dev_get_by_index(net, ifindex);
+ if (!dev)
+ return;
+
+ entry = flow_action_entry_next(flow_rule);
+ entry->id = FLOW_ACTION_REDIRECT;
+ entry->dev = dev;
+}
+
+static void flow_offload_encap_tunnel(const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *this_tuple;
+ struct flow_action_entry *entry;
+ struct dst_entry *dst;
+
+ this_tuple = &flow->tuplehash[dir].tuple;
+ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
+ return;
+
+ dst = this_tuple->dst_cache;
+ if (dst && dst->lwtstate) {
+ struct ip_tunnel_info *tun_info;
+
+ tun_info = lwt_tun_info(dst->lwtstate);
+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
+ entry = flow_action_entry_next(flow_rule);
+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
+ entry->tunnel = tun_info;
+ }
+ }
+}
+
+static void flow_offload_decap_tunnel(const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *other_tuple;
+ struct flow_action_entry *entry;
+ struct dst_entry *dst;
+
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
+ return;
+
+ dst = other_tuple->dst_cache;
+ if (dst && dst->lwtstate) {
+ struct ip_tunnel_info *tun_info;
+
+ tun_info = lwt_tun_info(dst->lwtstate);
+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
+ entry = flow_action_entry_next(flow_rule);
+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
+ }
+ }
+}
+
+static int
+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *other_tuple;
+ const struct flow_offload_tuple *tuple;
+ int i;
+
+ flow_offload_decap_tunnel(flow, dir, flow_rule);
+ flow_offload_encap_tunnel(flow, dir, flow_rule);
+
+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
+ return -1;
+
+ tuple = &flow->tuplehash[dir].tuple;
+
+ for (i = 0; i < tuple->encap_num; i++) {
+ struct flow_action_entry *entry;
+
+ if (tuple->in_vlan_ingress & BIT(i))
+ continue;
+
+ if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
+ entry = flow_action_entry_next(flow_rule);
+ entry->id = FLOW_ACTION_VLAN_POP;
+ }
+ }
+
+ other_tuple = &flow->tuplehash[!dir].tuple;
+
+ for (i = 0; i < other_tuple->encap_num; i++) {
+ struct flow_action_entry *entry;
+
+ if (other_tuple->in_vlan_ingress & BIT(i))
+ continue;
+
+ entry = flow_action_entry_next(flow_rule);
+
+ switch (other_tuple->encap[i].proto) {
+ case htons(ETH_P_PPP_SES):
+ entry->id = FLOW_ACTION_PPPOE_PUSH;
+ entry->pppoe.sid = other_tuple->encap[i].id;
+ break;
+ case htons(ETH_P_8021Q):
+ entry->id = FLOW_ACTION_VLAN_PUSH;
+ entry->vlan.vid = other_tuple->encap[i].id;
+ entry->vlan.proto = other_tuple->encap[i].proto;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
+ return -1;
+
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
+ flow_offload_port_snat(net, flow, dir, flow_rule);
+ }
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
+ flow_offload_port_dnat(net, flow, dir, flow_rule);
+ }
+ if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
+ test_bit(NF_FLOW_DNAT, &flow->flags))
+ flow_offload_ipv4_checksum(net, flow, flow_rule);
+
+ flow_offload_redirect(net, flow, dir, flow_rule);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
+
+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
+ return -1;
+
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
+ flow_offload_port_snat(net, flow, dir, flow_rule);
+ }
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
+ flow_offload_port_dnat(net, flow, dir, flow_rule);
+ }
+
+ flow_offload_redirect(net, flow, dir, flow_rule);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
+
+#define NF_FLOW_RULE_ACTION_MAX 16
+
+static struct nf_flow_rule *
+nf_flow_offload_rule_alloc(struct net *net,
+ const struct flow_offload_work *offload,
+ enum flow_offload_tuple_dir dir)
+{
+ const struct nf_flowtable *flowtable = offload->flowtable;
+ const struct flow_offload_tuple *tuple, *other_tuple;
+ const struct flow_offload *flow = offload->flow;
+ struct dst_entry *other_dst = NULL;
+ struct nf_flow_rule *flow_rule;
+ int err = -ENOMEM;
+
+ flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
+ if (!flow_rule)
+ goto err_flow;
+
+ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
+ if (!flow_rule->rule)
+ goto err_flow_rule;
+
+ flow_rule->rule->match.dissector = &flow_rule->match.dissector;
+ flow_rule->rule->match.mask = &flow_rule->match.mask;
+ flow_rule->rule->match.key = &flow_rule->match.key;
+
+ tuple = &flow->tuplehash[dir].tuple;
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
+ other_dst = other_tuple->dst_cache;
+
+ err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
+ if (err < 0)
+ goto err_flow_match;
+
+ flow_rule->rule->action.num_entries = 0;
+ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
+ goto err_flow_match;
+
+ return flow_rule;
+
+err_flow_match:
+ kfree(flow_rule->rule);
+err_flow_rule:
+ kfree(flow_rule);
+err_flow:
+ return NULL;
+}
+
+static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry;
+ int i;
+
+ for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
+ entry = &flow_rule->rule->action.entries[i];
+ if (entry->id != FLOW_ACTION_REDIRECT)
+ continue;
+
+ dev_put(entry->dev);
+ }
+ kfree(flow_rule->rule);
+ kfree(flow_rule);
+}
+
+static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
+{
+ int i;
+
+ for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
+ __nf_flow_offload_destroy(flow_rule[i]);
+}
+
+static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
+ struct nf_flow_rule *flow_rule[])
+{
+ struct net *net = read_pnet(&offload->flowtable->net);
+
+ flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
+ FLOW_OFFLOAD_DIR_ORIGINAL);
+ if (!flow_rule[0])
+ return -ENOMEM;
+
+ flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
+ FLOW_OFFLOAD_DIR_REPLY);
+ if (!flow_rule[1]) {
+ __nf_flow_offload_destroy(flow_rule[0]);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
+ __be16 proto, int priority,
+ enum flow_cls_command cmd,
+ const struct flow_offload_tuple *tuple,
+ struct netlink_ext_ack *extack)
+{
+ cls_flow->common.protocol = proto;
+ cls_flow->common.prio = priority;
+ cls_flow->common.extack = extack;
+ cls_flow->command = cmd;
+ cls_flow->cookie = (unsigned long)tuple;
+}
+
+static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
+ struct flow_offload *flow,
+ struct nf_flow_rule *flow_rule,
+ enum flow_offload_tuple_dir dir,
+ int priority, int cmd,
+ struct flow_stats *stats,
+ struct list_head *block_cb_list)
+{
+ struct flow_cls_offload cls_flow = {};
+ struct flow_block_cb *block_cb;
+ struct netlink_ext_ack extack;
+ __be16 proto = ETH_P_ALL;
+ int err, i = 0;
+
+ nf_flow_offload_init(&cls_flow, proto, priority, cmd,
+ &flow->tuplehash[dir].tuple, &extack);
+ if (cmd == FLOW_CLS_REPLACE)
+ cls_flow.rule = flow_rule->rule;
+
+ down_read(&flowtable->flow_block_lock);
+ list_for_each_entry(block_cb, block_cb_list, list) {
+ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
+ block_cb->cb_priv);
+ if (err < 0)
+ continue;
+
+ i++;
+ }
+ up_read(&flowtable->flow_block_lock);
+
+ if (cmd == FLOW_CLS_STATS)
+ memcpy(stats, &cls_flow.stats, sizeof(*stats));
+
+ return i;
+}
+
+static int flow_offload_tuple_add(struct flow_offload_work *offload,
+ struct nf_flow_rule *flow_rule,
+ enum flow_offload_tuple_dir dir)
+{
+ return nf_flow_offload_tuple(offload->flowtable, offload->flow,
+ flow_rule, dir, offload->priority,
+ FLOW_CLS_REPLACE, NULL,
+ &offload->flowtable->flow_block.cb_list);
+}
+
+static void flow_offload_tuple_del(struct flow_offload_work *offload,
+ enum flow_offload_tuple_dir dir)
+{
+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
+ offload->priority, FLOW_CLS_DESTROY, NULL,
+ &offload->flowtable->flow_block.cb_list);
+}
+
+static int flow_offload_rule_add(struct flow_offload_work *offload,
+ struct nf_flow_rule *flow_rule[])
+{
+ int ok_count = 0;
+
+ ok_count += flow_offload_tuple_add(offload, flow_rule[0],
+ FLOW_OFFLOAD_DIR_ORIGINAL);
+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
+ FLOW_OFFLOAD_DIR_REPLY);
+ if (ok_count == 0)
+ return -ENOENT;
+
+ return 0;
+}
+
+static void flow_offload_work_add(struct flow_offload_work *offload)
+{
+ struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
+ int err;
+
+ err = nf_flow_offload_alloc(offload, flow_rule);
+ if (err < 0)
+ return;
+
+ err = flow_offload_rule_add(offload, flow_rule);
+ if (err < 0)
+ goto out;
+
+ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
+
+out:
+ nf_flow_offload_destroy(flow_rule);
+}
+
+static void flow_offload_work_del(struct flow_offload_work *offload)
+{
+ clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
+ set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
+}
+
+static void flow_offload_tuple_stats(struct flow_offload_work *offload,
+ enum flow_offload_tuple_dir dir,
+ struct flow_stats *stats)
+{
+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
+ offload->priority, FLOW_CLS_STATS, stats,
+ &offload->flowtable->flow_block.cb_list);
+}
+
+static void flow_offload_work_stats(struct flow_offload_work *offload)
+{
+ struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
+ u64 lastused;
+
+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
+
+ lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
+ offload->flow->timeout = max_t(u64, offload->flow->timeout,
+ lastused + flow_offload_get_timeout(offload->flow));
+
+ if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
+ if (stats[0].pkts)
+ nf_ct_acct_add(offload->flow->ct,
+ FLOW_OFFLOAD_DIR_ORIGINAL,
+ stats[0].pkts, stats[0].bytes);
+ if (stats[1].pkts)
+ nf_ct_acct_add(offload->flow->ct,
+ FLOW_OFFLOAD_DIR_REPLY,
+ stats[1].pkts, stats[1].bytes);
+ }
+}
+
+static void flow_offload_work_handler(struct work_struct *work)
+{
+ struct flow_offload_work *offload;
+
+ offload = container_of(work, struct flow_offload_work, work);
+ switch (offload->cmd) {
+ case FLOW_CLS_REPLACE:
+ flow_offload_work_add(offload);
+ break;
+ case FLOW_CLS_DESTROY:
+ flow_offload_work_del(offload);
+ break;
+ case FLOW_CLS_STATS:
+ flow_offload_work_stats(offload);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+
+ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
+ kfree(offload);
+}
+
+static void flow_offload_queue_work(struct flow_offload_work *offload)
+{
+ if (offload->cmd == FLOW_CLS_REPLACE)
+ queue_work(nf_flow_offload_add_wq, &offload->work);
+ else if (offload->cmd == FLOW_CLS_DESTROY)
+ queue_work(nf_flow_offload_del_wq, &offload->work);
+ else
+ queue_work(nf_flow_offload_stats_wq, &offload->work);
+}
+
+static struct flow_offload_work *
+nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
+ struct flow_offload *flow, unsigned int cmd)
+{
+ struct flow_offload_work *offload;
+
+ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
+ return NULL;
+
+ offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
+ if (!offload) {
+ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
+ return NULL;
+ }
+
+ offload->cmd = cmd;
+ offload->flow = flow;
+ offload->priority = flowtable->priority;
+ offload->flowtable = flowtable;
+ INIT_WORK(&offload->work, flow_offload_work_handler);
+
+ return offload;
+}
+
+
+void nf_flow_offload_add(struct nf_flowtable *flowtable,
+ struct flow_offload *flow)
+{
+ struct flow_offload_work *offload;
+
+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
+ if (!offload)
+ return;
+
+ flow_offload_queue_work(offload);
+}
+
+void nf_flow_offload_del(struct nf_flowtable *flowtable,
+ struct flow_offload *flow)
+{
+ struct flow_offload_work *offload;
+
+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
+ if (!offload)
+ return;
+
+ set_bit(NF_FLOW_HW_DYING, &flow->flags);
+ flow_offload_queue_work(offload);
+}
+
+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
+ struct flow_offload *flow)
+{
+ struct flow_offload_work *offload;
+ __s32 delta;
+
+ delta = nf_flow_timeout_delta(flow->timeout);
+ if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
+ return;
+
+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
+ if (!offload)
+ return;
+
+ flow_offload_queue_work(offload);
+}
+
+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
+{
+ if (nf_flowtable_hw_offload(flowtable)) {
+ flush_workqueue(nf_flow_offload_add_wq);
+ flush_workqueue(nf_flow_offload_del_wq);
+ flush_workqueue(nf_flow_offload_stats_wq);
+ }
+}
+
+static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
+ struct flow_block_offload *bo,
+ enum flow_block_command cmd)
+{
+ struct flow_block_cb *block_cb, *next;
+ int err = 0;
+
+ switch (cmd) {
+ case FLOW_BLOCK_BIND:
+ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
+ break;
+ case FLOW_BLOCK_UNBIND:
+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
+ list_del(&block_cb->list);
+ flow_block_cb_free(block_cb);
+ }
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
+ struct net *net,
+ enum flow_block_command cmd,
+ struct nf_flowtable *flowtable,
+ struct netlink_ext_ack *extack)
+{
+ memset(bo, 0, sizeof(*bo));
+ bo->net = net;
+ bo->block = &flowtable->flow_block;
+ bo->command = cmd;
+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo->extack = extack;
+ INIT_LIST_HEAD(&bo->cb_list);
+}
+
+static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
+ struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd,
+ struct netlink_ext_ack *extack)
+{
+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
+ extack);
+ flow_indr_block_call(dev, bo, cmd);
+
+ if (list_empty(&bo->cb_list))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
+ struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
+ extack);
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo;
+ int err;
+
+ if (!nf_flowtable_hw_offload(flowtable))
+ return 0;
+
+ if (dev->netdev_ops->ndo_setup_tc)
+ err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
+ &extack);
+ else
+ err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
+ &extack);
+ if (err < 0)
+ return err;
+
+ return nf_flow_table_block_setup(flowtable, &bo, cmd);
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
+
+int nf_flow_table_offload_init(void)
+{
+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
+ WQ_UNBOUND | WQ_SYSFS, 0);
+ if (!nf_flow_offload_add_wq)
+ return -ENOMEM;
+
+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
+ WQ_UNBOUND | WQ_SYSFS, 0);
+ if (!nf_flow_offload_del_wq)
+ goto err_del_wq;
+
+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
+ WQ_UNBOUND | WQ_SYSFS, 0);
+ if (!nf_flow_offload_stats_wq)
+ goto err_stats_wq;
+
+ return 0;
+
+err_stats_wq:
+ destroy_workqueue(nf_flow_offload_del_wq);
+err_del_wq:
+ destroy_workqueue(nf_flow_offload_add_wq);
+ return -ENOMEM;
+}
+
+void nf_flow_table_offload_exit(void)
+{
+ destroy_workqueue(nf_flow_offload_add_wq);
+ destroy_workqueue(nf_flow_offload_del_wq);
+ destroy_workqueue(nf_flow_offload_stats_wq);
+}
diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c
new file mode 100644
index 000000000..ae1eb2656
--- /dev/null
+++ b/net/netfilter/xt_FLOWOFFLOAD.c
@@ -0,0 +1,719 @@
+/*
+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
+#include <linux/if_vlan.h>
+#include <net/ip.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_flow_table.h>
+
+struct xt_flowoffload_hook {
+ struct hlist_node list;
+ struct nf_hook_ops ops;
+ struct net *net;
+ bool registered;
+ bool used;
+};
+
+struct xt_flowoffload_table {
+ struct nf_flowtable ft;
+ struct hlist_head hooks;
+ struct delayed_work work;
+};
+
+struct nf_forward_info {
+ const struct net_device *indev;
+ const struct net_device *outdev;
+ const struct net_device *hw_outdev;
+ struct id {
+ __u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
+ u8 num_encaps;
+ u8 ingress_vlans;
+ u8 h_source[ETH_ALEN];
+ u8 h_dest[ETH_ALEN];
+ enum flow_offload_xmit_type xmit_type;
+};
+
+static DEFINE_SPINLOCK(hooks_lock);
+
+struct xt_flowoffload_table flowtable[2];
+
+static unsigned int
+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct vlan_ethhdr *veth;
+ __be16 proto;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+ proto = veth->h_vlan_encapsulated_proto;
+ break;
+ case htons(ETH_P_PPP_SES):
+ proto = nf_flow_pppoe_proto(skb);
+ break;
+ default:
+ proto = skb->protocol;
+ break;
+ }
+
+ switch (proto) {
+ case htons(ETH_P_IP):
+ return nf_flow_offload_ip_hook(priv, skb, state);
+ case htons(ETH_P_IPV6):
+ return nf_flow_offload_ipv6_hook(priv, skb, state);
+ }
+
+ return NF_ACCEPT;
+}
+
+static int
+xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
+ struct net_device *dev)
+{
+ struct xt_flowoffload_hook *hook;
+ struct nf_hook_ops *ops;
+
+ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
+ if (!hook)
+ return -ENOMEM;
+
+ ops = &hook->ops;
+ ops->pf = NFPROTO_NETDEV;
+ ops->hooknum = NF_NETDEV_INGRESS;
+ ops->priority = 10;
+ ops->priv = &table->ft;
+ ops->hook = xt_flowoffload_net_hook;
+ ops->dev = dev;
+
+ hlist_add_head(&hook->list, &table->hooks);
+ mod_delayed_work(system_power_efficient_wq, &table->work, 0);
+
+ return 0;
+}
+
+static struct xt_flowoffload_hook *
+flow_offload_lookup_hook(struct xt_flowoffload_table *table,
+ struct net_device *dev)
+{
+ struct xt_flowoffload_hook *hook;
+
+ hlist_for_each_entry(hook, &table->hooks, list) {
+ if (hook->ops.dev == dev)
+ return hook;
+ }
+
+ return NULL;
+}
+
+static void
+xt_flowoffload_check_device(struct xt_flowoffload_table *table,
+ struct net_device *dev)
+{
+ struct xt_flowoffload_hook *hook;
+
+ if (!dev)
+ return;
+
+ spin_lock_bh(&hooks_lock);
+ hook = flow_offload_lookup_hook(table, dev);
+ if (hook)
+ hook->used = true;
+ else
+ xt_flowoffload_create_hook(table, dev);
+ spin_unlock_bh(&hooks_lock);
+}
+
+static void
+xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
+{
+ struct xt_flowoffload_hook *hook;
+
+restart:
+ hlist_for_each_entry(hook, &table->hooks, list) {
+ if (hook->registered)
+ continue;
+
+ hook->registered = true;
+ hook->net = dev_net(hook->ops.dev);
+ spin_unlock_bh(&hooks_lock);
+ nf_register_net_hook(hook->net, &hook->ops);
+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
+ table->ft.type->setup(&table->ft, hook->ops.dev,
+ FLOW_BLOCK_BIND);
+ spin_lock_bh(&hooks_lock);
+ goto restart;
+ }
+
+}
+
+static bool
+xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
+{
+ struct xt_flowoffload_hook *hook;
+ bool active = false;
+
+restart:
+ spin_lock_bh(&hooks_lock);
+ hlist_for_each_entry(hook, &table->hooks, list) {
+ if (hook->used || !hook->registered) {
+ active = true;
+ continue;
+ }
+
+ hlist_del(&hook->list);
+ spin_unlock_bh(&hooks_lock);
+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
+ table->ft.type->setup(&table->ft, hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
+ nf_unregister_net_hook(hook->net, &hook->ops);
+ kfree(hook);
+ goto restart;
+ }
+ spin_unlock_bh(&hooks_lock);
+
+ return active;
+}
+
+static void
+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
+{
+ struct xt_flowoffload_table *table = data;
+ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
+ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
+ struct xt_flowoffload_hook *hook;
+
+ spin_lock_bh(&hooks_lock);
+ hlist_for_each_entry(hook, &table->hooks, list) {
+ if (hook->ops.dev->ifindex != tuple0->iifidx &&
+ hook->ops.dev->ifindex != tuple1->iifidx)
+ continue;
+
+ hook->used = true;
+ }
+ spin_unlock_bh(&hooks_lock);
+}
+
+static void
+xt_flowoffload_hook_work(struct work_struct *work)
+{
+ struct xt_flowoffload_table *table;
+ struct xt_flowoffload_hook *hook;
+ int err;
+
+ table = container_of(work, struct xt_flowoffload_table, work.work);
+
+ spin_lock_bh(&hooks_lock);
+ xt_flowoffload_register_hooks(table);
+ hlist_for_each_entry(hook, &table->hooks, list)
+ hook->used = false;
+ spin_unlock_bh(&hooks_lock);
+
+ err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
+ table);
+ if (err && err != -EAGAIN)
+ goto out;
+
+ if (!xt_flowoffload_cleanup_hooks(table))
+ return;
+
+out:
+ queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
+}
+
+static bool
+xt_flowoffload_skip(struct sk_buff *skb, int family)
+{
+ if (skb_sec_path(skb))
+ return true;
+
+ if (family == NFPROTO_IPV4) {
+ const struct ip_options *opt = &(IPCB(skb)->opt);
+
+ if (unlikely(opt->optlen))
+ return true;
+ }
+
+ return false;
+}
+
+static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
+{
+ if (dst_xfrm(dst))
+ return FLOW_OFFLOAD_XMIT_XFRM;
+
+ return FLOW_OFFLOAD_XMIT_NEIGH;
+}
+
+static void nf_default_forward_path(struct nf_flow_route *route,
+ struct dst_entry *dst_cache,
+ enum ip_conntrack_dir dir,
+ struct net_device **dev)
+{
+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
+ route->tuple[dir].dst = dst_cache;
+ route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
+}
+
+static bool nf_is_valid_ether_device(const struct net_device *dev)
+{
+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
+ return false;
+
+ return true;
+}
+
+static void nf_dev_path_info(const struct net_device_path_stack *stack,
+ struct nf_forward_info *info,
+ unsigned char *ha)
+{
+ const struct net_device_path *path;
+ int i;
+
+ memcpy(info->h_dest, ha, ETH_ALEN);
+
+ for (i = 0; i < stack->num_paths; i++) {
+ path = &stack->path[i];
+
+ info->indev = path->dev;
+
+ switch (path->type) {
+ case DEV_PATH_ETHERNET:
+ case DEV_PATH_DSA:
+ case DEV_PATH_VLAN:
+ case DEV_PATH_PPPOE:
+ if (is_zero_ether_addr(info->h_source))
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+ if (path->type == DEV_PATH_ETHERNET)
+ break;
+ if (path->type == DEV_PATH_DSA) {
+ i = stack->num_paths;
+ break;
+ }
+
+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
+ info->indev = NULL;
+ break;
+ }
+ if (!info->outdev)
+ info->outdev = path->dev;
+ info->encap[info->num_encaps].id = path->encap.id;
+ info->encap[info->num_encaps].proto = path->encap.proto;
+ info->num_encaps++;
+ if (path->type == DEV_PATH_PPPOE)
+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
+ break;
+ case DEV_PATH_BRIDGE:
+ if (is_zero_ether_addr(info->h_source))
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+ switch (path->bridge.vlan_mode) {
+ case DEV_PATH_BR_VLAN_UNTAG_HW:
+ info->ingress_vlans |= BIT(info->num_encaps - 1);
+ break;
+ case DEV_PATH_BR_VLAN_TAG:
+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
+ info->num_encaps++;
+ break;
+ case DEV_PATH_BR_VLAN_UNTAG:
+ info->num_encaps--;
+ break;
+ case DEV_PATH_BR_VLAN_KEEP:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ if (!info->outdev)
+ info->outdev = info->indev;
+
+ info->hw_outdev = info->indev;
+
+ if (nf_is_valid_ether_device(info->indev))
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+}
+
+static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
+ const struct dst_entry *dst_cache,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir, u8 *ha,
+ struct net_device_path_stack *stack)
+{
+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
+ struct net_device *dev = dst_cache->dev;
+ struct neighbour *n;
+ u8 nud_state;
+
+ if (!nf_is_valid_ether_device(dev))
+ goto out;
+
+ n = dst_neigh_lookup(dst_cache, daddr);
+ if (!n)
+ return -1;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(ha, n->ha);
+ read_unlock_bh(&n->lock);
+ neigh_release(n);
+
+ if (!(nud_state & NUD_VALID))
+ return -1;
+
+out:
+ return dev_fill_forward_path(dev, ha, stack);
+}
+
+static int nf_dev_forward_path(struct nf_flow_route *route,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ struct net_device **devs)
+{
+ const struct dst_entry *dst = route->tuple[dir].dst;
+ struct net_device_path_stack stack;
+ struct nf_forward_info info = {};
+ unsigned char ha[ETH_ALEN];
+ int i;
+
+ if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
+ nf_dev_path_info(&stack, &info, ha);
+
+ devs[!dir] = (struct net_device *)info.indev;
+ if (!info.indev)
+ return -1;
+
+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
+ for (i = 0; i < info.num_encaps; i++) {
+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
+ }
+ route->tuple[!dir].in.num_encaps = info.num_encaps;
+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
+
+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
+ route->tuple[dir].xmit_type = info.xmit_type;
+ }
+
+ return 0;
+}
+
+static int
+xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ const struct xt_action_param *par, int ifindex,
+ struct net_device **devs)
+{
+ struct dst_entry *dst = NULL;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+ switch (xt_family(par)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.src.u3.ip;
+ fl.u.ip4.flowi4_oif = ifindex;
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.src.u3.in6;
+ fl.u.ip6.flowi6_oif = ifindex;
+ break;
+ }
+
+ nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
+ if (!dst)
+ return -ENOENT;
+
+ nf_default_forward_path(route, dst, dir, devs);
+
+ return 0;
+}
+
+static int
+xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct,
+ const struct xt_action_param *par,
+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
+ struct net_device **devs)
+{
+ int ret;
+
+ ret = xt_flowoffload_route_dir(route, ct, dir, par,
+ devs[dir]->ifindex,
+ devs);
+ if (ret)
+ return ret;
+
+ ret = xt_flowoffload_route_dir(route, ct, !dir, par,
+ devs[!dir]->ifindex,
+ devs);
+ if (ret)
+ return ret;
+
+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
+ if (nf_dev_forward_path(route, ct, dir, devs))
+ return -1;
+ if (nf_dev_forward_path(route, ct, !dir, devs))
+ return -1;
+ }
+
+ return 0;
+}
+
+static unsigned int
+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct xt_flowoffload_table *table;
+ const struct xt_flowoffload_target_info *info = par->targinfo;
+ struct tcphdr _tcph, *tcph = NULL;
+ enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
+ struct nf_flow_route route = {};
+ struct flow_offload *flow = NULL;
+ struct net_device *devs[2] = {};
+ struct nf_conn *ct;
+ struct net *net;
+
+ if (xt_flowoffload_skip(skb, xt_family(par)))
+ return XT_CONTINUE;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return XT_CONTINUE;
+
+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
+ case IPPROTO_TCP:
+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+ return XT_CONTINUE;
+
+ tcph = skb_header_pointer(skb, par->thoff,
+ sizeof(_tcph), &_tcph);
+ if (unlikely(!tcph || tcph->fin || tcph->rst))
+ return XT_CONTINUE;
+ break;
+ case IPPROTO_UDP:
+ break;
+ default:
+ return XT_CONTINUE;
+ }
+
+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
+ ct->status & IPS_SEQ_ADJUST)
+ return XT_CONTINUE;
+
+ if (!nf_ct_is_confirmed(ct))
+ return XT_CONTINUE;
+
+ devs[dir] = xt_out(par);
+ devs[!dir] = xt_in(par);
+
+ if (!devs[dir] || !devs[!dir])
+ return XT_CONTINUE;
+
+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
+ return XT_CONTINUE;
+
+ dir = CTINFO2DIR(ctinfo);
+
+ if (xt_flowoffload_route(skb, ct, par, &route, dir, devs) < 0)
+ goto err_flow_route;
+
+ flow = flow_offload_alloc(ct);
+ if (!flow)
+ goto err_flow_alloc;
+
+ if (flow_offload_route_init(flow, &route) < 0)
+ goto err_flow_add;
+
+ if (tcph) {
+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ }
+
+ table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
+
+ net = read_pnet(&table->ft.net);
+ if (!net)
+ write_pnet(&table->ft.net, xt_net(par));
+
+ if (flow_offload_add(&table->ft, flow) < 0)
+ goto err_flow_add;
+
+ xt_flowoffload_check_device(table, devs[0]);
+ xt_flowoffload_check_device(table, devs[1]);
+
+ dst_release(route.tuple[!dir].dst);
+
+ return XT_CONTINUE;
+
+err_flow_add:
+ flow_offload_free(flow);
+err_flow_alloc:
+ dst_release(route.tuple[!dir].dst);
+err_flow_route:
+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
+
+ return XT_CONTINUE;
+}
+
+static int flowoffload_chk(const struct xt_tgchk_param *par)
+{
+ struct xt_flowoffload_target_info *info = par->targinfo;
+
+ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct xt_target offload_tg_reg __read_mostly = {
+ .family = NFPROTO_UNSPEC,
+ .name = "FLOWOFFLOAD",
+ .revision = 0,
+ .targetsize = sizeof(struct xt_flowoffload_target_info),
+ .usersize = sizeof(struct xt_flowoffload_target_info),
+ .checkentry = flowoffload_chk,
+ .target = flowoffload_tg,
+ .me = THIS_MODULE,
+};
+
+static int flow_offload_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct xt_flowoffload_hook *hook0, *hook1;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (event != NETDEV_UNREGISTER)
+ return NOTIFY_DONE;
+
+ spin_lock_bh(&hooks_lock);
+ hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
+ if (hook0)
+ hlist_del(&hook0->list);
+
+ hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
+ if (hook1)
+ hlist_del(&hook1->list);
+ spin_unlock_bh(&hooks_lock);
+
+ if (hook0) {
+ nf_unregister_net_hook(hook0->net, &hook0->ops);
+ kfree(hook0);
+ }
+
+ if (hook1) {
+ nf_unregister_net_hook(hook1->net, &hook1->ops);
+ kfree(hook1);
+ }
+
+ nf_flow_table_cleanup(dev);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block flow_offload_netdev_notifier = {
+ .notifier_call = flow_offload_netdev_event,
+};
+
+static int nf_flow_rule_route_inet(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
+ int err;
+
+ switch (flow_tuple->l3proto) {
+ case NFPROTO_IPV4:
+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
+ break;
+ case NFPROTO_IPV6:
+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
+ break;
+ default:
+ err = -1;
+ break;
+ }
+
+ return err;
+}
+
+static struct nf_flowtable_type flowtable_inet = {
+ .family = NFPROTO_INET,
+ .init = nf_flow_table_init,
+ .setup = nf_flow_table_offload_setup,
+ .action = nf_flow_rule_route_inet,
+ .free = nf_flow_table_free,
+ .hook = xt_flowoffload_net_hook,
+ .owner = THIS_MODULE,
+};
+
+static int init_flowtable(struct xt_flowoffload_table *tbl)
+{
+ INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
+ tbl->ft.type = &flowtable_inet;
+
+ return nf_flow_table_init(&tbl->ft);
+}
+
+static int __init xt_flowoffload_tg_init(void)
+{
+ int ret;
+
+ register_netdevice_notifier(&flow_offload_netdev_notifier);
+
+ ret = init_flowtable(&flowtable[0]);
+ if (ret)
+ return ret;
+
+ ret = init_flowtable(&flowtable[1]);
+ if (ret)
+ goto cleanup;
+
+ flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
+
+ ret = xt_register_target(&offload_tg_reg);
+ if (ret)
+ goto cleanup2;
+
+ return 0;
+
+cleanup2:
+ nf_flow_table_free(&flowtable[1].ft);
+cleanup:
+ nf_flow_table_free(&flowtable[0].ft);
+ return ret;
+}
+
+static void __exit xt_flowoffload_tg_exit(void)
+{
+ xt_unregister_target(&offload_tg_reg);
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+ nf_flow_table_free(&flowtable[0].ft);
+ nf_flow_table_free(&flowtable[1].ft);
+}
+
+MODULE_LICENSE("GPL");
+module_init(xt_flowoffload_tg_init);
+module_exit(xt_flowoffload_tg_exit);
--
2.18.0