| From 6ad9bd65769003ab526e504577e0f747eba14287 Mon Sep 17 00:00:00 2001 |
| From: Bo Jiao <Bo.Jiao@mediatek.com> |
| Date: Wed, 22 Jun 2022 09:42:19 +0800 |
| Subject: [PATCH 1/8] |
| 9990-mt7622-backport-nf-hw-offload-framework-and-upstream-hnat-plus-xt-FLOWOFFLOAD-update-v2 |
| |
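| Backport the netfilter hardware flow offload framework |
| (nf_flow_table core/ip/offload) and the xt_FLOWOFFLOAD target, and |
| add a driver for the MediaTek packet processing engine (PPE) found |
| in MT762x SoCs. |
| |
| mtk_ppe.c manages the FOE (flow offload entry) table that the PPE |
| walks in hardware, mtk_ppe_debugfs.c exposes the table contents, and |
| mtk_ppe_offload.c binds flows to it through the flow block / |
| TC_SETUP_FT infrastructure. When a SoC sets offload_version, GDMA |
| traffic is steered to the PPE instead of the PDMA in mtk_open(). |
| |
| The transmit path of an offloadable flow is resolved with the new |
| ndo_fill_forward_path callback, implemented here for PPP/PPPoE and, |
| in the net/ changes below, for VLAN, bridge and DSA devices. |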
| --- |
| drivers/net/ethernet/mediatek/Makefile | 3 +- |
| drivers/net/ethernet/mediatek/mtk_eth_soc.c | 28 +- |
| drivers/net/ethernet/mediatek/mtk_eth_soc.h | 20 +- |
| drivers/net/ethernet/mediatek/mtk_ppe.c | 509 +++++++ |
| drivers/net/ethernet/mediatek/mtk_ppe.h | 288 ++++ |
| .../net/ethernet/mediatek/mtk_ppe_debugfs.c | 214 +++ |
| .../net/ethernet/mediatek/mtk_ppe_offload.c | 541 ++++++++ |
| drivers/net/ethernet/mediatek/mtk_ppe_regs.h | 144 ++ |
| drivers/net/ppp/ppp_generic.c | 22 + |
| drivers/net/ppp/pppoe.c | 24 + |
| include/linux/netdevice.h | 60 + |
| include/linux/ppp_channel.h | 3 + |
| include/net/dsa.h | 10 + |
| include/net/flow_offload.h | 4 + |
| include/net/ip6_route.h | 5 +- |
| .../net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 - |
| include/net/netfilter/nf_conntrack.h | 12 + |
| include/net/netfilter/nf_conntrack_acct.h | 11 + |
| include/net/netfilter/nf_flow_table.h | 264 +++- |
| include/net/netns/conntrack.h | 6 + |
| .../linux/netfilter/nf_conntrack_common.h | 9 +- |
| include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h | 17 + |
| net/8021q/vlan_dev.c | 21 + |
| net/bridge/br_device.c | 49 + |
| net/bridge/br_private.h | 20 + |
| net/bridge/br_vlan.c | 55 + |
| net/core/dev.c | 46 + |
| net/dsa/dsa.c | 9 + |
| net/dsa/slave.c | 41 +- |
| net/ipv4/netfilter/Kconfig | 4 +- |
| net/ipv6/ip6_output.c | 2 +- |
| net/ipv6/netfilter/Kconfig | 3 +- |
| net/ipv6/route.c | 22 +- |
| net/netfilter/Kconfig | 14 +- |
| net/netfilter/Makefile | 4 +- |
| net/netfilter/nf_conntrack_core.c | 20 +- |
| net/netfilter/nf_conntrack_proto_tcp.c | 4 + |
| net/netfilter/nf_conntrack_proto_udp.c | 4 + |
| net/netfilter/nf_conntrack_standalone.c | 34 +- |
| net/netfilter/nf_flow_table_core.c | 446 +++--- |
| net/netfilter/nf_flow_table_ip.c | 455 ++++--- |
| net/netfilter/nf_flow_table_offload.c | 1191 +++++++++++++++++ |
| net/netfilter/xt_FLOWOFFLOAD.c | 719 ++++++++++ |
| 43 files changed, 4928 insertions(+), 432 deletions(-) |
| create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c |
| create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h |
| create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c |
| create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c |
| create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h |
| create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h |
| create mode 100644 net/netfilter/nf_flow_table_offload.c |
| create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c |
| |
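| [Illustrative note, ignored by git-am: a sketch of how the FOE entry |
| helpers added in mtk_ppe.c fit together for an IPv4 NAPT flow. It |
| mirrors the sequence used by mtk_flow_offload_replace() in |
| mtk_ppe_offload.c; the function name and all tuple values are |
| made-up examples.] |
| |
| static int example_bind_ipv4_flow(struct mtk_eth *eth, |
| u8 *src_mac, u8 *dest_mac) |
| { |
| struct mtk_foe_entry foe; |
| /* current hardware timestamp, same register read as |
| * mtk_eth_timestamp() below |
| */ |
| u16 ts = mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP; |
| |
| mtk_foe_entry_prepare(&foe, MTK_PPE_PKT_TYPE_IPV4_HNAPT, |
| IPPROTO_TCP, 0, src_mac, dest_mac); |
| /* pre-NAT (ingress) tuple: 192.168.0.2:40000 -> 8.8.8.8:443 */ |
| mtk_foe_entry_set_ipv4_tuple(&foe, false, |
| htonl(0xc0a80002), htons(40000), |
| htonl(0x08080808), htons(443)); |
| /* post-NAT (egress) tuple after SNAT to 1.2.3.4 */ |
| mtk_foe_entry_set_ipv4_tuple(&foe, true, |
| htonl(0x01020304), htons(40000), |
| htonl(0x08080808), htons(443)); |
| mtk_foe_entry_set_pse_port(&foe, PSE_GDM1_PORT); |
| |
| /* writes the entry data before ib1 (with a barrier in between) |
| * and returns the table slot, or -ENOSPC if both candidate |
| * slots for this hash are already bound |
| */ |
| return mtk_foe_entry_commit(&eth->ppe, &foe, ts); |
| } |
| |
| In normal operation this path is driven by the FLOW_CLS_REPLACE flow |
| block callback rather than called directly. |
| |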
| diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile |
| index 13c5b4e8f..0a6af99f1 100755 |
| --- a/drivers/net/ethernet/mediatek/Makefile |
| +++ b/drivers/net/ethernet/mediatek/Makefile |
| @@ -4,5 +4,6 @@ |
| # |
| |
| obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o |
| -mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o |
| +mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o \ |
| + mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o |
| obj-$(CONFIG_NET_MEDIATEK_HNAT) += mtk_hnat/ |
| diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c |
| index 2b21f7ed0..819d8a0be 100755 |
| --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c |
| +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c |
| @@ -3081,6 +3081,7 @@ static int mtk_open(struct net_device *d |
| struct mtk_phylink_priv *phylink_priv = &mac->phylink_priv; |
| int err, i; |
| struct device_node *phy_node; |
| + u32 gdm_config = MTK_GDMA_TO_PDMA; |
| |
| err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0); |
| if (err) { |
| @@ -3157,7 +3158,10 @@ static int mtk_open(struct net_device *d |
| if (!phy_node && eth->xgmii->regmap_sgmii[mac->id]) |
| regmap_write(eth->xgmii->regmap_sgmii[mac->id], SGMSYS_QPHY_PWR_STATE_CTRL, 0); |
| |
| - mtk_gdm_config(eth, mac->id, MTK_GDMA_TO_PDMA); |
| + if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0) |
| + gdm_config = MTK_GDMA_TO_PPE; |
| + |
| + mtk_gdm_config(eth, mac->id, gdm_config); |
| |
| return 0; |
| } |
| @@ -3238,6 +3242,9 @@ static int mtk_stop(struct net_device *d |
| |
| mtk_dma_free(eth); |
| |
| + if (eth->soc->offload_version) |
| + mtk_ppe_stop(&eth->ppe); |
| + |
| return 0; |
| } |
| |
| @@ -3915,6 +3922,7 @@ static const struct net_device_ops mtk_n |
| #ifdef CONFIG_NET_POLL_CONTROLLER |
| .ndo_poll_controller = mtk_poll_controller, |
| #endif |
| + .ndo_setup_tc = mtk_eth_setup_tc, |
| }; |
| |
| static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) |
| @@ -4308,6 +4316,17 @@ static int mtk_probe(struct platform_dev |
| goto err_free_dev; |
| } |
| |
| + if (eth->soc->offload_version) { |
| + err = mtk_ppe_init(&eth->ppe, eth->dev, |
| + eth->base + MTK_ETH_PPE_BASE, 2); |
| + if (err) |
| + goto err_free_dev; |
| + |
| + err = mtk_eth_offload_init(eth); |
| + if (err) |
| + goto err_free_dev; |
| + } |
| + |
| for (i = 0; i < MTK_MAX_DEVS; i++) { |
| if (!eth->netdev[i]) |
| continue; |
| @@ -4410,6 +4429,7 @@ static const struct mtk_soc_data mt2701_ |
| .required_clks = MT7623_CLKS_BITMAP, |
| .required_pctl = true, |
| .has_sram = false, |
| + .offload_version = 2, |
| .txrx = { |
| .txd_size = sizeof(struct mtk_tx_dma), |
| .rxd_size = sizeof(struct mtk_rx_dma), |
| @@ -4424,6 +4444,7 @@ static const struct mtk_soc_data mt7621_ |
| .required_clks = MT7621_CLKS_BITMAP, |
| .required_pctl = false, |
| .has_sram = false, |
| + .offload_version = 2, |
| .txrx = { |
| .txd_size = sizeof(struct mtk_tx_dma), |
| .rxd_size = sizeof(struct mtk_rx_dma), |
| @@ -4439,6 +4460,7 @@ static const struct mtk_soc_data mt7622_ |
| .required_clks = MT7622_CLKS_BITMAP, |
| .required_pctl = false, |
| .has_sram = false, |
| + .offload_version = 2, |
| .txrx = { |
| .txd_size = sizeof(struct mtk_tx_dma), |
| .rxd_size = sizeof(struct mtk_rx_dma), |
| @@ -4453,6 +4475,7 @@ static const struct mtk_soc_data mt7623_ |
| .required_clks = MT7623_CLKS_BITMAP, |
| .required_pctl = true, |
| .has_sram = false, |
| + .offload_version = 2, |
| .txrx = { |
| .txd_size = sizeof(struct mtk_tx_dma), |
| .rxd_size = sizeof(struct mtk_rx_dma), |
| diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h |
| index b6380ffeb..349f98503 100755 |
| --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h |
| +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h |
| @@ -15,6 +15,8 @@ |
| #include <linux/u64_stats_sync.h> |
| #include <linux/refcount.h> |
| #include <linux/phylink.h> |
| +#include <linux/rhashtable.h> |
| +#include "mtk_ppe.h" |
| |
| #define MTK_QDMA_PAGE_SIZE 2048 |
| #define MTK_MAX_RX_LENGTH 1536 |
| @@ -37,7 +39,8 @@ |
| NETIF_F_HW_VLAN_CTAG_TX | \ |
| NETIF_F_SG | NETIF_F_TSO | \ |
| NETIF_F_TSO6 | \ |
| - NETIF_F_IPV6_CSUM) |
| + NETIF_F_IPV6_CSUM | \ |
| + NETIF_F_HW_TC) |
| #define MTK_SET_FEATURES (NETIF_F_LRO | \ |
| NETIF_F_HW_VLAN_CTAG_RX) |
| #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM) |
| @@ -107,6 +110,7 @@ |
| #define MTK_GDMA_TCS_EN BIT(21) |
| #define MTK_GDMA_UCS_EN BIT(20) |
| #define MTK_GDMA_TO_PDMA 0x0 |
| +#define MTK_GDMA_TO_PPE 0x4444 |
| #define MTK_GDMA_DROP_ALL 0x7777 |
| |
| /* Unicast Filter MAC Address Register - Low */ |
| @@ -547,6 +551,12 @@ |
| #define RX_DMA_TCI(_x) ((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK)) |
| #define RX_DMA_VPID(_x) (((_x) >> 16) & 0xffff) |
| |
| +/* QDMA descriptor rxd4 */ |
| +#define MTK_RXD4_FOE_ENTRY GENMASK(13, 0) |
| +#define MTK_RXD4_PPE_CPU_REASON GENMASK(18, 14) |
| +#define MTK_RXD4_SRC_PORT GENMASK(21, 19) |
| +#define MTK_RXD4_ALG GENMASK(31, 22) |
| + |
| /* QDMA descriptor rxd4 */ |
| #define RX_DMA_L4_VALID BIT(24) |
| #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */ |
| @@ -1158,6 +1168,7 @@ struct mtk_soc_data { |
| u32 caps; |
| u32 required_clks; |
| bool required_pctl; |
| + u8 offload_version; |
| netdev_features_t hw_features; |
| bool has_sram; |
| }; |
| @@ -1271,6 +1282,9 @@ struct mtk_eth { |
| int ip_align; |
| spinlock_t syscfg0_lock; |
| struct timer_list mtk_dma_monitor_timer; |
| + |
| + struct mtk_ppe ppe; |
| + struct rhashtable flow_table; |
| }; |
| |
| /* struct mtk_mac - the structure that holds the info about the MACs of the |
| @@ -1319,4 +1333,8 @@ int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id); |
| void mtk_gdm_config(struct mtk_eth *eth, u32 config); |
| void ethsys_reset(struct mtk_eth *eth, u32 reset_bits); |
| |
| +int mtk_eth_offload_init(struct mtk_eth *eth); |
| +int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type, |
| + void *type_data); |
| + |
| int mtk_mac2xgmii_id(struct mtk_eth *eth, int mac_id); |
| diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c |
| new file mode 100644 |
| index 000000000..66298e223 |
| --- /dev/null |
| +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c |
| @@ -0,0 +1,509 @@ |
| +// SPDX-License-Identifier: GPL-2.0-only |
| +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */ |
| + |
| +#include <linux/kernel.h> |
| +#include <linux/io.h> |
| +#include <linux/iopoll.h> |
| +#include <linux/etherdevice.h> |
| +#include <linux/platform_device.h> |
| +#include "mtk_ppe.h" |
| +#include "mtk_ppe_regs.h" |
| + |
| +static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val) |
| +{ |
| + writel(val, ppe->base + reg); |
| +} |
| + |
| +static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg) |
| +{ |
| + return readl(ppe->base + reg); |
| +} |
| + |
| +static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set) |
| +{ |
| + u32 val; |
| + |
| + val = ppe_r32(ppe, reg); |
| + val &= ~mask; |
| + val |= set; |
| + ppe_w32(ppe, reg, val); |
| + |
| + return val; |
| +} |
| + |
| +static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val) |
| +{ |
| + return ppe_m32(ppe, reg, 0, val); |
| +} |
| + |
| +static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val) |
| +{ |
| + return ppe_m32(ppe, reg, val, 0); |
| +} |
| + |
| +static int mtk_ppe_wait_busy(struct mtk_ppe *ppe) |
| +{ |
| + int ret; |
| + u32 val; |
| + |
| + ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val, |
| + !(val & MTK_PPE_GLO_CFG_BUSY), |
| + 20, MTK_PPE_WAIT_TIMEOUT_US); |
| + |
| + if (ret) |
| + dev_err(ppe->dev, "PPE table busy"); |
| + |
| + return ret; |
| +} |
| + |
| +static void mtk_ppe_cache_clear(struct mtk_ppe *ppe) |
| +{ |
| + ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR); |
| + ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR); |
| +} |
| + |
| +static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable) |
| +{ |
| + mtk_ppe_cache_clear(ppe); |
| + |
| + ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN, |
| + enable * MTK_PPE_CACHE_CTL_EN); |
| +} |
| + |
| +static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e) |
| +{ |
| + u32 hv1, hv2, hv3; |
| + u32 hash; |
| + |
| + switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) { |
| + case MTK_PPE_PKT_TYPE_BRIDGE: |
| + hv1 = e->bridge.src_mac_lo; |
| + hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16); |
| + hv2 = e->bridge.src_mac_hi >> 16; |
| + hv2 ^= e->bridge.dest_mac_lo; |
| + hv3 = e->bridge.dest_mac_hi; |
| + break; |
| + case MTK_PPE_PKT_TYPE_IPV4_ROUTE: |
| + case MTK_PPE_PKT_TYPE_IPV4_HNAPT: |
| + hv1 = e->ipv4.orig.ports; |
| + hv2 = e->ipv4.orig.dest_ip; |
| + hv3 = e->ipv4.orig.src_ip; |
| + break; |
| + case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T: |
| + case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T: |
| + hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3]; |
| + hv1 ^= e->ipv6.ports; |
| + |
| + hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2]; |
| + hv2 ^= e->ipv6.dest_ip[0]; |
| + |
| + hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1]; |
| + hv3 ^= e->ipv6.src_ip[0]; |
| + break; |
| + case MTK_PPE_PKT_TYPE_IPV4_DSLITE: |
| + case MTK_PPE_PKT_TYPE_IPV6_6RD: |
| + default: |
| + WARN_ON_ONCE(1); |
| + return MTK_PPE_HASH_MASK; |
| + } |
| + |
| + hash = (hv1 & hv2) | ((~hv1) & hv3); |
| + hash = (hash >> 24) | ((hash & 0xffffff) << 8); |
| + hash ^= hv1 ^ hv2 ^ hv3; |
| + hash ^= hash >> 16; |
| + hash <<= 1; |
| + hash &= MTK_PPE_ENTRIES - 1; |
| + |
| + return hash; |
| +} |
| + |
| +static inline struct mtk_foe_mac_info * |
| +mtk_foe_entry_l2(struct mtk_foe_entry *entry) |
| +{ |
| + int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1); |
| + |
| + if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) |
| + return &entry->ipv6.l2; |
| + |
| + return &entry->ipv4.l2; |
| +} |
| + |
| +static inline u32 * |
| +mtk_foe_entry_ib2(struct mtk_foe_entry *entry) |
| +{ |
| + int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1); |
| + |
| + if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) |
| + return &entry->ipv6.ib2; |
| + |
| + return &entry->ipv4.ib2; |
| +} |
| + |
| +int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto, |
| + u8 pse_port, u8 *src_mac, u8 *dest_mac) |
| +{ |
| + struct mtk_foe_mac_info *l2; |
| + u32 ports_pad, val; |
| + |
| + memset(entry, 0, sizeof(*entry)); |
| + |
| + val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) | |
| + FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) | |
| + FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) | |
| + MTK_FOE_IB1_BIND_TTL | |
| + MTK_FOE_IB1_BIND_CACHE; |
| + entry->ib1 = val; |
| + |
| + val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) | |
| + FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) | |
| + FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port); |
| + |
| + if (is_multicast_ether_addr(dest_mac)) |
| + val |= MTK_FOE_IB2_MULTICAST; |
| + |
| + ports_pad = 0xa5a5a500 | (l4proto & 0xff); |
| + if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE) |
| + entry->ipv4.orig.ports = ports_pad; |
| + if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T) |
| + entry->ipv6.ports = ports_pad; |
| + |
| + if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) { |
| + entry->ipv6.ib2 = val; |
| + l2 = &entry->ipv6.l2; |
| + } else { |
| + entry->ipv4.ib2 = val; |
| + l2 = &entry->ipv4.l2; |
| + } |
| + |
| + l2->dest_mac_hi = get_unaligned_be32(dest_mac); |
| + l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4); |
| + l2->src_mac_hi = get_unaligned_be32(src_mac); |
| + l2->src_mac_lo = get_unaligned_be16(src_mac + 4); |
| + |
| + if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T) |
| + l2->etype = ETH_P_IPV6; |
| + else |
| + l2->etype = ETH_P_IP; |
| + |
| + return 0; |
| +} |
| + |
| +int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port) |
| +{ |
| + u32 *ib2 = mtk_foe_entry_ib2(entry); |
| + u32 val; |
| + |
| + val = *ib2; |
| + val &= ~MTK_FOE_IB2_DEST_PORT; |
| + val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port); |
| + *ib2 = val; |
| + |
| + return 0; |
| +} |
| + |
| +int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress, |
| + __be32 src_addr, __be16 src_port, |
| + __be32 dest_addr, __be16 dest_port) |
| +{ |
| + int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1); |
| + struct mtk_ipv4_tuple *t; |
| + |
| + switch (type) { |
| + case MTK_PPE_PKT_TYPE_IPV4_HNAPT: |
| + if (egress) { |
| + t = &entry->ipv4.new; |
| + break; |
| + } |
| + fallthrough; |
| + case MTK_PPE_PKT_TYPE_IPV4_DSLITE: |
| + case MTK_PPE_PKT_TYPE_IPV4_ROUTE: |
| + t = &entry->ipv4.orig; |
| + break; |
| + case MTK_PPE_PKT_TYPE_IPV6_6RD: |
| + entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr); |
| + entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr); |
| + return 0; |
| + default: |
| + WARN_ON_ONCE(1); |
| + return -EINVAL; |
| + } |
| + |
| + t->src_ip = be32_to_cpu(src_addr); |
| + t->dest_ip = be32_to_cpu(dest_addr); |
| + |
| + if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE) |
| + return 0; |
| + |
| + t->src_port = be16_to_cpu(src_port); |
| + t->dest_port = be16_to_cpu(dest_port); |
| + |
| + return 0; |
| +} |
| + |
| +int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry, |
| + __be32 *src_addr, __be16 src_port, |
| + __be32 *dest_addr, __be16 dest_port) |
| +{ |
| + int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1); |
| + u32 *src, *dest; |
| + int i; |
| + |
| + switch (type) { |
| + case MTK_PPE_PKT_TYPE_IPV4_DSLITE: |
| + src = entry->dslite.tunnel_src_ip; |
| + dest = entry->dslite.tunnel_dest_ip; |
| + break; |
| + case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T: |
| + case MTK_PPE_PKT_TYPE_IPV6_6RD: |
| + entry->ipv6.src_port = be16_to_cpu(src_port); |
| + entry->ipv6.dest_port = be16_to_cpu(dest_port); |
| + fallthrough; |
| + case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T: |
| + src = entry->ipv6.src_ip; |
| + dest = entry->ipv6.dest_ip; |
| + break; |
| + default: |
| + WARN_ON_ONCE(1); |
| + return -EINVAL; |
| + } |
| + |
| + for (i = 0; i < 4; i++) |
| + src[i] = be32_to_cpu(src_addr[i]); |
| + for (i = 0; i < 4; i++) |
| + dest[i] = be32_to_cpu(dest_addr[i]); |
| + |
| + return 0; |
| +} |
| + |
| +int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port) |
| +{ |
| + struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry); |
| + |
| + l2->etype = BIT(port); |
| + |
| + if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER)) |
| + entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1); |
| + else |
| + l2->etype |= BIT(8); |
| + |
| + entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG; |
| + |
| + return 0; |
| +} |
| + |
| +int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid) |
| +{ |
| + struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry); |
| + |
| + switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) { |
| + case 0: |
| + entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG | |
| + FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1); |
| + l2->vlan1 = vid; |
| + return 0; |
| + case 1: |
| + if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) { |
| + l2->vlan1 = vid; |
| + l2->etype |= BIT(8); |
| + } else { |
| + l2->vlan2 = vid; |
| + entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1); |
| + } |
| + return 0; |
| + default: |
| + return -ENOSPC; |
| + } |
| +} |
| + |
| +int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid) |
| +{ |
| + struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry); |
| + |
| + if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) || |
| + (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) |
| + l2->etype = ETH_P_PPP_SES; |
| + |
| + entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE; |
| + l2->pppoe_id = sid; |
| + |
| + return 0; |
| +} |
| + |
| +static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry) |
| +{ |
| + return !(entry->ib1 & MTK_FOE_IB1_STATIC) && |
| + FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND; |
| +} |
| + |
| +int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry, |
| + u16 timestamp) |
| +{ |
| + struct mtk_foe_entry *hwe; |
| + u32 hash; |
| + |
| + timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP; |
| + entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP; |
| + entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp); |
| + |
| + hash = mtk_ppe_hash_entry(entry); |
| + hwe = &ppe->foe_table[hash]; |
| + if (!mtk_foe_entry_usable(hwe)) { |
| + hwe++; |
| + hash++; |
| + |
| + if (!mtk_foe_entry_usable(hwe)) |
| + return -ENOSPC; |
| + } |
| + |
| + memcpy(&hwe->data, &entry->data, sizeof(hwe->data)); |
| + wmb(); |
| + hwe->ib1 = entry->ib1; |
| + |
| + dma_wmb(); |
| + |
| + mtk_ppe_cache_clear(ppe); |
| + |
| + return hash; |
| +} |
| + |
| +int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base, |
| + int version) |
| +{ |
| + struct mtk_foe_entry *foe; |
| + |
| + /* need to allocate a separate device, since the PPE's DMA access is |
| + * not coherent. |
| + */ |
| + ppe->base = base; |
| + ppe->dev = dev; |
| + ppe->version = version; |
| + |
| + foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe), |
| + &ppe->foe_phys, GFP_KERNEL); |
| + if (!foe) |
| + return -ENOMEM; |
| + |
| + ppe->foe_table = foe; |
| + |
| + mtk_ppe_debugfs_init(ppe); |
| + |
| + return 0; |
| +} |
| + |
| +static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe) |
| +{ |
| + static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 }; |
| + int i, k; |
| + |
| + memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table)); |
| + |
| + if (!IS_ENABLED(CONFIG_SOC_MT7621)) |
| + return; |
| + |
| + /* skip all entries that cross the 1024 byte boundary */ |
| + for (i = 0; i < MTK_PPE_ENTRIES; i += 128) |
| + for (k = 0; k < ARRAY_SIZE(skip); k++) |
| + ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC; |
| +} |
| + |
| +int mtk_ppe_start(struct mtk_ppe *ppe) |
| +{ |
| + u32 val; |
| + |
| + mtk_ppe_init_foe_table(ppe); |
| + ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys); |
| + |
| + val = MTK_PPE_TB_CFG_ENTRY_80B | |
| + MTK_PPE_TB_CFG_AGE_NON_L4 | |
| + MTK_PPE_TB_CFG_AGE_UNBIND | |
| + MTK_PPE_TB_CFG_AGE_TCP | |
| + MTK_PPE_TB_CFG_AGE_UDP | |
| + MTK_PPE_TB_CFG_AGE_TCP_FIN | |
| + FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS, |
| + MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) | |
| + FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE, |
| + MTK_PPE_KEEPALIVE_DISABLE) | |
| + FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) | |
| + FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE, |
| + MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) | |
| + FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM, |
| + MTK_PPE_ENTRIES_SHIFT); |
| + ppe_w32(ppe, MTK_PPE_TB_CFG, val); |
| + |
| + ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK, |
| + MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6); |
| + |
| + mtk_ppe_cache_enable(ppe, true); |
| + |
| + val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG | |
| + MTK_PPE_FLOW_CFG_IP4_UDP_FRAG | |
| + MTK_PPE_FLOW_CFG_IP6_3T_ROUTE | |
| + MTK_PPE_FLOW_CFG_IP6_5T_ROUTE | |
| + MTK_PPE_FLOW_CFG_IP6_6RD | |
| + MTK_PPE_FLOW_CFG_IP4_NAT | |
| + MTK_PPE_FLOW_CFG_IP4_NAPT | |
| + MTK_PPE_FLOW_CFG_IP4_DSLITE | |
| + MTK_PPE_FLOW_CFG_L2_BRIDGE | |
| + MTK_PPE_FLOW_CFG_IP4_NAT_FRAG; |
| + ppe_w32(ppe, MTK_PPE_FLOW_CFG, val); |
| + |
| + val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) | |
| + FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3); |
| + ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val); |
| + |
| + val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 30) | |
| + FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1); |
| + ppe_w32(ppe, MTK_PPE_BIND_AGE0, val); |
| + |
| + val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) | |
| + FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 30); |
| + ppe_w32(ppe, MTK_PPE_BIND_AGE1, val); |
| + |
| + val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF; |
| + ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val); |
| + |
| + val = MTK_PPE_BIND_LIMIT1_FULL | |
| + FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1); |
| + ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val); |
| + |
| + val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) | |
| + FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1); |
| + ppe_w32(ppe, MTK_PPE_BIND_RATE, val); |
| + |
| + /* enable PPE */ |
| + val = MTK_PPE_GLO_CFG_EN | |
| + MTK_PPE_GLO_CFG_IP4_L4_CS_DROP | |
| + MTK_PPE_GLO_CFG_IP4_CS_DROP | |
| + MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE; |
| + ppe_w32(ppe, MTK_PPE_GLO_CFG, val); |
| + |
| + ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0); |
| + |
| + return 0; |
| +} |
| + |
| +int mtk_ppe_stop(struct mtk_ppe *ppe) |
| +{ |
| + u32 val; |
| + int i; |
| + |
| + for (i = 0; i < MTK_PPE_ENTRIES; i++) |
| + ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE, |
| + MTK_FOE_STATE_INVALID); |
| + |
| + mtk_ppe_cache_enable(ppe, false); |
| + |
| + /* disable offload engine */ |
| + ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN); |
| + ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0); |
| + |
| + /* disable aging */ |
| + val = MTK_PPE_TB_CFG_AGE_NON_L4 | |
| + MTK_PPE_TB_CFG_AGE_UNBIND | |
| + MTK_PPE_TB_CFG_AGE_TCP | |
| + MTK_PPE_TB_CFG_AGE_UDP | |
| + MTK_PPE_TB_CFG_AGE_TCP_FIN; |
| + ppe_clear(ppe, MTK_PPE_TB_CFG, val); |
| + |
| + return mtk_ppe_wait_busy(ppe); |
| +} |
| diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h |
| new file mode 100644 |
| index 000000000..242fb8f2a |
| --- /dev/null |
| +++ b/drivers/net/ethernet/mediatek/mtk_ppe.h |
| @@ -0,0 +1,288 @@ |
| +// SPDX-License-Identifier: GPL-2.0-only |
| +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */ |
| + |
| +#ifndef __MTK_PPE_H |
| +#define __MTK_PPE_H |
| + |
| +#include <linux/kernel.h> |
| +#include <linux/bitfield.h> |
| + |
| +#define MTK_ETH_PPE_BASE 0xc00 |
| + |
| +#define MTK_PPE_ENTRIES_SHIFT 3 |
| +#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT) |
| +#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1) |
| +#define MTK_PPE_WAIT_TIMEOUT_US 1000000 |
| + |
| +#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0) |
| +#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8) |
| +#define MTK_FOE_IB1_UNBIND_PREBIND BIT(24) |
| + |
| +#define MTK_FOE_IB1_BIND_TIMESTAMP GENMASK(14, 0) |
| +#define MTK_FOE_IB1_BIND_KEEPALIVE BIT(15) |
| +#define MTK_FOE_IB1_BIND_VLAN_LAYER GENMASK(18, 16) |
| +#define MTK_FOE_IB1_BIND_PPPOE BIT(19) |
| +#define MTK_FOE_IB1_BIND_VLAN_TAG BIT(20) |
| +#define MTK_FOE_IB1_BIND_PKT_SAMPLE BIT(21) |
| +#define MTK_FOE_IB1_BIND_CACHE BIT(22) |
| +#define MTK_FOE_IB1_BIND_TUNNEL_DECAP BIT(23) |
| +#define MTK_FOE_IB1_BIND_TTL BIT(24) |
| + |
| +#define MTK_FOE_IB1_PACKET_TYPE GENMASK(27, 25) |
| +#define MTK_FOE_IB1_STATE GENMASK(29, 28) |
| +#define MTK_FOE_IB1_UDP BIT(30) |
| +#define MTK_FOE_IB1_STATIC BIT(31) |
| + |
| +enum { |
| + MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0, |
| + MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1, |
| + MTK_PPE_PKT_TYPE_BRIDGE = 2, |
| + MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3, |
| + MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4, |
| + MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5, |
| + MTK_PPE_PKT_TYPE_IPV6_6RD = 7, |
| +}; |
| + |
| +#define MTK_FOE_IB2_QID GENMASK(3, 0) |
| +#define MTK_FOE_IB2_PSE_QOS BIT(4) |
| +#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5) |
| +#define MTK_FOE_IB2_MULTICAST BIT(8) |
| + |
| +#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12) |
| +#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16) |
| +#define MTK_FOE_IB2_WHNAT_NAT BIT(17) |
| + |
| +#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12) |
| + |
| +#define MTK_FOE_IB2_PORT_AG GENMASK(23, 18) |
| + |
| +#define MTK_FOE_IB2_DSCP GENMASK(31, 24) |
| + |
| +#define MTK_FOE_VLAN2_WHNAT_BSS GENMASK(5, 0) |
| +#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6) |
| +#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14) |
| + |
| +enum { |
| + MTK_FOE_STATE_INVALID, |
| + MTK_FOE_STATE_UNBIND, |
| + MTK_FOE_STATE_BIND, |
| + MTK_FOE_STATE_FIN |
| +}; |
| + |
| +struct mtk_foe_mac_info { |
| + u16 vlan1; |
| + u16 etype; |
| + |
| + u32 dest_mac_hi; |
| + |
| + u16 vlan2; |
| + u16 dest_mac_lo; |
| + |
| + u32 src_mac_hi; |
| + |
| + u16 pppoe_id; |
| + u16 src_mac_lo; |
| +}; |
| + |
| +struct mtk_foe_bridge { |
| + u32 dest_mac_hi; |
| + |
| + u16 src_mac_lo; |
| + u16 dest_mac_lo; |
| + |
| + u32 src_mac_hi; |
| + |
| + u32 ib2; |
| + |
| + u32 _rsv[5]; |
| + |
| + u32 udf_tsid; |
| + struct mtk_foe_mac_info l2; |
| +}; |
| + |
| +struct mtk_ipv4_tuple { |
| + u32 src_ip; |
| + u32 dest_ip; |
| + union { |
| + struct { |
| + u16 dest_port; |
| + u16 src_port; |
| + }; |
| + struct { |
| + u8 protocol; |
| + u8 _pad[3]; /* fill with 0xa5a5a5 */ |
| + }; |
| + u32 ports; |
| + }; |
| +}; |
| + |
| +struct mtk_foe_ipv4 { |
| + struct mtk_ipv4_tuple orig; |
| + |
| + u32 ib2; |
| + |
| + struct mtk_ipv4_tuple new; |
| + |
| + u16 timestamp; |
| + u16 _rsv0[3]; |
| + |
| + u32 udf_tsid; |
| + |
| + struct mtk_foe_mac_info l2; |
| +}; |
| + |
| +struct mtk_foe_ipv4_dslite { |
| + struct mtk_ipv4_tuple ip4; |
| + |
| + u32 tunnel_src_ip[4]; |
| + u32 tunnel_dest_ip[4]; |
| + |
| + u8 flow_label[3]; |
| + u8 priority; |
| + |
| + u32 udf_tsid; |
| + |
| + u32 ib2; |
| + |
| + struct mtk_foe_mac_info l2; |
| +}; |
| + |
| +struct mtk_foe_ipv6 { |
| + u32 src_ip[4]; |
| + u32 dest_ip[4]; |
| + |
| + union { |
| + struct { |
| + u8 protocol; |
| + u8 _pad[3]; /* fill with 0xa5a5a5 */ |
| + }; /* 3-tuple */ |
| + struct { |
| + u16 dest_port; |
| + u16 src_port; |
| + }; /* 5-tuple */ |
| + u32 ports; |
| + }; |
| + |
| + u32 _rsv[3]; |
| + |
| + u32 udf; |
| + |
| + u32 ib2; |
| + struct mtk_foe_mac_info l2; |
| +}; |
| + |
| +struct mtk_foe_ipv6_6rd { |
| + u32 src_ip[4]; |
| + u32 dest_ip[4]; |
| + u16 dest_port; |
| + u16 src_port; |
| + |
| + u32 tunnel_src_ip; |
| + u32 tunnel_dest_ip; |
| + |
| + u16 hdr_csum; |
| + u8 dscp; |
| + u8 ttl; |
| + |
| + u8 flag; |
| + u8 pad; |
| + u8 per_flow_6rd_id; |
| + u8 pad2; |
| + |
| + u32 ib2; |
| + struct mtk_foe_mac_info l2; |
| +}; |
| + |
| +struct mtk_foe_entry { |
| + u32 ib1; |
| + |
| + union { |
| + struct mtk_foe_bridge bridge; |
| + struct mtk_foe_ipv4 ipv4; |
| + struct mtk_foe_ipv4_dslite dslite; |
| + struct mtk_foe_ipv6 ipv6; |
| + struct mtk_foe_ipv6_6rd ipv6_6rd; |
| + u32 data[19]; |
| + }; |
| +}; |
| + |
| +enum { |
| + MTK_PPE_CPU_REASON_TTL_EXCEEDED = 0x02, |
| + MTK_PPE_CPU_REASON_OPTION_HEADER = 0x03, |
| + MTK_PPE_CPU_REASON_NO_FLOW = 0x07, |
| + MTK_PPE_CPU_REASON_IPV4_FRAG = 0x08, |
| + MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG = 0x09, |
| + MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP = 0x0a, |
| + MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP = 0x0b, |
| + MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST = 0x0c, |
| + MTK_PPE_CPU_REASON_UN_HIT = 0x0d, |
| + MTK_PPE_CPU_REASON_HIT_UNBIND = 0x0e, |
| + MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f, |
| + MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN = 0x10, |
| + MTK_PPE_CPU_REASON_HIT_TTL_1 = 0x11, |
| + MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION = 0x12, |
| + MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR = 0x13, |
| + MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR = 0x14, |
| + MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR = 0x15, |
| + MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU = 0x16, |
| + MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER = 0x17, |
| + MTK_PPE_CPU_REASON_MULTICAST_TO_CPU = 0x18, |
| + MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU = 0x19, |
| + MTK_PPE_CPU_REASON_HIT_PRE_BIND = 0x1a, |
| + MTK_PPE_CPU_REASON_PACKET_SAMPLING = 0x1b, |
| + MTK_PPE_CPU_REASON_EXCEED_MTU = 0x1c, |
| + MTK_PPE_CPU_REASON_PPE_BYPASS = 0x1e, |
| + MTK_PPE_CPU_REASON_INVALID = 0x1f, |
| +}; |
| + |
| +struct mtk_ppe { |
| + struct device *dev; |
| + void __iomem *base; |
| + int version; |
| + |
| + struct mtk_foe_entry *foe_table; |
| + dma_addr_t foe_phys; |
| + |
| + void *acct_table; |
| +}; |
| + |
| +int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base, |
| + int version); |
| +int mtk_ppe_start(struct mtk_ppe *ppe); |
| +int mtk_ppe_stop(struct mtk_ppe *ppe); |
| + |
| +static inline void |
| +mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash) |
| +{ |
| + ppe->foe_table[hash].ib1 = 0; |
| + dma_wmb(); |
| +} |
| + |
| +static inline int |
| +mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash) |
| +{ |
| + u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1); |
| + |
| + if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND) |
| + return -1; |
| + |
| + return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1); |
| +} |
| + |
| +int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto, |
| + u8 pse_port, u8 *src_mac, u8 *dest_mac); |
| +int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port); |
| +int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool orig, |
| + __be32 src_addr, __be16 src_port, |
| + __be32 dest_addr, __be16 dest_port); |
| +int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry, |
| + __be32 *src_addr, __be16 src_port, |
| + __be32 *dest_addr, __be16 dest_port); |
| +int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port); |
| +int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid); |
| +int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid); |
| +int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry, |
| + u16 timestamp); |
| +int mtk_ppe_debugfs_init(struct mtk_ppe *ppe); |
| + |
| +#endif |
| diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c |
| new file mode 100644 |
| index 000000000..d4b482340 |
| --- /dev/null |
| +++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c |
| @@ -0,0 +1,214 @@ |
| +// SPDX-License-Identifier: GPL-2.0-only |
| +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */ |
| + |
| +#include <linux/kernel.h> |
| +#include <linux/debugfs.h> |
| +#include "mtk_eth_soc.h" |
| + |
| +struct mtk_flow_addr_info |
| +{ |
| + void *src, *dest; |
| + u16 *src_port, *dest_port; |
| + bool ipv6; |
| +}; |
| + |
| +static const char *mtk_foe_entry_state_str(int state) |
| +{ |
| + static const char * const state_str[] = { |
| + [MTK_FOE_STATE_INVALID] = "INV", |
| + [MTK_FOE_STATE_UNBIND] = "UNB", |
| + [MTK_FOE_STATE_BIND] = "BND", |
| + [MTK_FOE_STATE_FIN] = "FIN", |
| + }; |
| + |
| + if (state >= ARRAY_SIZE(state_str) || !state_str[state]) |
| + return "UNK"; |
| + |
| + return state_str[state]; |
| +} |
| + |
| +static const char *mtk_foe_pkt_type_str(int type) |
| +{ |
| + static const char * const type_str[] = { |
| + [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T", |
| + [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T", |
| + [MTK_PPE_PKT_TYPE_BRIDGE] = "L2", |
| + [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE", |
| + [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T", |
| + [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T", |
| + [MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD", |
| + }; |
| + |
| + if (type >= ARRAY_SIZE(type_str) || !type_str[type]) |
| + return "UNKNOWN"; |
| + |
| + return type_str[type]; |
| +} |
| + |
| +static void |
| +mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6) |
| +{ |
| + u32 n_addr[4]; |
| + int i; |
| + |
| + if (!ipv6) { |
| + seq_printf(m, "%pI4h", addr); |
| + return; |
| + } |
| + |
| + for (i = 0; i < ARRAY_SIZE(n_addr); i++) |
| + n_addr[i] = htonl(addr[i]); |
| + seq_printf(m, "%pI6", n_addr); |
| +} |
| + |
| +static void |
| +mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai) |
| +{ |
| + mtk_print_addr(m, ai->src, ai->ipv6); |
| + if (ai->src_port) |
| + seq_printf(m, ":%d", *ai->src_port); |
| + seq_printf(m, "->"); |
| + mtk_print_addr(m, ai->dest, ai->ipv6); |
| + if (ai->dest_port) |
| + seq_printf(m, ":%d", *ai->dest_port); |
| +} |
| + |
| +static int |
| +mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind) |
| +{ |
| + struct mtk_ppe *ppe = m->private; |
| + int i; |
| + |
| + for (i = 0; i < MTK_PPE_ENTRIES; i++) { |
| + struct mtk_foe_entry *entry = &ppe->foe_table[i]; |
| + struct mtk_foe_mac_info *l2; |
| + struct mtk_flow_addr_info ai = {}; |
| + unsigned char h_source[ETH_ALEN]; |
| + unsigned char h_dest[ETH_ALEN]; |
| + int type, state; |
| + u32 ib2; |
| + |
| + |
| + state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1); |
| + if (!state) |
| + continue; |
| + |
| + if (bind && state != MTK_FOE_STATE_BIND) |
| + continue; |
| + |
| + type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1); |
| + seq_printf(m, "%05x %s %7s", i, |
| + mtk_foe_entry_state_str(state), |
| + mtk_foe_pkt_type_str(type)); |
| + |
| + switch (type) { |
| + case MTK_PPE_PKT_TYPE_IPV4_HNAPT: |
| + case MTK_PPE_PKT_TYPE_IPV4_DSLITE: |
| + ai.src_port = &entry->ipv4.orig.src_port; |
| + ai.dest_port = &entry->ipv4.orig.dest_port; |
| + fallthrough; |
| + case MTK_PPE_PKT_TYPE_IPV4_ROUTE: |
| + ai.src = &entry->ipv4.orig.src_ip; |
| + ai.dest = &entry->ipv4.orig.dest_ip; |
| + break; |
| + case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T: |
| + ai.src_port = &entry->ipv6.src_port; |
| + ai.dest_port = &entry->ipv6.dest_port; |
| + fallthrough; |
| + case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T: |
| + case MTK_PPE_PKT_TYPE_IPV6_6RD: |
| + ai.src = &entry->ipv6.src_ip; |
| + ai.dest = &entry->ipv6.dest_ip; |
| + ai.ipv6 = true; |
| + break; |
| + } |
| + |
| + seq_printf(m, " orig="); |
| + mtk_print_addr_info(m, &ai); |
| + |
| + switch (type) { |
| + case MTK_PPE_PKT_TYPE_IPV4_HNAPT: |
| + case MTK_PPE_PKT_TYPE_IPV4_DSLITE: |
| + ai.src_port = &entry->ipv4.new.src_port; |
| + ai.dest_port = &entry->ipv4.new.dest_port; |
| + fallthrough; |
| + case MTK_PPE_PKT_TYPE_IPV4_ROUTE: |
| + ai.src = &entry->ipv4.new.src_ip; |
| + ai.dest = &entry->ipv4.new.dest_ip; |
| + seq_printf(m, " new="); |
| + mtk_print_addr_info(m, &ai); |
| + break; |
| + } |
| + |
| + if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) { |
| + l2 = &entry->ipv6.l2; |
| + ib2 = entry->ipv6.ib2; |
| + } else { |
| + l2 = &entry->ipv4.l2; |
| + ib2 = entry->ipv4.ib2; |
| + } |
| + |
| + *((__be32 *)h_source) = htonl(l2->src_mac_hi); |
| + *((__be16 *)&h_source[4]) = htons(l2->src_mac_lo); |
| + *((__be32 *)h_dest) = htonl(l2->dest_mac_hi); |
| + *((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo); |
| + |
| + seq_printf(m, " eth=%pM->%pM etype=%04x" |
| + " vlan=%d,%d ib1=%08x ib2=%08x\n", |
| + h_source, h_dest, ntohs(l2->etype), |
| + l2->vlan1, l2->vlan2, entry->ib1, ib2); |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +static int |
| +mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private) |
| +{ |
| + return mtk_ppe_debugfs_foe_show(m, private, false); |
| +} |
| + |
| +static int |
| +mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private) |
| +{ |
| + return mtk_ppe_debugfs_foe_show(m, private, true); |
| +} |
| + |
| +static int |
| +mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file) |
| +{ |
| + return single_open(file, mtk_ppe_debugfs_foe_show_all, |
| + inode->i_private); |
| +} |
| + |
| +static int |
| +mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file) |
| +{ |
| + return single_open(file, mtk_ppe_debugfs_foe_show_bind, |
| + inode->i_private); |
| +} |
| + |
| +int mtk_ppe_debugfs_init(struct mtk_ppe *ppe) |
| +{ |
| + static const struct file_operations fops_all = { |
| + .open = mtk_ppe_debugfs_foe_open_all, |
| + .read = seq_read, |
| + .llseek = seq_lseek, |
| + .release = single_release, |
| + }; |
| + |
| + static const struct file_operations fops_bind = { |
| + .open = mtk_ppe_debugfs_foe_open_bind, |
| + .read = seq_read, |
| + .llseek = seq_lseek, |
| + .release = single_release, |
| + }; |
| + |
| + struct dentry *root; |
| + |
| + root = debugfs_create_dir("mtk_ppe", NULL); |
| + debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all); |
| + debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind); |
| + |
| + return 0; |
| +} |
| diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c |
| new file mode 100644 |
| index 000000000..4294f0c74 |
| --- /dev/null |
| +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c |
| @@ -0,0 +1,541 @@ |
| +// SPDX-License-Identifier: GPL-2.0-only |
| +/* |
| + * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> |
| + */ |
| + |
| +#include <linux/if_ether.h> |
| +#include <linux/rhashtable.h> |
| +#include <linux/ip.h> |
| +#include <linux/ipv6.h> |
| +#include <net/flow_offload.h> |
| +#include <net/pkt_cls.h> |
| +#include <net/dsa.h> |
| +#include "mtk_eth_soc.h" |
| + |
| +struct mtk_flow_data { |
| + struct ethhdr eth; |
| + |
| + union { |
| + struct { |
| + __be32 src_addr; |
| + __be32 dst_addr; |
| + } v4; |
| + |
| + struct { |
| + struct in6_addr src_addr; |
| + struct in6_addr dst_addr; |
| + } v6; |
| + }; |
| + |
| + __be16 src_port; |
| + __be16 dst_port; |
| + |
| + struct { |
| + u16 id; |
| + __be16 proto; |
| + u8 num; |
| + } vlan; |
| + struct { |
| + u16 sid; |
| + u8 num; |
| + } pppoe; |
| +}; |
| + |
| +struct mtk_flow_entry { |
| + struct rhash_head node; |
| + unsigned long cookie; |
| + u16 hash; |
| +}; |
| + |
| +static const struct rhashtable_params mtk_flow_ht_params = { |
| + .head_offset = offsetof(struct mtk_flow_entry, node), |
| + .key_offset = offsetof(struct mtk_flow_entry, cookie), |
| + .key_len = sizeof(unsigned long), |
| + .automatic_shrinking = true, |
| +}; |
| + |
| +static u32 |
| +mtk_eth_timestamp(struct mtk_eth *eth) |
| +{ |
| + return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP; |
| +} |
| + |
| +static int |
| +mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data, |
| + bool egress) |
| +{ |
| + return mtk_foe_entry_set_ipv4_tuple(foe, egress, |
| + data->v4.src_addr, data->src_port, |
| + data->v4.dst_addr, data->dst_port); |
| +} |
| + |
| +static int |
| +mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data) |
| +{ |
| + return mtk_foe_entry_set_ipv6_tuple(foe, |
| + data->v6.src_addr.s6_addr32, data->src_port, |
| + data->v6.dst_addr.s6_addr32, data->dst_port); |
| +} |
| + |
| +static void |
| +mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth) |
| +{ |
| + void *dest = eth + act->mangle.offset; |
| + const void *src = &act->mangle.val; |
| + |
| + if (act->mangle.offset > 8) |
| + return; |
| + |
| + if (act->mangle.mask == 0xffff) { |
| + src += 2; |
| + dest += 2; |
| + } |
| + |
| + memcpy(dest, src, act->mangle.mask ? 2 : 4); |
| +} |
| + |
| + |
| +static int |
| +mtk_flow_mangle_ports(const struct flow_action_entry *act, |
| + struct mtk_flow_data *data) |
| +{ |
| + u32 val = ntohl(act->mangle.val); |
| + |
| + switch (act->mangle.offset) { |
| + case 0: |
| + if (act->mangle.mask == ~htonl(0xffff)) |
| + data->dst_port = cpu_to_be16(val); |
| + else |
| + data->src_port = cpu_to_be16(val >> 16); |
| + break; |
| + case 2: |
| + data->dst_port = cpu_to_be16(val); |
| + break; |
| + default: |
| + return -EINVAL; |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +static int |
| +mtk_flow_mangle_ipv4(const struct flow_action_entry *act, |
| + struct mtk_flow_data *data) |
| +{ |
| + __be32 *dest; |
| + |
| + switch (act->mangle.offset) { |
| + case offsetof(struct iphdr, saddr): |
| + dest = &data->v4.src_addr; |
| + break; |
| + case offsetof(struct iphdr, daddr): |
| + dest = &data->v4.dst_addr; |
| + break; |
| + default: |
| + return -EINVAL; |
| + } |
| + |
| + memcpy(dest, &act->mangle.val, sizeof(u32)); |
| + |
| + return 0; |
| +} |
| + |
| +static int |
| +mtk_flow_get_dsa_port(struct net_device **dev) |
| +{ |
| +#if IS_ENABLED(CONFIG_NET_DSA) |
| + struct dsa_port *dp; |
| + |
| + dp = dsa_port_from_netdev(*dev); |
| + if (IS_ERR(dp)) |
| + return -ENODEV; |
| + |
| + if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK) |
| + return -ENODEV; |
| + |
| + *dev = dp->cpu_dp->master; |
| + |
| + return dp->index; |
| +#else |
| + return -ENODEV; |
| +#endif |
| +} |
| + |
| +static int |
| +mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, |
| + struct net_device *dev) |
| +{ |
| + int pse_port, dsa_port; |
| + |
| + dsa_port = mtk_flow_get_dsa_port(&dev); |
| + if (dsa_port >= 0) |
| + mtk_foe_entry_set_dsa(foe, dsa_port); |
| + |
| + if (dev == eth->netdev[0]) |
| + pse_port = PSE_GDM1_PORT; |
| + else if (dev == eth->netdev[1]) |
| + pse_port = PSE_GDM2_PORT; |
| + else |
| + return -EOPNOTSUPP; |
| + |
| + mtk_foe_entry_set_pse_port(foe, pse_port); |
| + |
| + return 0; |
| +} |
| + |
| +static int |
| +mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) |
| +{ |
| + struct flow_rule *rule = flow_cls_offload_flow_rule(f); |
| + struct flow_action_entry *act; |
| + struct mtk_flow_data data = {}; |
| + struct mtk_foe_entry foe; |
| + struct net_device *odev = NULL; |
| + struct mtk_flow_entry *entry; |
| + int offload_type = 0; |
| + u16 addr_type = 0; |
| + u32 timestamp; |
| + u8 l4proto = 0; |
| + int err = 0; |
| + int hash; |
| + int i; |
| + |
| + if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params)) |
| + return -EEXIST; |
| + |
| + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { |
| + struct flow_match_meta match; |
| + |
| + flow_rule_match_meta(rule, &match); |
| + } else { |
| + return -EOPNOTSUPP; |
| + } |
| + |
| + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { |
| + struct flow_match_control match; |
| + |
| + flow_rule_match_control(rule, &match); |
| + addr_type = match.key->addr_type; |
| + } else { |
| + return -EOPNOTSUPP; |
| + } |
| + |
| + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { |
| + struct flow_match_basic match; |
| + |
| + flow_rule_match_basic(rule, &match); |
| + l4proto = match.key->ip_proto; |
| + } else { |
| + return -EOPNOTSUPP; |
| + } |
| + |
| + flow_action_for_each(i, act, &rule->action) { |
| + switch (act->id) { |
| + case FLOW_ACTION_MANGLE: |
| + if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH) |
| + mtk_flow_offload_mangle_eth(act, &data.eth); |
| + break; |
| + case FLOW_ACTION_REDIRECT: |
| + odev = act->dev; |
| + break; |
| + case FLOW_ACTION_CSUM: |
| + break; |
| + case FLOW_ACTION_VLAN_PUSH: |
| + if (data.vlan.num == 1 || |
| + act->vlan.proto != htons(ETH_P_8021Q)) |
| + return -EOPNOTSUPP; |
| + |
| + data.vlan.id = act->vlan.vid; |
| + data.vlan.proto = act->vlan.proto; |
| + data.vlan.num++; |
| + break; |
| + case FLOW_ACTION_VLAN_POP: |
| + break; |
| + case FLOW_ACTION_PPPOE_PUSH: |
| + if (data.pppoe.num == 1) |
| + return -EOPNOTSUPP; |
| + |
| + data.pppoe.sid = act->pppoe.sid; |
| + data.pppoe.num++; |
| + break; |
| + default: |
| + return -EOPNOTSUPP; |
| + } |
| + } |
| + |
| + switch (addr_type) { |
| + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: |
| + offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT; |
| + break; |
| + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: |
| + offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T; |
| + break; |
| + default: |
| + return -EOPNOTSUPP; |
| + } |
| + |
| + if (!is_valid_ether_addr(data.eth.h_source) || |
| + !is_valid_ether_addr(data.eth.h_dest)) |
| + return -EINVAL; |
| + |
| + err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0, |
| + data.eth.h_source, |
| + data.eth.h_dest); |
| + if (err) |
| + return err; |
| + |
| + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { |
| + struct flow_match_ports ports; |
| + |
| + flow_rule_match_ports(rule, &ports); |
| + data.src_port = ports.key->src; |
| + data.dst_port = ports.key->dst; |
| + } else { |
| + return -EOPNOTSUPP; |
| + } |
| + |
| + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { |
| + struct flow_match_ipv4_addrs addrs; |
| + |
| + flow_rule_match_ipv4_addrs(rule, &addrs); |
| + |
| + data.v4.src_addr = addrs.key->src; |
| + data.v4.dst_addr = addrs.key->dst; |
| + |
| + mtk_flow_set_ipv4_addr(&foe, &data, false); |
| + } |
| + |
| + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { |
| + struct flow_match_ipv6_addrs addrs; |
| + |
| + flow_rule_match_ipv6_addrs(rule, &addrs); |
| + |
| + data.v6.src_addr = addrs.key->src; |
| + data.v6.dst_addr = addrs.key->dst; |
| + |
| + mtk_flow_set_ipv6_addr(&foe, &data); |
| + } |
| + |
| + flow_action_for_each(i, act, &rule->action) { |
| + if (act->id != FLOW_ACTION_MANGLE) |
| + continue; |
| + |
| + switch (act->mangle.htype) { |
| + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: |
| + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: |
| + err = mtk_flow_mangle_ports(act, &data); |
| + break; |
| + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: |
| + err = mtk_flow_mangle_ipv4(act, &data); |
| + break; |
| + case FLOW_ACT_MANGLE_HDR_TYPE_ETH: |
| + /* handled earlier */ |
| + break; |
| + default: |
| + return -EOPNOTSUPP; |
| + } |
| + |
| + if (err) |
| + return err; |
| + } |
| + |
| + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { |
| + err = mtk_flow_set_ipv4_addr(&foe, &data, true); |
| + if (err) |
| + return err; |
| + } |
| + |
| + if (data.vlan.num == 1) { |
| + if (data.vlan.proto != htons(ETH_P_8021Q)) |
| + return -EOPNOTSUPP; |
| + |
| + mtk_foe_entry_set_vlan(&foe, data.vlan.id); |
| + } |
| + if (data.pppoe.num == 1) |
| + mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid); |
| + |
| + err = mtk_flow_set_output_device(eth, &foe, odev); |
| + if (err) |
| + return err; |
| + |
| + entry = kzalloc(sizeof(*entry), GFP_KERNEL); |
| + if (!entry) |
| + return -ENOMEM; |
| + |
| + entry->cookie = f->cookie; |
| + timestamp = mtk_eth_timestamp(eth); |
| + hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp); |
| + if (hash < 0) { |
| + err = hash; |
| + goto free; |
| + } |
| + |
| + entry->hash = hash; |
| + err = rhashtable_insert_fast(&eth->flow_table, &entry->node, |
| + mtk_flow_ht_params); |
| + if (err < 0) |
| + goto clear_flow; |
| + |
| + return 0; |
| +clear_flow: |
| + mtk_foe_entry_clear(&eth->ppe, hash); |
| +free: |
| + kfree(entry); |
| + return err; |
| +} |
| + |
| +static int |
| +mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f) |
| +{ |
| + struct mtk_flow_entry *entry; |
| + |
| + entry = rhashtable_lookup(&eth->flow_table, &f->cookie, |
| + mtk_flow_ht_params); |
| + if (!entry) |
| + return -ENOENT; |
| + |
| + mtk_foe_entry_clear(&eth->ppe, entry->hash); |
| + rhashtable_remove_fast(&eth->flow_table, &entry->node, |
| + mtk_flow_ht_params); |
| + kfree(entry); |
| + |
| + return 0; |
| +} |
| + |
| +static int |
| +mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f) |
| +{ |
| + struct mtk_flow_entry *entry; |
| + int timestamp; |
| + u32 idle; |
| + |
| + entry = rhashtable_lookup(&eth->flow_table, &f->cookie, |
| + mtk_flow_ht_params); |
| + if (!entry) |
| + return -ENOENT; |
| + |
| + timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash); |
| + if (timestamp < 0) |
| + return -ETIMEDOUT; |
| + |
| + idle = mtk_eth_timestamp(eth) - timestamp; |
| + f->stats.lastused = jiffies - idle * HZ; |
| + |
| + return 0; |
| +} |
| + |
| +static DEFINE_MUTEX(mtk_flow_offload_mutex); |
| + |
| +static int |
| +mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) |
| +{ |
| + struct flow_cls_offload *cls = type_data; |
| + struct net_device *dev = cb_priv; |
| + struct mtk_mac *mac = netdev_priv(dev); |
| + struct mtk_eth *eth = mac->hw; |
| + int err; |
| + |
| + if (!tc_can_offload(dev)) |
| + return -EOPNOTSUPP; |
| + |
| + if (type != TC_SETUP_CLSFLOWER) |
| + return -EOPNOTSUPP; |
| + |
| + mutex_lock(&mtk_flow_offload_mutex); |
| + switch (cls->command) { |
| + case FLOW_CLS_REPLACE: |
| + err = mtk_flow_offload_replace(eth, cls); |
| + break; |
| + case FLOW_CLS_DESTROY: |
| + err = mtk_flow_offload_destroy(eth, cls); |
| + break; |
| + case FLOW_CLS_STATS: |
| + err = mtk_flow_offload_stats(eth, cls); |
| + break; |
| + default: |
| + err = -EOPNOTSUPP; |
| + break; |
| + } |
| + mutex_unlock(&mtk_flow_offload_mutex); |
| + |
| + return err; |
| +} |
| + |
| +static int |
| +mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f) |
| +{ |
| + struct mtk_mac *mac = netdev_priv(dev); |
| + struct mtk_eth *eth = mac->hw; |
| + struct nf_flowtable *flowtable; |
| + static LIST_HEAD(block_cb_list); |
| + struct flow_block_cb *block_cb; |
| + flow_setup_cb_t *cb; |
| + int err = 0; |
| + |
| + flowtable = container_of(f->block, struct nf_flowtable, flow_block); |
| + |
| + if (!eth->ppe.foe_table) |
| + return -EOPNOTSUPP; |
| + |
| + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) |
| + return -EOPNOTSUPP; |
| + |
| + cb = mtk_eth_setup_tc_block_cb; |
| + f->driver_block_list = &block_cb_list; |
| + |
| + down_write(&flowtable->flow_block_lock); |
| + |
| + switch (f->command) { |
| + case FLOW_BLOCK_BIND: |
| + block_cb = flow_block_cb_lookup(f->block, cb, dev); |
| + if (block_cb) { |
| + flow_block_cb_incref(block_cb); |
| + goto unlock; |
| + } |
| + block_cb = flow_block_cb_alloc(cb, dev, dev, NULL); |
| + if (IS_ERR(block_cb)) { |
| + err = PTR_ERR(block_cb); |
| + goto unlock; |
| + } |
| + |
| + flow_block_cb_add(block_cb, f); |
| + list_add_tail(&block_cb->driver_list, &block_cb_list); |
| + break; |
| + case FLOW_BLOCK_UNBIND: |
| + block_cb = flow_block_cb_lookup(f->block, cb, dev); |
| + if (!block_cb) { |
| + err = -ENOENT; |
| + goto unlock; |
| + } |
| + |
| + if (flow_block_cb_decref(block_cb)) { |
| + flow_block_cb_remove(block_cb, f); |
| + list_del(&block_cb->driver_list); |
| + } |
| + break; |
| + default: |
| + err = -EOPNOTSUPP; |
| + break; |
| + } |
| + |
| +unlock: |
| + up_write(&flowtable->flow_block_lock); |
| + return err; |
| +} |
| + |
| +int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type, |
| + void *type_data) |
| +{ |
| + if (type == TC_SETUP_FT) |
| + return mtk_eth_setup_tc_block(dev, type_data); |
| + |
| + return -EOPNOTSUPP; |
| +} |
| + |
| +int mtk_eth_offload_init(struct mtk_eth *eth) |
| +{ |
| + if (!eth->ppe.foe_table) |
| + return 0; |
| + |
| + return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params); |
| +} |
| diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h |
| new file mode 100644 |
| index 000000000..0c45ea090 |
| --- /dev/null |
| +++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h |
| @@ -0,0 +1,144 @@ |
| +// SPDX-License-Identifier: GPL-2.0-only |
| +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */ |
| + |
| +#ifndef __MTK_PPE_REGS_H |
| +#define __MTK_PPE_REGS_H |
| + |
| +#define MTK_PPE_GLO_CFG 0x200 |
| +#define MTK_PPE_GLO_CFG_EN BIT(0) |
| +#define MTK_PPE_GLO_CFG_TSID_EN BIT(1) |
| +#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP BIT(2) |
| +#define MTK_PPE_GLO_CFG_IP4_CS_DROP BIT(3) |
| +#define MTK_PPE_GLO_CFG_TTL0_DROP BIT(4) |
| +#define MTK_PPE_GLO_CFG_PPE_BSWAP BIT(5) |
| +#define MTK_PPE_GLO_CFG_PSE_HASH_OFS BIT(6) |
| +#define MTK_PPE_GLO_CFG_MCAST_TB_EN BIT(7) |
| +#define MTK_PPE_GLO_CFG_FLOW_DROP_KA BIT(8) |
| +#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE BIT(9) |
| +#define MTK_PPE_GLO_CFG_UDP_LITE_EN BIT(10) |
| +#define MTK_PPE_GLO_CFG_UDP_LEN_DROP BIT(11) |
| +#define MTK_PPE_GLO_CFG_MCAST_ENTRIES GENMASK(13, 12) |
| +#define MTK_PPE_GLO_CFG_BUSY BIT(31) |
| + |
| +#define MTK_PPE_FLOW_CFG 0x204 |
| +#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG BIT(6) |
| +#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG BIT(7) |
| +#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE BIT(8) |
| +#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE BIT(9) |
| +#define MTK_PPE_FLOW_CFG_IP6_6RD BIT(10) |
| +#define MTK_PPE_FLOW_CFG_IP4_NAT BIT(12) |
| +#define MTK_PPE_FLOW_CFG_IP4_NAPT BIT(13) |
| +#define MTK_PPE_FLOW_CFG_IP4_DSLITE BIT(14) |
| +#define MTK_PPE_FLOW_CFG_L2_BRIDGE BIT(15) |
| +#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST BIT(16) |
| +#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG BIT(17) |
| +#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL BIT(18) |
| +#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY BIT(19) |
| +#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY BIT(20) |
| + |
| +#define MTK_PPE_IP_PROTO_CHK 0x208 |
| +#define MTK_PPE_IP_PROTO_CHK_IPV4 GENMASK(15, 0) |
| +#define MTK_PPE_IP_PROTO_CHK_IPV6 GENMASK(31, 16) |
| + |
| +#define MTK_PPE_TB_CFG 0x21c |
| +#define MTK_PPE_TB_CFG_ENTRY_NUM GENMASK(2, 0) |
| +#define MTK_PPE_TB_CFG_ENTRY_80B BIT(3) |
| +#define MTK_PPE_TB_CFG_SEARCH_MISS GENMASK(5, 4) |
| +#define MTK_PPE_TB_CFG_AGE_PREBIND BIT(6) |
| +#define MTK_PPE_TB_CFG_AGE_NON_L4 BIT(7) |
| +#define MTK_PPE_TB_CFG_AGE_UNBIND BIT(8) |
| +#define MTK_PPE_TB_CFG_AGE_TCP BIT(9) |
| +#define MTK_PPE_TB_CFG_AGE_UDP BIT(10) |
| +#define MTK_PPE_TB_CFG_AGE_TCP_FIN BIT(11) |
| +#define MTK_PPE_TB_CFG_KEEPALIVE GENMASK(13, 12) |
| +#define MTK_PPE_TB_CFG_HASH_MODE GENMASK(15, 14) |
| +#define MTK_PPE_TB_CFG_SCAN_MODE GENMASK(17, 16) |
| +#define MTK_PPE_TB_CFG_HASH_DEBUG GENMASK(19, 18) |
| + |
| +enum { |
| + MTK_PPE_SCAN_MODE_DISABLED, |
| + MTK_PPE_SCAN_MODE_CHECK_AGE, |
| + MTK_PPE_SCAN_MODE_KEEPALIVE_AGE, |
| +}; |
| + |
| +enum { |
| + MTK_PPE_KEEPALIVE_DISABLE, |
| + MTK_PPE_KEEPALIVE_UNICAST_CPU, |
| + MTK_PPE_KEEPALIVE_DUP_CPU = 3, |
| +}; |
| + |
| +enum { |
| + MTK_PPE_SEARCH_MISS_ACTION_DROP, |
| + MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2, |
| + MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3, |
| +}; |
| + |
| +#define MTK_PPE_TB_BASE 0x220 |
| + |
| +#define MTK_PPE_TB_USED 0x224 |
| +#define MTK_PPE_TB_USED_NUM GENMASK(13, 0) |
| + |
| +#define MTK_PPE_BIND_RATE 0x228 |
| +#define MTK_PPE_BIND_RATE_BIND GENMASK(15, 0) |
| +#define MTK_PPE_BIND_RATE_PREBIND GENMASK(31, 16) |
| + |
| +#define MTK_PPE_BIND_LIMIT0 0x22c |
| +#define MTK_PPE_BIND_LIMIT0_QUARTER GENMASK(13, 0) |
| +#define MTK_PPE_BIND_LIMIT0_HALF GENMASK(29, 16) |
| + |
| +#define MTK_PPE_BIND_LIMIT1 0x230 |
| +#define MTK_PPE_BIND_LIMIT1_FULL GENMASK(13, 0) |
| +#define MTK_PPE_BIND_LIMIT1_NON_L4 GENMASK(23, 16) |
| + |
| +#define MTK_PPE_KEEPALIVE 0x234 |
| +#define MTK_PPE_KEEPALIVE_TIME GENMASK(15, 0) |
| +#define MTK_PPE_KEEPALIVE_TIME_TCP GENMASK(23, 16) |
| +#define MTK_PPE_KEEPALIVE_TIME_UDP GENMASK(31, 24) |
| + |
| +#define MTK_PPE_UNBIND_AGE 0x238 |
| +#define MTK_PPE_UNBIND_AGE_MIN_PACKETS GENMASK(31, 16) |
| +#define MTK_PPE_UNBIND_AGE_DELTA GENMASK(7, 0) |
| + |
| +#define MTK_PPE_BIND_AGE0 0x23c |
| +#define MTK_PPE_BIND_AGE0_DELTA_NON_L4 GENMASK(30, 16) |
| +#define MTK_PPE_BIND_AGE0_DELTA_UDP GENMASK(14, 0) |
| + |
| +#define MTK_PPE_BIND_AGE1 0x240 |
| +#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN GENMASK(30, 16) |
| +#define MTK_PPE_BIND_AGE1_DELTA_TCP GENMASK(14, 0) |
| + |
| +#define MTK_PPE_HASH_SEED 0x244 |
| + |
| +#define MTK_PPE_DEFAULT_CPU_PORT 0x248 |
| +#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n) (GENMASK(2, 0) << ((_n) * 4)) |
| + |
| +#define MTK_PPE_MTU_DROP 0x308 |
| + |
| +#define MTK_PPE_VLAN_MTU0 0x30c |
| +#define MTK_PPE_VLAN_MTU0_NONE GENMASK(13, 0) |
| +#define MTK_PPE_VLAN_MTU0_1TAG GENMASK(29, 16) |
| + |
| +#define MTK_PPE_VLAN_MTU1 0x310 |
| +#define MTK_PPE_VLAN_MTU1_2TAG GENMASK(13, 0) |
| +#define MTK_PPE_VLAN_MTU1_3TAG GENMASK(29, 16) |
| + |
| +#define MTK_PPE_VPM_TPID 0x318 |
| + |
| +#define MTK_PPE_CACHE_CTL 0x320 |
| +#define MTK_PPE_CACHE_CTL_EN BIT(0) |
| +#define MTK_PPE_CACHE_CTL_LOCK_CLR BIT(4) |
| +#define MTK_PPE_CACHE_CTL_REQ BIT(8) |
| +#define MTK_PPE_CACHE_CTL_CLEAR BIT(9) |
| +#define MTK_PPE_CACHE_CTL_CMD GENMASK(13, 12) |
| + |
| +#define MTK_PPE_MIB_CFG 0x334 |
| +#define MTK_PPE_MIB_CFG_EN BIT(0) |
| +#define MTK_PPE_MIB_CFG_RD_CLR BIT(1) |
| + |
| +#define MTK_PPE_MIB_TB_BASE 0x338 |
| + |
| +#define MTK_PPE_MIB_CACHE_CTL 0x350 |
| +#define MTK_PPE_MIB_CACHE_CTL_EN BIT(0) |
| +#define MTK_PPE_MIB_CACHE_CTL_FLUSH BIT(2) |
| + |
| +#endif |
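| |
| The multi-bit fields above are GENMASK-based and meant for the |
| linux/bitfield.h helpers. A minimal sketch of programming MTK_PPE_TB_CFG |
| that way; the readl()/writel() accessors and the "base" pointer are |
| assumptions standing in for the driver's own register helpers: |
| |
|     #include <linux/bitfield.h> |
|     #include <linux/io.h> |
| |
|     /* Sketch: pick keepalive-age scanning and duplicate-to-CPU |
|      * keepalive handling in the PPE table config register. |
|      */ |
|     static void example_ppe_tb_cfg(void __iomem *base) |
|     { |
|         u32 val = readl(base + MTK_PPE_TB_CFG); |
| |
|         val &= ~(MTK_PPE_TB_CFG_SCAN_MODE | MTK_PPE_TB_CFG_KEEPALIVE); |
|         val |= FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE, |
|                           MTK_PPE_SCAN_MODE_KEEPALIVE_AGE); |
|         val |= FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE, |
|                           MTK_PPE_KEEPALIVE_DUP_CPU); |
|         writel(val, base + MTK_PPE_TB_CFG); |
|     } |
| |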
| diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c |
| index a085213dc..813e30495 100644 |
| --- a/drivers/net/ppp/ppp_generic.c |
| +++ b/drivers/net/ppp/ppp_generic.c |
| @@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev) |
| ppp_destroy_interface(ppp); |
| } |
| |
| +static int ppp_fill_forward_path(struct net_device_path_ctx *ctx, |
| + struct net_device_path *path) |
| +{ |
| + struct ppp *ppp = netdev_priv(ctx->dev); |
| + struct ppp_channel *chan; |
| + struct channel *pch; |
| + |
| + if (ppp->flags & SC_MULTILINK) |
| + return -EOPNOTSUPP; |
| + |
| + if (list_empty(&ppp->channels)) |
| + return -ENODEV; |
| + |
| + pch = list_first_entry(&ppp->channels, struct channel, clist); |
| + chan = pch->chan; |
| + if (!chan->ops->fill_forward_path) |
| + return -EOPNOTSUPP; |
| + |
| + return chan->ops->fill_forward_path(ctx, path, chan); |
| +} |
| + |
| static const struct net_device_ops ppp_netdev_ops = { |
| .ndo_init = ppp_dev_init, |
| .ndo_uninit = ppp_dev_uninit, |
| .ndo_start_xmit = ppp_start_xmit, |
| .ndo_do_ioctl = ppp_net_ioctl, |
| .ndo_get_stats64 = ppp_get_stats64, |
| + .ndo_fill_forward_path = ppp_fill_forward_path, |
| }; |
| |
| static struct device_type ppp_type = { |
| diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c |
| index 087b01684..7a8c246ab 100644 |
| --- a/drivers/net/ppp/pppoe.c |
| +++ b/drivers/net/ppp/pppoe.c |
| @@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb) |
| return __pppoe_xmit(sk, skb); |
| } |
| |
| +static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx, |
| + struct net_device_path *path, |
| + const struct ppp_channel *chan) |
| +{ |
| + struct sock *sk = (struct sock *)chan->private; |
| + struct pppox_sock *po = pppox_sk(sk); |
| + struct net_device *dev = po->pppoe_dev; |
| + |
| + if (sock_flag(sk, SOCK_DEAD) || |
| + !(sk->sk_state & PPPOX_CONNECTED) || !dev) |
| + return -1; |
| + |
| + path->type = DEV_PATH_PPPOE; |
| + path->encap.proto = htons(ETH_P_PPP_SES); |
| + path->encap.id = be16_to_cpu(po->num); |
| + memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN); |
| + memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN); |
| + path->dev = ctx->dev; |
| + ctx->dev = dev; |
| + |
| + return 0; |
| +} |
| + |
| static const struct ppp_channel_ops pppoe_chan_ops = { |
| .start_xmit = pppoe_xmit, |
| + .fill_forward_path = pppoe_fill_forward_path, |
| }; |
| |
| static int pppoe_recvmsg(struct socket *sock, struct msghdr *m, |
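| |
| The session id and protocol recorded in path->encap above are exactly what |
| a transmit path later needs to rebuild the PPPoE encapsulation. A minimal |
| sketch using the uapi struct pppoe_hdr layout; the helper name is |
| hypothetical: |
| |
|     #include <linux/if_pppox.h> |
| |
|     /* Sketch: build a session-stage PPPoE header; data_len is the PPP |
|      * payload length, excluding the 2-byte PPP protocol field that |
|      * PPPoE counts in ph->length. |
|      */ |
|     static void example_build_pppoe_hdr(struct pppoe_hdr *ph, u16 sid, |
|                                         u16 data_len) |
|     { |
|         ph->ver = 1; |
|         ph->type = 1; |
|         ph->code = 0; /* 0 == session data */ |
|         ph->sid = htons(sid); |
|         ph->length = htons(data_len + 2); |
|     } |
| |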
| diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h |
| index 38af42bf8..9f64504ac 100644 |
| --- a/include/linux/netdevice.h |
| +++ b/include/linux/netdevice.h |
| @@ -829,6 +829,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, |
| struct sk_buff *skb, |
| struct net_device *sb_dev); |
| |
| +enum net_device_path_type { |
| + DEV_PATH_ETHERNET = 0, |
| + DEV_PATH_VLAN, |
| + DEV_PATH_BRIDGE, |
| + DEV_PATH_PPPOE, |
| + DEV_PATH_DSA, |
| +}; |
| + |
| +struct net_device_path { |
| + enum net_device_path_type type; |
| + const struct net_device *dev; |
| + union { |
| + struct { |
| + u16 id; |
| + __be16 proto; |
| + u8 h_dest[ETH_ALEN]; |
| + } encap; |
| + struct { |
| + enum { |
| + DEV_PATH_BR_VLAN_KEEP, |
| + DEV_PATH_BR_VLAN_TAG, |
| + DEV_PATH_BR_VLAN_UNTAG, |
| + DEV_PATH_BR_VLAN_UNTAG_HW, |
| + } vlan_mode; |
| + u16 vlan_id; |
| + __be16 vlan_proto; |
| + } bridge; |
| + struct { |
| + int port; |
| + u16 proto; |
| + } dsa; |
| + }; |
| +}; |
| + |
| +#define NET_DEVICE_PATH_STACK_MAX 5 |
| +#define NET_DEVICE_PATH_VLAN_MAX 2 |
| + |
| +struct net_device_path_stack { |
| + int num_paths; |
| + struct net_device_path path[NET_DEVICE_PATH_STACK_MAX]; |
| +}; |
| + |
| +struct net_device_path_ctx { |
| + const struct net_device *dev; |
| + u8 daddr[ETH_ALEN]; |
| + |
| + int num_vlans; |
| + struct { |
| + u16 id; |
| + __be16 proto; |
| + } vlan[NET_DEVICE_PATH_VLAN_MAX]; |
| +}; |
| + |
| enum tc_setup_type { |
| TC_SETUP_QDISC_MQPRIO, |
| TC_SETUP_CLSU32, |
| @@ -844,6 +897,7 @@ enum tc_setup_type { |
| TC_SETUP_ROOT_QDISC, |
| TC_SETUP_QDISC_GRED, |
| TC_SETUP_QDISC_TAPRIO, |
| + TC_SETUP_FT, |
| }; |
| |
| /* These structures hold the attributes of bpf state that are being passed |
| @@ -1239,6 +1293,8 @@ struct tlsdev_ops; |
| * Get devlink port instance associated with a given netdev. |
| * Called with a reference on the netdevice and devlink locks only, |
| * rtnl_lock is not held. |
| + * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); |
| + * Get the forwarding path to reach the real device from the HW destination address. |
| */ |
| struct net_device_ops { |
| int (*ndo_init)(struct net_device *dev); |
| @@ -1436,6 +1492,8 @@ struct net_device_ops { |
| int (*ndo_xsk_wakeup)(struct net_device *dev, |
| u32 queue_id, u32 flags); |
| struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); |
| + int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, |
| + struct net_device_path *path); |
| }; |
| |
| /** |
| @@ -2661,6 +2719,8 @@ void dev_remove_offload(struct packet_offload *po); |
| |
| int dev_get_iflink(const struct net_device *dev); |
| int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); |
| +int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, |
| + struct net_device_path_stack *stack); |
| struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, |
| unsigned short mask); |
| struct net_device *dev_get_by_name(struct net *net, const char *name); |
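| |
| Taken together, a caller hands dev_fill_forward_path() the logical device |
| and the destination MAC and gets back the resolved device stack. A usage |
| sketch (hypothetical helper, expected to run under RCU protection like |
| the flow-offload callers of this API): |
| |
|     static void example_dump_path(const struct net_device *dev, |
|                                   const u8 *daddr) |
|     { |
|         struct net_device_path_stack stack; |
|         int i; |
| |
|         if (dev_fill_forward_path(dev, daddr, &stack) < 0) |
|             return; |
| |
|         /* stack.path[0] is the device the walk started from (e.g. a |
|          * bridge or VLAN device); the final DEV_PATH_ETHERNET entry |
|          * is the real transmit device. |
|          */ |
|         for (i = 0; i < stack.num_paths; i++) |
|             pr_debug("hop %d: type %d via %s\n", i, |
|                      stack.path[i].type, stack.path[i].dev->name); |
|     } |
| |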
| diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h |
| index 98966064e..91f9a9283 100644 |
| --- a/include/linux/ppp_channel.h |
| +++ b/include/linux/ppp_channel.h |
| @@ -28,6 +28,9 @@ struct ppp_channel_ops { |
| int (*start_xmit)(struct ppp_channel *, struct sk_buff *); |
| /* Handle an ioctl call that has come in via /dev/ppp. */ |
| int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long); |
| + int (*fill_forward_path)(struct net_device_path_ctx *, |
| + struct net_device_path *, |
| + const struct ppp_channel *); |
| }; |
| |
| struct ppp_channel { |
| diff --git a/include/net/dsa.h b/include/net/dsa.h |
| index 05f66d487..cafc74218 100644 |
| --- a/include/net/dsa.h |
| +++ b/include/net/dsa.h |
| @@ -561,6 +561,8 @@ struct dsa_switch_ops { |
| struct sk_buff *skb); |
| }; |
| |
| +struct dsa_port *dsa_port_from_netdev(struct net_device *netdev); |
| + |
| struct dsa_switch_driver { |
| struct list_head list; |
| const struct dsa_switch_ops *ops; |
| @@ -653,6 +655,14 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev, |
| #define BRCM_TAG_GET_PORT(v) ((v) >> 8) |
| #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff) |
| |
| +#if IS_ENABLED(CONFIG_NET_DSA) |
| +bool dsa_slave_dev_check(const struct net_device *dev); |
| +#else |
| +static inline bool dsa_slave_dev_check(const struct net_device *dev) |
| +{ |
| + return false; |
| +} |
| +#endif |
| |
| netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev); |
| int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data); |
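| |
| dsa_port_from_netdev() gives offload code a safe way to map a netdev seen |
| in a callback back to its switch port; a brief sketch with a hypothetical |
| helper name: |
| |
|     /* Sketch: resolve the DSA port index for a netdev, or a negative |
|      * errno if the device is not a DSA slave. |
|      */ |
|     static int example_dsa_port_index(struct net_device *netdev) |
|     { |
|         struct dsa_port *dp = dsa_port_from_netdev(netdev); |
| |
|         if (IS_ERR(dp)) |
|             return PTR_ERR(dp); |
| |
|         return dp->index; |
|     } |
| |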
| diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h |
| index c6f7bd22d..59b873653 100644 |
| --- a/include/net/flow_offload.h |
| +++ b/include/net/flow_offload.h |
| @@ -138,6 +138,7 @@ enum flow_action_id { |
| FLOW_ACTION_MPLS_PUSH, |
| FLOW_ACTION_MPLS_POP, |
| FLOW_ACTION_MPLS_MANGLE, |
| + FLOW_ACTION_PPPOE_PUSH, |
| NUM_FLOW_ACTIONS, |
| }; |
| |
| @@ -213,6 +214,9 @@ struct flow_action_entry { |
| u8 bos; |
| u8 ttl; |
| } mpls_mangle; |
| + struct { /* FLOW_ACTION_PPPOE_PUSH */ |
| + u16 sid; |
| + } pppoe; |
| }; |
| }; |
| |
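| |
| A rule-translation layer emits the new action like any other flow action |
| entry. A minimal sketch, modelled on the flow_action_entry_next() pattern |
| used by nf_flow_table_offload.c (the helper name is hypothetical): |
| |
|     /* Sketch: append a PPPoE push action carrying the session id to a |
|      * flow rule under construction. |
|      */ |
|     static void example_emit_pppoe_push(struct flow_rule *rule, u16 sid) |
|     { |
|         struct flow_action_entry *entry; |
| |
|         entry = &rule->action.entries[rule->action.num_entries++]; |
|         entry->id = FLOW_ACTION_PPPOE_PUSH; |
|         entry->pppoe.sid = sid; |
|     } |
| |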
| diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h |
| index 2c739fc75..89ab8f180 100644 |
| --- a/include/net/ip6_route.h |
| +++ b/include/net/ip6_route.h |
| @@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info * |
| !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); |
| } |
| |
| -static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) |
| +static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst, |
| + bool forwarding) |
| { |
| struct inet6_dev *idev; |
| unsigned int mtu; |
| |
| - if (dst_metric_locked(dst, RTAX_MTU)) { |
| + if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) { |
| mtu = dst_metric_raw(dst, RTAX_MTU); |
| if (mtu) |
| goto out; |
| diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h |
| index 7b3c873f8..e95483192 100644 |
| --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h |
| +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h |
| @@ -4,7 +4,4 @@ |
| |
| extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; |
| |
| -#include <linux/sysctl.h> |
| -extern struct ctl_table nf_ct_ipv6_sysctl_table[]; |
| - |
| #endif /* _NF_CONNTRACK_IPV6_H*/ |
| diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h |
| index 90690e37a..ce0bc3e62 100644 |
| --- a/include/net/netfilter/nf_conntrack.h |
| +++ b/include/net/netfilter/nf_conntrack.h |
| @@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) |
| !nf_ct_is_dying(ct); |
| } |
| |
| +#define NF_CT_DAY (86400 * HZ) |
| + |
| +/* Set an arbitrary timeout large enough not to ever expire; this saves |
| + * us a check for the IPS_OFFLOAD_BIT from the packet path via |
| + * nf_ct_is_expired(). |
| + */ |
| +static inline void nf_ct_offload_timeout(struct nf_conn *ct) |
| +{ |
| + if (nf_ct_expires(ct) < NF_CT_DAY / 2) |
| + WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); |
| +} |
| + |
| struct kernel_param; |
| |
| int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp); |
| diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h |
| index f7a060c6e..7f44a7715 100644 |
| --- a/include/net/netfilter/nf_conntrack_acct.h |
| +++ b/include/net/netfilter/nf_conntrack_acct.h |
| @@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable) |
| #endif |
| } |
| |
| +void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets, |
| + unsigned int bytes); |
| + |
| +static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir, |
| + unsigned int bytes) |
| +{ |
| +#if IS_ENABLED(CONFIG_NF_CONNTRACK) |
| + nf_ct_acct_add(ct, dir, 1, bytes); |
| +#endif |
| +} |
| + |
| void nf_conntrack_acct_pernet_init(struct net *net); |
| |
| int nf_conntrack_acct_init(void); |
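| |
| Splitting nf_ct_acct_add() (arbitrary packet/byte deltas) out of the |
| single-packet nf_ct_acct_update() wrapper lets hardware-offload paths |
| batch counter synchronisation. A sketch of the batched case; the stats |
| structure is a hypothetical stand-in for whatever a driver reports: |
| |
|     struct example_hw_stats { |
|         u32 packets; /* delta since the last sync */ |
|         u32 bytes; |
|     }; |
| |
|     /* Sketch: fold hardware counters for one direction of an offloaded |
|      * flow back into the conntrack accounting extension. |
|      */ |
|     static void example_sync_hw_counters(struct nf_conn *ct, u32 dir, |
|                                          const struct example_hw_stats *stats) |
|     { |
|         nf_ct_acct_add(ct, dir, stats->packets, stats->bytes); |
|     } |
| |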
| diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h |
| index 68d7fc92..7cf89767 100644 |
| --- a/include/net/netfilter/nf_flow_table.h |
| +++ b/include/net/netfilter/nf_flow_table.h |
| @@ -8,31 +8,99 @@ |
| #include <linux/rcupdate.h> |
| #include <linux/netfilter.h> |
| #include <linux/netfilter/nf_conntrack_tuple_common.h> |
| +#include <net/flow_offload.h> |
| #include <net/dst.h> |
| +#include <linux/if_pppox.h> |
| +#include <linux/ppp_defs.h> |
| |
| struct nf_flowtable; |
| +struct nf_flow_rule; |
| +struct flow_offload; |
| +enum flow_offload_tuple_dir; |
| + |
| +struct nf_flow_key { |
| + struct flow_dissector_key_meta meta; |
| + struct flow_dissector_key_control control; |
| + struct flow_dissector_key_control enc_control; |
| + struct flow_dissector_key_basic basic; |
| + struct flow_dissector_key_vlan vlan; |
| + struct flow_dissector_key_vlan cvlan; |
| + union { |
| + struct flow_dissector_key_ipv4_addrs ipv4; |
| + struct flow_dissector_key_ipv6_addrs ipv6; |
| + }; |
| + struct flow_dissector_key_keyid enc_key_id; |
| + union { |
| + struct flow_dissector_key_ipv4_addrs enc_ipv4; |
| + struct flow_dissector_key_ipv6_addrs enc_ipv6; |
| + }; |
| + struct flow_dissector_key_tcp tcp; |
| + struct flow_dissector_key_ports tp; |
| +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ |
| + |
| +struct nf_flow_match { |
| + struct flow_dissector dissector; |
| + struct nf_flow_key key; |
| + struct nf_flow_key mask; |
| +}; |
| + |
| +struct nf_flow_rule { |
| + struct nf_flow_match match; |
| + struct flow_rule *rule; |
| +}; |
| |
| struct nf_flowtable_type { |
| struct list_head list; |
| int family; |
| int (*init)(struct nf_flowtable *ft); |
| + int (*setup)(struct nf_flowtable *ft, |
| + struct net_device *dev, |
| + enum flow_block_command cmd); |
| + int (*action)(struct net *net, |
| + const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule); |
| void (*free)(struct nf_flowtable *ft); |
| nf_hookfn *hook; |
| struct module *owner; |
| }; |
| |
| +enum nf_flowtable_flags { |
| + NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */ |
| + NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */ |
| +}; |
| + |
| struct nf_flowtable { |
| struct list_head list; |
| struct rhashtable rhashtable; |
| + int priority; |
| const struct nf_flowtable_type *type; |
| struct delayed_work gc_work; |
| + unsigned int flags; |
| + struct flow_block flow_block; |
| + struct rw_semaphore flow_block_lock; /* Guards flow_block */ |
| + possible_net_t net; |
| }; |
| |
| +static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable) |
| +{ |
| + return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD; |
| +} |
| + |
| enum flow_offload_tuple_dir { |
| FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL, |
| FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY, |
| - FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX |
| }; |
| +#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX |
| + |
| +enum flow_offload_xmit_type { |
| + FLOW_OFFLOAD_XMIT_UNSPEC = 0, |
| + FLOW_OFFLOAD_XMIT_NEIGH, |
| + FLOW_OFFLOAD_XMIT_XFRM, |
| + FLOW_OFFLOAD_XMIT_DIRECT, |
| +}; |
| + |
| +#define NF_FLOW_TABLE_ENCAP_MAX 2 |
| |
| struct flow_offload_tuple { |
| union { |
| @@ -52,13 +120,30 @@ struct flow_offload_tuple { |
| |
| u8 l3proto; |
| u8 l4proto; |
| - u8 dir; |
| + struct { |
| + u16 id; |
| + __be16 proto; |
| + } encap[NF_FLOW_TABLE_ENCAP_MAX]; |
| |
| - u16 mtu; |
| + /* All members above are keys for lookups, see flow_offload_hash(). */ |
| + struct { } __hash; |
| |
| - struct { |
| - struct dst_entry *dst_cache; |
| - u32 dst_cookie; |
| + u8 dir:2, |
| + xmit_type:2, |
| + encap_num:2, |
| + in_vlan_ingress:2; |
| + u16 mtu; |
| + union { |
| + struct { |
| + struct dst_entry *dst_cache; |
| + u32 dst_cookie; |
| + }; |
| + struct { |
| + u32 ifidx; |
| + u32 hw_ifidx; |
| + u8 h_source[ETH_ALEN]; |
| + u8 h_dest[ETH_ALEN]; |
| + } out; |
| }; |
| }; |
| |
| @@ -67,52 +152,139 @@ struct flow_offload_tuple_rhash { |
| struct flow_offload_tuple tuple; |
| }; |
| |
| -#define FLOW_OFFLOAD_SNAT 0x1 |
| -#define FLOW_OFFLOAD_DNAT 0x2 |
| -#define FLOW_OFFLOAD_DYING 0x4 |
| -#define FLOW_OFFLOAD_TEARDOWN 0x8 |
| +enum nf_flow_flags { |
| + NF_FLOW_SNAT, |
| + NF_FLOW_DNAT, |
| + NF_FLOW_TEARDOWN, |
| + NF_FLOW_HW, |
| + NF_FLOW_HW_DYING, |
| + NF_FLOW_HW_DEAD, |
| + NF_FLOW_HW_PENDING, |
| +}; |
| + |
| +enum flow_offload_type { |
| + NF_FLOW_OFFLOAD_UNSPEC = 0, |
| + NF_FLOW_OFFLOAD_ROUTE, |
| +}; |
| |
| struct flow_offload { |
| struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; |
| - u32 flags; |
| - union { |
| - /* Your private driver data here. */ |
| - u32 timeout; |
| - }; |
| + struct nf_conn *ct; |
| + unsigned long flags; |
| + u16 type; |
| + u32 timeout; |
| + struct rcu_head rcu_head; |
| }; |
| |
| #define NF_FLOW_TIMEOUT (30 * HZ) |
| +#define nf_flowtable_time_stamp (u32)jiffies |
| + |
| +unsigned long flow_offload_get_timeout(struct flow_offload *flow); |
| + |
| +static inline __s32 nf_flow_timeout_delta(unsigned int timeout) |
| +{ |
| + return (__s32)(timeout - nf_flowtable_time_stamp); |
| +} |
| |
| struct nf_flow_route { |
| struct { |
| - struct dst_entry *dst; |
| + struct dst_entry *dst; |
| + struct { |
| + u32 ifindex; |
| + struct { |
| + u16 id; |
| + __be16 proto; |
| + } encap[NF_FLOW_TABLE_ENCAP_MAX]; |
| + u8 num_encaps:2, |
| + ingress_vlans:2; |
| + } in; |
| + struct { |
| + u32 ifindex; |
| + u32 hw_ifindex; |
| + u8 h_source[ETH_ALEN]; |
| + u8 h_dest[ETH_ALEN]; |
| + } out; |
| + enum flow_offload_xmit_type xmit_type; |
| } tuple[FLOW_OFFLOAD_DIR_MAX]; |
| }; |
| |
| -struct flow_offload *flow_offload_alloc(struct nf_conn *ct, |
| - struct nf_flow_route *route); |
| +struct flow_offload *flow_offload_alloc(struct nf_conn *ct); |
| void flow_offload_free(struct flow_offload *flow); |
| |
| +static inline int |
| +nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, |
| + flow_setup_cb_t *cb, void *cb_priv) |
| +{ |
| + struct flow_block *block = &flow_table->flow_block; |
| + struct flow_block_cb *block_cb; |
| + int err = 0; |
| + |
| + down_write(&flow_table->flow_block_lock); |
| + block_cb = flow_block_cb_lookup(block, cb, cb_priv); |
| + if (block_cb) { |
| + err = -EEXIST; |
| + goto unlock; |
| + } |
| + |
| + block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL); |
| + if (IS_ERR(block_cb)) { |
| + err = PTR_ERR(block_cb); |
| + goto unlock; |
| + } |
| + |
| + list_add_tail(&block_cb->list, &block->cb_list); |
| + |
| +unlock: |
| + up_write(&flow_table->flow_block_lock); |
| + return err; |
| +} |
| + |
| +static inline void |
| +nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, |
| + flow_setup_cb_t *cb, void *cb_priv) |
| +{ |
| + struct flow_block *block = &flow_table->flow_block; |
| + struct flow_block_cb *block_cb; |
| + |
| + down_write(&flow_table->flow_block_lock); |
| + block_cb = flow_block_cb_lookup(block, cb, cb_priv); |
| + if (block_cb) { |
| + list_del(&block_cb->list); |
| + flow_block_cb_free(block_cb); |
| + } else { |
| + WARN_ON(true); |
| + } |
| + up_write(&flow_table->flow_block_lock); |
| +} |
| + |
| +int flow_offload_route_init(struct flow_offload *flow, |
| + const struct nf_flow_route *route); |
| + |
| int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); |
| +void flow_offload_refresh(struct nf_flowtable *flow_table, |
| + struct flow_offload *flow); |
| + |
| struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, |
| struct flow_offload_tuple *tuple); |
| +void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable, |
| + struct net_device *dev); |
| void nf_flow_table_cleanup(struct net_device *dev); |
| |
| int nf_flow_table_init(struct nf_flowtable *flow_table); |
| void nf_flow_table_free(struct nf_flowtable *flow_table); |
| |
| void flow_offload_teardown(struct flow_offload *flow); |
| -static inline void flow_offload_dead(struct flow_offload *flow) |
| -{ |
| - flow->flags |= FLOW_OFFLOAD_DYING; |
| -} |
| |
| -int nf_flow_snat_port(const struct flow_offload *flow, |
| - struct sk_buff *skb, unsigned int thoff, |
| - u8 protocol, enum flow_offload_tuple_dir dir); |
| -int nf_flow_dnat_port(const struct flow_offload *flow, |
| - struct sk_buff *skb, unsigned int thoff, |
| - u8 protocol, enum flow_offload_tuple_dir dir); |
| +int nf_flow_table_iterate(struct nf_flowtable *flow_table, |
| + void (*iter)(struct flow_offload *flow, void *data), |
| + void *data); |
| + |
| +void nf_flow_snat_port(const struct flow_offload *flow, |
| + struct sk_buff *skb, unsigned int thoff, |
| + u8 protocol, enum flow_offload_tuple_dir dir); |
| +void nf_flow_dnat_port(const struct flow_offload *flow, |
| + struct sk_buff *skb, unsigned int thoff, |
| + u8 protocol, enum flow_offload_tuple_dir dir); |
| |
| struct flow_ports { |
| __be16 source, dest; |
| @@ -126,4 +298,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, |
| #define MODULE_ALIAS_NF_FLOWTABLE(family) \ |
| MODULE_ALIAS("nf-flowtable-" __stringify(family)) |
| |
| +void nf_flow_offload_add(struct nf_flowtable *flowtable, |
| + struct flow_offload *flow); |
| +void nf_flow_offload_del(struct nf_flowtable *flowtable, |
| + struct flow_offload *flow); |
| +void nf_flow_offload_stats(struct nf_flowtable *flowtable, |
| + struct flow_offload *flow); |
| + |
| +void nf_flow_table_offload_flush(struct nf_flowtable *flowtable); |
| +int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, |
| + struct net_device *dev, |
| + enum flow_block_command cmd); |
| +int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule); |
| +int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule); |
| + |
| +int nf_flow_table_offload_init(void); |
| +void nf_flow_table_offload_exit(void); |
| + |
| +static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) |
| +{ |
| + __be16 proto; |
| + |
| + proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + |
| + sizeof(struct pppoe_hdr))); |
| + switch (proto) { |
| + case htons(PPP_IP): |
| + return htons(ETH_P_IP); |
| + case htons(PPP_IPV6): |
| + return htons(ETH_P_IPV6); |
| + } |
| + |
| + return 0; |
| +} |
| + |
| #endif /* _NF_FLOW_TABLE_H */ |
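| |
| The flow_block plumbing above is what driver callbacks attach to: each |
| offloaded flow is pushed to every registered callback as a |
| TC_SETUP_CLSFLOWER request carrying a struct flow_cls_offload. A sketch |
| of the receiving side (driver hooks reduced to comments): |
| |
|     static int example_flowtable_cb(enum tc_setup_type type, |
|                                     void *type_data, void *cb_priv) |
|     { |
|         struct flow_cls_offload *cls = type_data; |
| |
|         if (type != TC_SETUP_CLSFLOWER) |
|             return -EOPNOTSUPP; |
| |
|         switch (cls->command) { |
|         case FLOW_CLS_REPLACE: |
|             /* program a hardware entry from cls->rule */ |
|             return 0; |
|         case FLOW_CLS_DESTROY: |
|             /* tear the hardware entry down */ |
|             return 0; |
|         case FLOW_CLS_STATS: |
|             /* fill cls->stats from hardware counters */ |
|             return 0; |
|         default: |
|             return -EOPNOTSUPP; |
|         } |
|     } |
| |
| Such a callback would be registered with |
| nf_flow_table_offload_add_cb(flowtable, example_flowtable_cb, priv) and |
| removed again with the _del_cb() counterpart. |
| |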
| diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h |
| index 806454e76..9e3963c8f 100644 |
| --- a/include/net/netns/conntrack.h |
| +++ b/include/net/netns/conntrack.h |
| @@ -27,6 +27,9 @@ struct nf_tcp_net { |
| int tcp_loose; |
| int tcp_be_liberal; |
| int tcp_max_retrans; |
| +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) |
| + unsigned int offload_timeout; |
| +#endif |
| }; |
| |
| enum udp_conntrack { |
| @@ -37,6 +40,9 @@ enum udp_conntrack { |
| |
| struct nf_udp_net { |
| unsigned int timeouts[UDP_CT_MAX]; |
| +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) |
| + unsigned int offload_timeout; |
| +#endif |
| }; |
| |
| struct nf_icmp_net { |
| diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h |
| index 336014bf8..ae698d11c 100644 |
| --- a/include/uapi/linux/netfilter/nf_conntrack_common.h |
| +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h |
| @@ -105,14 +105,19 @@ enum ip_conntrack_status { |
| IPS_OFFLOAD_BIT = 14, |
| IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT), |
| |
| + /* Conntrack has been offloaded to hardware. */ |
| + IPS_HW_OFFLOAD_BIT = 15, |
| + IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT), |
| + |
| /* Be careful here, modifying these bits can make things messy, |
| * so don't let users modify them directly. |
| */ |
| IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | |
| IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | |
| - IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD), |
| + IPS_SEQ_ADJUST | IPS_TEMPLATE | |
| + IPS_OFFLOAD | IPS_HW_OFFLOAD), |
| |
| - __IPS_MAX_BIT = 15, |
| + __IPS_MAX_BIT = 16, |
| }; |
| |
| /* Connection tracking event types */ |
| diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h |
| new file mode 100644 |
| index 000000000..5841bbe0e |
| --- /dev/null |
| +++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h |
| @@ -0,0 +1,17 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ |
| +#ifndef _XT_FLOWOFFLOAD_H |
| +#define _XT_FLOWOFFLOAD_H |
| + |
| +#include <linux/types.h> |
| + |
| +enum { |
| + XT_FLOWOFFLOAD_HW = 1 << 0, |
| + |
| + XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW |
| +}; |
| + |
| +struct xt_flowoffload_target_info { |
| + __u32 flags; |
| +}; |
| + |
| +#endif /* _XT_FLOWOFFLOAD_H */ |
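| |
| XT_FLOWOFFLOAD_MASK exists so the target's checkentry hook can reject |
| unknown flag bits from userspace; a sketch of that validation (mirroring |
| what the xt_FLOWOFFLOAD module is expected to do): |
| |
|     /* Sketch: only the hardware-offload flag is currently defined. */ |
|     static int |
|     example_flowoffload_check(const struct xt_flowoffload_target_info *info) |
|     { |
|         if (info->flags & ~XT_FLOWOFFLOAD_MASK) |
|             return -EINVAL; |
| |
|         return 0; |
|     } |
| |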
| diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c |
| index 589615ec4..444ab5fae 100644 |
| --- a/net/8021q/vlan_dev.c |
| +++ b/net/8021q/vlan_dev.c |
| @@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev) |
| return real_dev->ifindex; |
| } |
| |
| +static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx, |
| + struct net_device_path *path) |
| +{ |
| + struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev); |
| + |
| + path->type = DEV_PATH_VLAN; |
| + path->encap.id = vlan->vlan_id; |
| + path->encap.proto = vlan->vlan_proto; |
| + path->dev = ctx->dev; |
| + ctx->dev = vlan->real_dev; |
| + if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan)) |
| + return -ENOSPC; |
| + |
| + ctx->vlan[ctx->num_vlans].id = vlan->vlan_id; |
| + ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto; |
| + ctx->num_vlans++; |
| + |
| + return 0; |
| +} |
| + |
| static const struct ethtool_ops vlan_ethtool_ops = { |
| .get_link_ksettings = vlan_ethtool_get_link_ksettings, |
| .get_drvinfo = vlan_ethtool_get_drvinfo, |
| @@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = { |
| #endif |
| .ndo_fix_features = vlan_dev_fix_features, |
| .ndo_get_iflink = vlan_dev_get_iflink, |
| + .ndo_fill_forward_path = vlan_dev_fill_forward_path, |
| }; |
| |
| static void vlan_dev_free(struct net_device *dev) |
| diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c |
| index 501f77f0f..0940b44cd 100644 |
| --- a/net/bridge/br_device.c |
| +++ b/net/bridge/br_device.c |
| @@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev) |
| return br_del_if(br, slave_dev); |
| } |
| |
| +static int br_fill_forward_path(struct net_device_path_ctx *ctx, |
| + struct net_device_path *path) |
| +{ |
| + struct net_bridge_fdb_entry *f; |
| + struct net_bridge_port *dst; |
| + struct net_bridge *br; |
| + |
| + if (netif_is_bridge_port(ctx->dev)) |
| + return -1; |
| + |
| + br = netdev_priv(ctx->dev); |
| + |
| + br_vlan_fill_forward_path_pvid(br, ctx, path); |
| + |
| + f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id); |
| + if (!f || !f->dst) |
| + return -1; |
| + |
| + dst = READ_ONCE(f->dst); |
| + if (!dst) |
| + return -1; |
| + |
| + if (br_vlan_fill_forward_path_mode(br, dst, path)) |
| + return -1; |
| + |
| + path->type = DEV_PATH_BRIDGE; |
| + path->dev = dst->br->dev; |
| + ctx->dev = dst->dev; |
| + |
| + switch (path->bridge.vlan_mode) { |
| + case DEV_PATH_BR_VLAN_TAG: |
| + if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan)) |
| + return -ENOSPC; |
| + ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id; |
| + ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto; |
| + ctx->num_vlans++; |
| + break; |
| + case DEV_PATH_BR_VLAN_UNTAG_HW: |
| + case DEV_PATH_BR_VLAN_UNTAG: |
| + ctx->num_vlans--; |
| + break; |
| + case DEV_PATH_BR_VLAN_KEEP: |
| + break; |
| + } |
| + |
| + return 0; |
| +} |
| + |
| static const struct ethtool_ops br_ethtool_ops = { |
| .get_drvinfo = br_getinfo, |
| .get_link = ethtool_op_get_link, |
| @@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = { |
| .ndo_bridge_setlink = br_setlink, |
| .ndo_bridge_dellink = br_dellink, |
| .ndo_features_check = passthru_features_check, |
| + .ndo_fill_forward_path = br_fill_forward_path, |
| }; |
| |
| static struct device_type br_type = { |
| diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h |
| index a736be8a1..4bd9e9b57 100644 |
| --- a/net/bridge/br_private.h |
| +++ b/net/bridge/br_private.h |
| @@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event); |
| int br_vlan_bridge_event(struct net_device *dev, unsigned long event, |
| void *ptr); |
| |
| +void br_vlan_fill_forward_path_pvid(struct net_bridge *br, |
| + struct net_device_path_ctx *ctx, |
| + struct net_device_path *path); |
| +int br_vlan_fill_forward_path_mode(struct net_bridge *br, |
| + struct net_bridge_port *dst, |
| + struct net_device_path *path); |
| + |
| static inline struct net_bridge_vlan_group *br_vlan_group( |
| const struct net_bridge *br) |
| { |
| @@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p, |
| return 0; |
| } |
| |
| +static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br, |
| + struct net_device_path_ctx *ctx, |
| + struct net_device_path *path) |
| +{ |
| +} |
| + |
| +static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br, |
| + struct net_bridge_port *dst, |
| + struct net_device_path *path) |
| +{ |
| + return 0; |
| +} |
| + |
| static inline struct net_bridge_vlan_group *br_vlan_group( |
| const struct net_bridge *br) |
| { |
| diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c |
| index 9257292bd..bcfd16924 100644 |
| --- a/net/bridge/br_vlan.c |
| +++ b/net/bridge/br_vlan.c |
| @@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid) |
| } |
| EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu); |
| |
| +void br_vlan_fill_forward_path_pvid(struct net_bridge *br, |
| + struct net_device_path_ctx *ctx, |
| + struct net_device_path *path) |
| +{ |
| + struct net_bridge_vlan_group *vg; |
| + int idx = ctx->num_vlans - 1; |
| + u16 vid; |
| + |
| + path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP; |
| + |
| + if (!br_opt_get(br, BROPT_VLAN_ENABLED)) |
| + return; |
| + |
| + vg = br_vlan_group(br); |
| + |
| + if (idx >= 0 && |
| + ctx->vlan[idx].proto == br->vlan_proto) { |
| + vid = ctx->vlan[idx].id; |
| + } else { |
| + path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG; |
| + vid = br_get_pvid(vg); |
| + } |
| + |
| + path->bridge.vlan_id = vid; |
| + path->bridge.vlan_proto = br->vlan_proto; |
| +} |
| + |
| +int br_vlan_fill_forward_path_mode(struct net_bridge *br, |
| + struct net_bridge_port *dst, |
| + struct net_device_path *path) |
| +{ |
| + struct net_bridge_vlan_group *vg; |
| + struct net_bridge_vlan *v; |
| + |
| + if (!br_opt_get(br, BROPT_VLAN_ENABLED)) |
| + return 0; |
| + |
| + vg = nbp_vlan_group_rcu(dst); |
| + v = br_vlan_find(vg, path->bridge.vlan_id); |
| + if (!v || !br_vlan_should_use(v)) |
| + return -EINVAL; |
| + |
| + if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED)) |
| + return 0; |
| + |
| + if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG) |
| + path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP; |
| + else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) |
| + path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW; |
| + else |
| + path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG; |
| + |
| + return 0; |
| +} |
| + |
| int br_vlan_get_info(const struct net_device *dev, u16 vid, |
| struct bridge_vlan_info *p_vinfo) |
| { |
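| |
| The four vlan_mode values encode what a consumer must do with the VLAN |
| stack it has accumulated. A sketch of that translation, modelled on the |
| nft_dev_path_info() style consumers of this API (the state structure is |
| hypothetical): |
| |
|     struct example_encap_state { |
|         int num_encaps; /* >= 1 whenever an UNTAG mode is seen */ |
|         u8 ingress_vlans; /* tags stripped by hardware on ingress */ |
|         struct { |
|             u16 id; |
|             __be16 proto; |
|         } encap[NF_FLOW_TABLE_ENCAP_MAX]; |
|     }; |
| |
|     static void |
|     example_apply_bridge_vlan(const struct net_device_path *path, |
|                               struct example_encap_state *st) |
|     { |
|         switch (path->bridge.vlan_mode) { |
|         case DEV_PATH_BR_VLAN_TAG: |
|             st->encap[st->num_encaps].id = path->bridge.vlan_id; |
|             st->encap[st->num_encaps].proto = path->bridge.vlan_proto; |
|             st->num_encaps++; |
|             break; |
|         case DEV_PATH_BR_VLAN_UNTAG: |
|             st->num_encaps--; |
|             break; |
|         case DEV_PATH_BR_VLAN_UNTAG_HW: |
|             st->ingress_vlans |= BIT(st->num_encaps - 1); |
|             break; |
|         case DEV_PATH_BR_VLAN_KEEP: |
|             break; |
|         } |
|     } |
| |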
| diff --git a/net/core/dev.c b/net/core/dev.c |
| index fe2c856b9..4f0edb218 100644 |
| --- a/net/core/dev.c |
| +++ b/net/core/dev.c |
| @@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) |
| } |
| EXPORT_SYMBOL_GPL(dev_fill_metadata_dst); |
| |
| +static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack) |
| +{ |
| + int k = stack->num_paths++; |
| + |
| + if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX)) |
| + return NULL; |
| + |
| + return &stack->path[k]; |
| +} |
| + |
| +int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, |
| + struct net_device_path_stack *stack) |
| +{ |
| + const struct net_device *last_dev; |
| + struct net_device_path_ctx ctx = { |
| + .dev = dev, |
| + }; |
| + struct net_device_path *path; |
| + int ret = 0; |
| + |
| + memcpy(ctx.daddr, daddr, sizeof(ctx.daddr)); |
| + stack->num_paths = 0; |
| + while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) { |
| + last_dev = ctx.dev; |
| + path = dev_fwd_path(stack); |
| + if (!path) |
| + return -1; |
| + |
| + memset(path, 0, sizeof(struct net_device_path)); |
| + ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path); |
| + if (ret < 0) |
| + return -1; |
| + |
| + if (WARN_ON_ONCE(last_dev == ctx.dev)) |
| + return -1; |
| + } |
| + path = dev_fwd_path(stack); |
| + if (!path) |
| + return -1; |
| + path->type = DEV_PATH_ETHERNET; |
| + path->dev = ctx.dev; |
| + |
| + return ret; |
| +} |
| +EXPORT_SYMBOL_GPL(dev_fill_forward_path); |
| + |
| /** |
| * __dev_get_by_name - find a device by its name |
| * @net: the applicable net namespace |
| diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c |
| index ca80f8699..35a1249a9 100644 |
| --- a/net/dsa/dsa.c |
| +++ b/net/dsa/dsa.c |
| @@ -329,6 +329,15 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev, |
| } |
| EXPORT_SYMBOL_GPL(call_dsa_notifiers); |
| |
| +struct dsa_port *dsa_port_from_netdev(struct net_device *netdev) |
| +{ |
| + if (!netdev || !dsa_slave_dev_check(netdev)) |
| + return ERR_PTR(-ENODEV); |
| + |
| + return dsa_slave_to_port(netdev); |
| +} |
| +EXPORT_SYMBOL_GPL(dsa_port_from_netdev); |
| + |
| static int __init dsa_init_module(void) |
| { |
| int rc; |
| diff --git a/net/dsa/slave.c b/net/dsa/slave.c |
| index 036fda317..2dfaa1eac 100644 |
| --- a/net/dsa/slave.c |
| +++ b/net/dsa/slave.c |
| @@ -1033,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev, |
| } |
| } |
| |
| +static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port, |
| + void *type_data) |
| +{ |
| + struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp; |
| + struct net_device *master = cpu_dp->master; |
| + |
| + if (!master->netdev_ops->ndo_setup_tc) |
| + return -EOPNOTSUPP; |
| + |
| + return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data); |
| +} |
| + |
| static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, |
| void *type_data) |
| { |
| struct dsa_port *dp = dsa_slave_to_port(dev); |
| struct dsa_switch *ds = dp->ds; |
| |
| - if (type == TC_SETUP_BLOCK) |
| + switch (type) { |
| + case TC_SETUP_BLOCK: |
| return dsa_slave_setup_tc_block(dev, type_data); |
| + case TC_SETUP_FT: |
| + return dsa_slave_setup_ft_block(ds, dp->index, type_data); |
| + default: |
| + break; |
| + } |
| |
| if (!ds->ops->port_setup_tc) |
| return -EOPNOTSUPP; |
| @@ -1226,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev) |
| return dp->ds->devlink ? &dp->devlink_port : NULL; |
| } |
| |
| +static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx, |
| + struct net_device_path *path) |
| +{ |
| + struct dsa_port *dp = dsa_slave_to_port(ctx->dev); |
| + struct dsa_port *cpu_dp = dp->cpu_dp; |
| + |
| + path->dev = ctx->dev; |
| + path->type = DEV_PATH_DSA; |
| + path->dsa.proto = cpu_dp->tag_ops->proto; |
| + path->dsa.port = dp->index; |
| + ctx->dev = cpu_dp->master; |
| + |
| + return 0; |
| +} |
| + |
| static const struct net_device_ops dsa_slave_netdev_ops = { |
| .ndo_open = dsa_slave_open, |
| .ndo_stop = dsa_slave_close, |
| @@ -1250,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = { |
| .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid, |
| .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid, |
| .ndo_get_devlink_port = dsa_slave_get_devlink_port, |
| + .ndo_fill_forward_path = dsa_slave_fill_forward_path, |
| }; |
| |
| static struct device_type dsa_type = { |
| @@ -1497,7 +1529,8 @@ void dsa_slave_destroy(struct net_device *slave_dev) |
| bool dsa_slave_dev_check(const struct net_device *dev) |
| { |
| return dev->netdev_ops == &dsa_slave_netdev_ops; |
| } |
| +EXPORT_SYMBOL_GPL(dsa_slave_dev_check); |
| |
| static int dsa_slave_changeupper(struct net_device *dev, |
| struct netdev_notifier_changeupper_info *info) |
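| |
| On the other end of this delegation, the CPU-port master driver answers |
| TC_SETUP_FT from its own ndo_setup_tc. A minimal sketch with hypothetical |
| names; it assumes the flowtable binds with the usual clsact-ingress |
| binder type, as flow_block_cb_setup_simple() requires when ingress_only |
| is true: |
| |
|     static LIST_HEAD(example_block_cb_list); |
| |
|     static int example_master_flow_cb(enum tc_setup_type type, |
|                                       void *type_data, void *cb_priv) |
|     { |
|         /* a real driver parses the flow_cls_offload here */ |
|         return -EOPNOTSUPP; |
|     } |
| |
|     static int example_master_setup_tc(struct net_device *dev, |
|                                        enum tc_setup_type type, |
|                                        void *type_data) |
|     { |
|         switch (type) { |
|         case TC_SETUP_FT: |
|             return flow_block_cb_setup_simple(type_data, |
|                                               &example_block_cb_list, |
|                                               example_master_flow_cb, |
|                                               dev, dev, true); |
|         default: |
|             return -EOPNOTSUPP; |
|         } |
|     } |
| |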
| diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig |
| index f17b40211..803b92e4c 100644 |
| --- a/net/ipv4/netfilter/Kconfig |
| +++ b/net/ipv4/netfilter/Kconfig |
| @@ -56,8 +56,6 @@ config NF_TABLES_ARP |
| help |
| This option enables the ARP support for nf_tables. |
| |
| -endif # NF_TABLES |
| - |
| config NF_FLOW_TABLE_IPV4 |
| tristate "Netfilter flow table IPv4 module" |
| depends on NF_FLOW_TABLE |
| @@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4 |
| |
| To compile it as a module, choose M here. |
| |
| +endif # NF_TABLES |
| + |
| config NF_DUP_IPV4 |
| tristate "Netfilter IPv4 packet duplication to alternate destination" |
| depends on !NF_CONNTRACK || NF_CONNTRACK |
| diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c |
| index 5585e3a94..bb76f6061 100644 |
| --- a/net/ipv6/ip6_output.c |
| +++ b/net/ipv6/ip6_output.c |
| @@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb) |
| } |
| } |
| |
| - mtu = ip6_dst_mtu_forward(dst); |
| + mtu = ip6_dst_mtu_maybe_forward(dst, true); |
| if (mtu < IPV6_MIN_MTU) |
| mtu = IPV6_MIN_MTU; |
| |
| diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig |
| index 69443e9a3..0b481d236 100644 |
| --- a/net/ipv6/netfilter/Kconfig |
| +++ b/net/ipv6/netfilter/Kconfig |
| @@ -45,7 +45,6 @@ config NFT_FIB_IPV6 |
| multicast or blackhole. |
| |
| endif # NF_TABLES_IPV6 |
| -endif # NF_TABLES |
| |
| config NF_FLOW_TABLE_IPV6 |
| tristate "Netfilter flow table IPv6 module" |
| @@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6 |
| |
| To compile it as a module, choose M here. |
| |
| +endif # NF_TABLES |
| + |
| config NF_DUP_IPV6 |
| tristate "Netfilter IPv6 packet duplication to alternate destination" |
| depends on !NF_CONNTRACK || NF_CONNTRACK |
| diff --git a/net/ipv6/route.c b/net/ipv6/route.c |
| index 98aaf0b79..2b357ac71 100644 |
| --- a/net/ipv6/route.c |
| +++ b/net/ipv6/route.c |
| @@ -83,7 +83,7 @@ enum rt6_nud_state { |
| |
| static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); |
| static unsigned int ip6_default_advmss(const struct dst_entry *dst); |
| -static unsigned int ip6_mtu(const struct dst_entry *dst); |
| +static unsigned int ip6_mtu(const struct dst_entry *dst); |
| static struct dst_entry *ip6_negative_advice(struct dst_entry *); |
| static void ip6_dst_destroy(struct dst_entry *); |
| static void ip6_dst_ifdown(struct dst_entry *, |
| @@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) |
| |
| static unsigned int ip6_mtu(const struct dst_entry *dst) |
| { |
| - struct inet6_dev *idev; |
| - unsigned int mtu; |
| - |
| - mtu = dst_metric_raw(dst, RTAX_MTU); |
| - if (mtu) |
| - goto out; |
| - |
| - mtu = IPV6_MIN_MTU; |
| - |
| - rcu_read_lock(); |
| - idev = __in6_dev_get(dst->dev); |
| - if (idev) |
| - mtu = idev->cnf.mtu6; |
| - rcu_read_unlock(); |
| - |
| -out: |
| - mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); |
| - |
| - return mtu - lwtunnel_headroom(dst->lwtstate, mtu); |
| + return ip6_dst_mtu_maybe_forward(dst, false); |
| } |
| |
| /* MTU selection: |
| diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig |
| index b967763f5..c040e713a 100644 |
| --- a/net/netfilter/Kconfig |
| +++ b/net/netfilter/Kconfig |
| @@ -690,8 +690,6 @@ config NFT_FIB_NETDEV |
| |
| endif # NF_TABLES_NETDEV |
| |
| -endif # NF_TABLES |
| - |
| config NF_FLOW_TABLE_INET |
| tristate "Netfilter flow table mixed IPv4/IPv6 module" |
| depends on NF_FLOW_TABLE |
| @@ -700,11 +698,12 @@ config NF_FLOW_TABLE_INET |
| |
| To compile it as a module, choose M here. |
| |
| +endif # NF_TABLES |
| + |
| config NF_FLOW_TABLE |
| tristate "Netfilter flow table module" |
| depends on NETFILTER_INGRESS |
| depends on NF_CONNTRACK |
| - depends on NF_TABLES |
| help |
| This option adds the flow table core infrastructure. |
| |
| @@ -984,6 +983,15 @@ config NETFILTER_XT_TARGET_NOTRACK |
| depends on NETFILTER_ADVANCED |
| select NETFILTER_XT_TARGET_CT |
| |
| +config NETFILTER_XT_TARGET_FLOWOFFLOAD |
| + tristate '"FLOWOFFLOAD" target support' |
| + depends on NF_FLOW_TABLE |
| + depends on NETFILTER_INGRESS |
| + help |
| + This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload |
| + module to speed up processing of packets by bypassing the usual |
| + netfilter chains. |
| + |
| config NETFILTER_XT_TARGET_RATEEST |
| tristate '"RATEEST" target support' |
| depends on NETFILTER_ADVANCED |
| diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile |
| index 4fc075b61..d93a121bc 100644 |
| --- a/net/netfilter/Makefile |
| +++ b/net/netfilter/Makefile |
| @@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o |
| |
| # flow table infrastructure |
| obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o |
| -nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o |
| +nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \ |
| + nf_flow_table_offload.o |
| |
| obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o |
| |
| @@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o |
| obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o |
| obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o |
| obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o |
| +obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o |
| obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o |
| obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o |
| obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o |
| diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c |
| index f6ab6f484..f689e19d8 100644 |
| --- a/net/netfilter/nf_conntrack_core.c |
| +++ b/net/netfilter/nf_conntrack_core.c |
| @@ -864,9 +864,8 @@ out: |
| } |
| EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); |
| |
| -static inline void nf_ct_acct_update(struct nf_conn *ct, |
| - enum ip_conntrack_info ctinfo, |
| - unsigned int len) |
| +void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets, |
| + unsigned int bytes) |
| { |
| struct nf_conn_acct *acct; |
| |
| @@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct, |
| if (acct) { |
| struct nf_conn_counter *counter = acct->counter; |
| |
| - atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); |
| - atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes); |
| + atomic64_add(packets, &counter[dir].packets); |
| + atomic64_add(bytes, &counter[dir].bytes); |
| } |
| } |
| +EXPORT_SYMBOL_GPL(nf_ct_acct_add); |
| |
| static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, |
| const struct nf_conn *loser_ct) |
| @@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, |
| |
| /* u32 should be fine since we must have seen one packet. */ |
| bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes); |
| - nf_ct_acct_update(ct, ctinfo, bytes); |
| + nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes); |
| } |
| } |
| |
| @@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work) |
| |
| tmp = nf_ct_tuplehash_to_ctrack(h); |
| |
| - if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) |
| + if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { |
| + nf_ct_offload_timeout(tmp); |
| continue; |
| + } |
| |
| if (nf_ct_is_expired(tmp)) { |
| nf_ct_gc_expired(tmp); |
| @@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, |
| WRITE_ONCE(ct->timeout, extra_jiffies); |
| acct: |
| if (do_acct) |
| - nf_ct_acct_update(ct, ctinfo, skb->len); |
| + nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len); |
| } |
| EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); |
| |
| @@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct, |
| enum ip_conntrack_info ctinfo, |
| const struct sk_buff *skb) |
| { |
| - nf_ct_acct_update(ct, ctinfo, skb->len); |
| + nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len); |
| |
| return nf_ct_delete(ct, 0, 0); |
| } |
| diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c |
| index 7204f0366..3742bae21 100644 |
| --- a/net/netfilter/nf_conntrack_proto_tcp.c |
| +++ b/net/netfilter/nf_conntrack_proto_tcp.c |
| @@ -1453,6 +1453,10 @@ void nf_conntrack_tcp_init_net(struct net *net) |
| tn->tcp_loose = nf_ct_tcp_loose; |
| tn->tcp_be_liberal = nf_ct_tcp_be_liberal; |
| tn->tcp_max_retrans = nf_ct_tcp_max_retrans; |
| + |
| +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) |
| + tn->offload_timeout = 30 * HZ; |
| +#endif |
| } |
| |
| const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp = |
| diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c |
| index e3a2d018f..a1579d6c3 100644 |
| --- a/net/netfilter/nf_conntrack_proto_udp.c |
| +++ b/net/netfilter/nf_conntrack_proto_udp.c |
| @@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net) |
| |
| for (i = 0; i < UDP_CT_MAX; i++) |
| un->timeouts[i] = udp_timeouts[i]; |
| + |
| +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) |
| + un->offload_timeout = 30 * HZ; |
| +#endif |
| } |
| |
| const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp = |
| diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c |
| index 9c6259c28..10d9f93ce 100644 |
| --- a/net/netfilter/nf_conntrack_standalone.c |
| +++ b/net/netfilter/nf_conntrack_standalone.c |
| @@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v) |
| if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) |
| goto release; |
| |
| - if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) |
| + if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status)) |
| + seq_puts(s, "[HW_OFFLOAD] "); |
| + else if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) |
| seq_puts(s, "[OFFLOAD] "); |
| else if (test_bit(IPS_ASSURED_BIT, &ct->status)) |
| seq_puts(s, "[ASSURED] "); |
| @@ -620,11 +622,17 @@ enum nf_ct_sysctl_index { |
| NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE, |
| NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS, |
| NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK, |
| +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) |
| + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD, |
| +#endif |
| NF_SYSCTL_CT_PROTO_TCP_LOOSE, |
| NF_SYSCTL_CT_PROTO_TCP_LIBERAL, |
| NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS, |
| NF_SYSCTL_CT_PROTO_TIMEOUT_UDP, |
| NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM, |
| +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) |
| + NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD, |
| +#endif |
| NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP, |
| NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6, |
| #ifdef CONFIG_NF_CT_PROTO_SCTP |
| @@ -812,6 +820,14 @@ static struct ctl_table nf_ct_sysctl_table[] = { |
| .mode = 0644, |
| .proc_handler = proc_dointvec_jiffies, |
| }, |
| +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) |
| + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = { |
| + .procname = "nf_flowtable_tcp_timeout", |
| + .maxlen = sizeof(unsigned int), |
| + .mode = 0644, |
| + .proc_handler = proc_dointvec_jiffies, |
| + }, |
| +#endif |
| [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = { |
| .procname = "nf_conntrack_tcp_loose", |
| .maxlen = sizeof(int), |
| @@ -846,6 +862,14 @@ static struct ctl_table nf_ct_sysctl_table[] = { |
| .mode = 0644, |
| .proc_handler = proc_dointvec_jiffies, |
| }, |
| +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) |
| + [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = { |
| + .procname = "nf_flowtable_udp_timeout", |
| + .maxlen = sizeof(unsigned int), |
| + .mode = 0644, |
| + .proc_handler = proc_dointvec_jiffies, |
| + }, |
| +#endif |
| [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = { |
| .procname = "nf_conntrack_icmp_timeout", |
| .maxlen = sizeof(unsigned int), |
| @@ -1028,6 +1052,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, |
| XASSIGN(LIBERAL, &tn->tcp_be_liberal); |
| XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans); |
| #undef XASSIGN |
| + |
| +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) |
| + table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout; |
| +#endif |
| + |
| } |
| |
| static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net, |
| @@ -1115,6 +1144,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) |
| table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout; |
| table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED]; |
| table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED]; |
| +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) |
| + table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout; |
| +#endif |
| |
| nf_conntrack_standalone_init_tcp_sysctl(net, table); |
| nf_conntrack_standalone_init_sctp_sysctl(net, table); |
| diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c |
| index f212cec0..10365581 100644 |
| --- a/net/netfilter/nf_flow_table_core.c |
| +++ b/net/netfilter/nf_flow_table_core.c |
| @@ -7,43 +7,21 @@ |
| #include <linux/netdevice.h> |
| #include <net/ip.h> |
| #include <net/ip6_route.h> |
| -#include <net/netfilter/nf_tables.h> |
| #include <net/netfilter/nf_flow_table.h> |
| #include <net/netfilter/nf_conntrack.h> |
| #include <net/netfilter/nf_conntrack_core.h> |
| #include <net/netfilter/nf_conntrack_l4proto.h> |
| #include <net/netfilter/nf_conntrack_tuple.h> |
| |
| -struct flow_offload_entry { |
| - struct flow_offload flow; |
| - struct nf_conn *ct; |
| - struct rcu_head rcu_head; |
| -}; |
| - |
| static DEFINE_MUTEX(flowtable_lock); |
| static LIST_HEAD(flowtables); |
| |
| -static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) |
| -{ |
| - const struct rt6_info *rt; |
| - |
| - if (flow_tuple->l3proto == NFPROTO_IPV6) { |
| - rt = (const struct rt6_info *)flow_tuple->dst_cache; |
| - return rt6_get_cookie(rt); |
| - } |
| - |
| - return 0; |
| -} |
| - |
| static void |
| -flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, |
| - struct nf_flow_route *route, |
| +flow_offload_fill_dir(struct flow_offload *flow, |
| enum flow_offload_tuple_dir dir) |
| { |
| struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple; |
| - struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple; |
| - struct dst_entry *other_dst = route->tuple[!dir].dst; |
| - struct dst_entry *dst = route->tuple[dir].dst; |
| + struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple; |
| |
| ft->dir = dir; |
| |
| @@ -51,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, |
| case NFPROTO_IPV4: |
| ft->src_v4 = ctt->src.u3.in; |
| ft->dst_v4 = ctt->dst.u3.in; |
| - ft->mtu = ip_dst_mtu_maybe_forward(dst, true); |
| break; |
| case NFPROTO_IPV6: |
| ft->src_v6 = ctt->src.u3.in6; |
| ft->dst_v6 = ctt->dst.u3.in6; |
| - ft->mtu = ip6_dst_mtu_forward(dst); |
| break; |
| } |
| |
| @@ -64,50 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, |
| ft->l4proto = ctt->dst.protonum; |
| ft->src_port = ctt->src.u.tcp.port; |
| ft->dst_port = ctt->dst.u.tcp.port; |
| - |
| - ft->iifidx = other_dst->dev->ifindex; |
| - ft->dst_cache = dst; |
| - ft->dst_cookie = flow_offload_dst_cookie(ft); |
| } |
| |
| -struct flow_offload * |
| -flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route) |
| +struct flow_offload *flow_offload_alloc(struct nf_conn *ct) |
| { |
| - struct flow_offload_entry *entry; |
| struct flow_offload *flow; |
| |
| if (unlikely(nf_ct_is_dying(ct) || |
| !atomic_inc_not_zero(&ct->ct_general.use))) |
| return NULL; |
| |
| - entry = kzalloc(sizeof(*entry), GFP_ATOMIC); |
| - if (!entry) |
| + flow = kzalloc(sizeof(*flow), GFP_ATOMIC); |
| + if (!flow) |
| goto err_ct_refcnt; |
| |
| - flow = &entry->flow; |
| - |
| - if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst)) |
| - goto err_dst_cache_original; |
| - |
| - if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst)) |
| - goto err_dst_cache_reply; |
| + flow->ct = ct; |
| |
| - entry->ct = ct; |
| - |
| - flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL); |
| - flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY); |
| + flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL); |
| + flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY); |
| |
| if (ct->status & IPS_SRC_NAT) |
| - flow->flags |= FLOW_OFFLOAD_SNAT; |
| + __set_bit(NF_FLOW_SNAT, &flow->flags); |
| if (ct->status & IPS_DST_NAT) |
| - flow->flags |= FLOW_OFFLOAD_DNAT; |
| + __set_bit(NF_FLOW_DNAT, &flow->flags); |
| |
| return flow; |
| |
| -err_dst_cache_reply: |
| - dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst); |
| -err_dst_cache_original: |
| - kfree(entry); |
| err_ct_refcnt: |
| nf_ct_put(ct); |
| |
| @@ -115,40 +73,135 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route) |
| } |
| EXPORT_SYMBOL_GPL(flow_offload_alloc); |
| |
| -static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) |
| +static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) |
| { |
| - tcp->state = TCP_CONNTRACK_ESTABLISHED; |
| - tcp->seen[0].td_maxwin = 0; |
| - tcp->seen[1].td_maxwin = 0; |
| + const struct rt6_info *rt; |
| + |
| + if (flow_tuple->l3proto == NFPROTO_IPV6) { |
| + rt = (const struct rt6_info *)flow_tuple->dst_cache; |
| + return rt6_get_cookie(rt); |
| + } |
| + |
| + return 0; |
| } |
| |
| -#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ) |
| -#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ) |
| +static int flow_offload_fill_route(struct flow_offload *flow, |
| + const struct nf_flow_route *route, |
| + enum flow_offload_tuple_dir dir) |
| +{ |
| + struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; |
| + struct dst_entry *dst = route->tuple[dir].dst; |
| + int i, j = 0; |
| + |
| + switch (flow_tuple->l3proto) { |
| + case NFPROTO_IPV4: |
| + flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true); |
| + break; |
| + case NFPROTO_IPV6: |
| + flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true); |
| + break; |
| + } |
| + |
| + flow_tuple->iifidx = route->tuple[dir].in.ifindex; |
| + for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) { |
| + flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id; |
| + flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto; |
| + if (route->tuple[dir].in.ingress_vlans & BIT(i)) |
| + flow_tuple->in_vlan_ingress |= BIT(j); |
| + j++; |
| + } |
| + flow_tuple->encap_num = route->tuple[dir].in.num_encaps; |
| + |
| + switch (route->tuple[dir].xmit_type) { |
| + case FLOW_OFFLOAD_XMIT_DIRECT: |
| + memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest, |
| + ETH_ALEN); |
| + memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source, |
| + ETH_ALEN); |
| + flow_tuple->out.ifidx = route->tuple[dir].out.ifindex; |
| + flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex; |
| + break; |
| + case FLOW_OFFLOAD_XMIT_XFRM: |
| + case FLOW_OFFLOAD_XMIT_NEIGH: |
| + if (!dst_hold_safe(route->tuple[dir].dst)) |
| + return -1; |
| + |
| + flow_tuple->dst_cache = dst; |
| + flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple); |
| + break; |
| + default: |
| + WARN_ON_ONCE(1); |
| + break; |
| + } |
| + flow_tuple->xmit_type = route->tuple[dir].xmit_type; |
| + |
| + return 0; |
| +} |
| + |
| +static void nft_flow_dst_release(struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir) |
| +{ |
| + if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH || |
| + flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) |
| + dst_release(flow->tuplehash[dir].tuple.dst_cache); |
| +} |
| + |
| +int flow_offload_route_init(struct flow_offload *flow, |
| + const struct nf_flow_route *route) |
| +{ |
| + int err; |
| + |
| + err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); |
| + if (err < 0) |
| + return err; |
| + |
| + err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); |
| + if (err < 0) |
| + goto err_route_reply; |
| + |
| + flow->type = NF_FLOW_OFFLOAD_ROUTE; |
| + |
| + return 0; |
| + |
| +err_route_reply: |
| + nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); |
| + |
| + return err; |
| +} |
| +EXPORT_SYMBOL_GPL(flow_offload_route_init); |
| |
| -static inline __s32 nf_flow_timeout_delta(unsigned int timeout) |
| +static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) |
| { |
| - return (__s32)(timeout - (u32)jiffies); |
| + tcp->state = TCP_CONNTRACK_ESTABLISHED; |
| + tcp->seen[0].td_maxwin = 0; |
| + tcp->seen[1].td_maxwin = 0; |
| } |
| |
| static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) |
| { |
| - const struct nf_conntrack_l4proto *l4proto; |
| + struct net *net = nf_ct_net(ct); |
| int l4num = nf_ct_protonum(ct); |
| - unsigned int timeout; |
| + s32 timeout; |
| |
| - l4proto = nf_ct_l4proto_find(l4num); |
| - if (!l4proto) |
| - return; |
| + if (l4num == IPPROTO_TCP) { |
| + struct nf_tcp_net *tn = nf_tcp_pernet(net); |
| |
| - if (l4num == IPPROTO_TCP) |
| - timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT; |
| - else if (l4num == IPPROTO_UDP) |
| - timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT; |
| - else |
| + timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; |
| + timeout -= tn->offload_timeout; |
| + } else if (l4num == IPPROTO_UDP) { |
| + struct nf_udp_net *tn = nf_udp_pernet(net); |
| + |
| + timeout = tn->timeouts[UDP_CT_REPLIED]; |
| + timeout -= tn->offload_timeout; |
| + } else { |
| return; |
| + } |
| + |
| + if (timeout < 0) |
| + timeout = 0; |
| |
| - if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout) |
| - ct->timeout = nfct_time_stamp + timeout; |
| + if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout) |
| + WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout); |
| } |
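| |
| The fixup above hands the connection back to normal conntrack aging with the protocol's established/replied timeout minus the offload grace period, clamped at zero, and then only shortens a larger remaining timeout. A worked sketch of that arithmetic; the two pernet sysctl values are assumed examples, not guaranteed defaults: |
| |
| #include <stdio.h> |
| |
| int main(void) |
| { |
|         /* assumed example values for the two pernet timeouts (seconds) */ |
|         int tcp_established = 7440;     /* nf_conntrack_tcp_timeout_established */ |
|         int tcp_offload = 30;           /* nf_flowtable_tcp_timeout */ |
|         int pickup = tcp_established - tcp_offload; |
| |
|         if (pickup < 0)                 /* same clamp as the code above */ |
|                 pickup = 0; |
|         printf("conntrack keeps the flow for %d s after offload ends\n", |
|                pickup); |
|         return 0; |
| } |
| |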
| |
| static void flow_offload_fixup_ct_state(struct nf_conn *ct) |
| @@ -163,17 +216,23 @@ static void flow_offload_fixup_ct(struct nf_conn *ct) |
| flow_offload_fixup_ct_timeout(ct); |
| } |
| |
| -void flow_offload_free(struct flow_offload *flow) |
| +static void flow_offload_route_release(struct flow_offload *flow) |
| { |
| - struct flow_offload_entry *e; |
| + nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); |
| + nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY); |
| +} |
| |
| - dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache); |
| - dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache); |
| - e = container_of(flow, struct flow_offload_entry, flow); |
| - if (flow->flags & FLOW_OFFLOAD_DYING) |
| - nf_ct_delete(e->ct, 0, 0); |
| - nf_ct_put(e->ct); |
| - kfree_rcu(e, rcu_head); |
| +void flow_offload_free(struct flow_offload *flow) |
| +{ |
| + switch (flow->type) { |
| + case NF_FLOW_OFFLOAD_ROUTE: |
| + flow_offload_route_release(flow); |
| + break; |
| + default: |
| + break; |
| + } |
| + nf_ct_put(flow->ct); |
| + kfree_rcu(flow, rcu_head); |
| } |
| EXPORT_SYMBOL_GPL(flow_offload_free); |
| |
| @@ -181,14 +240,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed) |
| { |
| const struct flow_offload_tuple *tuple = data; |
| |
| - return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed); |
| + return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed); |
| } |
| |
| static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) |
| { |
| const struct flow_offload_tuple_rhash *tuplehash = data; |
| |
| - return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed); |
| + return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed); |
| } |
| |
| static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, |
| @@ -197,7 +256,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, |
| const struct flow_offload_tuple *tuple = arg->key; |
| const struct flow_offload_tuple_rhash *x = ptr; |
| |
| - if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir))) |
| + if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash))) |
| return 1; |
| |
| return 0; |
| @@ -211,30 +270,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = { |
| .automatic_shrinking = true, |
| }; |
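| |
| The rhashtable callbacks above hash and memcmp() only the tuple bytes in front of the zero-sized __hash marker, so direction and transmit metadata never affect lookups. A sketch of the same offsetof() prefix trick; the struct here is hypothetical and FNV-1a stands in for the kernel's jhash: |
| |
| #include <stdio.h> |
| #include <stdint.h> |
| #include <stddef.h> |
| |
| struct tuple { |
|         uint32_t src, dst; |
|         uint16_t sport, dport; |
|         /* zero-size marker (GNU C): fields below are not hashed */ |
|         struct {} __hash; |
|         uint8_t dir; |
| }; |
| |
| static uint32_t fnv1a(const void *data, size_t len) |
| { |
|         const uint8_t *p = data; |
|         uint32_t h = 2166136261u; |
| |
|         while (len--) |
|                 h = (h ^ *p++) * 16777619u; |
|         return h; |
| } |
| |
| int main(void) |
| { |
|         struct tuple a = { .src = 1, .dst = 2, .sport = 80, .dport = 1024 }; |
|         struct tuple b = a; |
| |
|         b.dir = 1;      /* differs only past the __hash marker */ |
|         printf("same hash despite different dir: %d\n", |
|                fnv1a(&a, offsetof(struct tuple, __hash)) == |
|                fnv1a(&b, offsetof(struct tuple, __hash))); |
|         return 0; |
| } |
| |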
| |
| -#define DAY (86400 * HZ) |
| - |
| -/* Set an arbitrary timeout large enough not to ever expire, this save |
| - * us a check for the IPS_OFFLOAD_BIT from the packet path via |
| - * nf_ct_is_expired(). |
| - */ |
| -static void nf_ct_offload_timeout(struct flow_offload *flow) |
| +unsigned long flow_offload_get_timeout(struct flow_offload *flow) |
| { |
| - struct flow_offload_entry *entry; |
| - struct nf_conn *ct; |
| + unsigned long timeout = NF_FLOW_TIMEOUT; |
| + struct net *net = nf_ct_net(flow->ct); |
| + int l4num = nf_ct_protonum(flow->ct); |
| |
| - entry = container_of(flow, struct flow_offload_entry, flow); |
| - ct = entry->ct; |
| + if (l4num == IPPROTO_TCP) { |
| + struct nf_tcp_net *tn = nf_tcp_pernet(net); |
| |
| - if (nf_ct_expires(ct) < DAY / 2) |
| - ct->timeout = nfct_time_stamp + DAY; |
| + timeout = tn->offload_timeout; |
| + } else if (l4num == IPPROTO_UDP) { |
| + struct nf_udp_net *tn = nf_udp_pernet(net); |
| + |
| + timeout = tn->offload_timeout; |
| + } |
| + |
| + return timeout; |
| } |
| |
| int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) |
| { |
| int err; |
| |
| - nf_ct_offload_timeout(flow); |
| - flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; |
| + flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); |
| |
| err = rhashtable_insert_fast(&flow_table->rhashtable, |
| &flow->tuplehash[0].node, |
| @@ -252,10 +311,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) |
| return err; |
| } |
| |
| + nf_ct_offload_timeout(flow->ct); |
| + |
| + if (nf_flowtable_hw_offload(flow_table)) { |
| + __set_bit(NF_FLOW_HW, &flow->flags); |
| + nf_flow_offload_add(flow_table, flow); |
| + } |
| + |
| return 0; |
| } |
| EXPORT_SYMBOL_GPL(flow_offload_add); |
| |
| +void flow_offload_refresh(struct nf_flowtable *flow_table, |
| + struct flow_offload *flow) |
| +{ |
| + u32 timeout; |
| + |
| + timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); |
| + if (timeout - READ_ONCE(flow->timeout) > HZ) |
| + WRITE_ONCE(flow->timeout, timeout); |
| + else |
| + return; |
| + |
| + if (likely(!nf_flowtable_hw_offload(flow_table))) |
| + return; |
| + |
| + nf_flow_offload_add(flow_table, flow); |
| +} |
| +EXPORT_SYMBOL_GPL(flow_offload_refresh); |
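| |
| flow_offload_refresh() only writes the shared timeout (and re-queues the hardware add) when the new deadline has drifted more than HZ past the stored one, keeping the per-packet path read-mostly. A small model of that throttled write, with jiffies replaced by a plain tick counter: |
| |
| #include <stdio.h> |
| #include <stdint.h> |
| |
| #define HZ 100 |
| |
| static uint32_t flow_timeout; |
| |
| static void refresh(uint32_t now, uint32_t span) |
| { |
|         uint32_t timeout = now + span; |
| |
|         if (timeout - flow_timeout > HZ)        /* drifted >1s: write back */ |
|                 flow_timeout = timeout; |
| } |
| |
| int main(void) |
| { |
|         flow_timeout = 1000 + 30 * HZ; |
|         refresh(1050, 30 * HZ);         /* 0.5 s later: skipped */ |
|         printf("after 50 ticks:  %u\n", flow_timeout); |
|         refresh(1200, 30 * HZ);         /* 2 s later: written */ |
|         printf("after 200 ticks: %u\n", flow_timeout); |
|         return 0; |
| } |
| |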
| + |
| static inline bool nf_flow_has_expired(const struct flow_offload *flow) |
| { |
| return nf_flow_timeout_delta(flow->timeout) <= 0; |
| @@ -264,8 +348,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow) |
| static void flow_offload_del(struct nf_flowtable *flow_table, |
| struct flow_offload *flow) |
| { |
| - struct flow_offload_entry *e; |
| - |
| rhashtable_remove_fast(&flow_table->rhashtable, |
| &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, |
| nf_flow_offload_rhash_params); |
| @@ -273,28 +355,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table, |
| &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, |
| nf_flow_offload_rhash_params); |
| |
| - e = container_of(flow, struct flow_offload_entry, flow); |
| - clear_bit(IPS_OFFLOAD_BIT, &e->ct->status); |
| + clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); |
| |
| if (nf_flow_has_expired(flow)) |
| - flow_offload_fixup_ct(e->ct); |
| - else if (flow->flags & FLOW_OFFLOAD_TEARDOWN) |
| - flow_offload_fixup_ct_timeout(e->ct); |
| - |
| - if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN)) |
| - flow_offload_fixup_ct_state(e->ct); |
| + flow_offload_fixup_ct(flow->ct); |
| + else |
| + flow_offload_fixup_ct_timeout(flow->ct); |
| |
| flow_offload_free(flow); |
| } |
| |
| void flow_offload_teardown(struct flow_offload *flow) |
| { |
| - struct flow_offload_entry *e; |
| - |
| - flow->flags |= FLOW_OFFLOAD_TEARDOWN; |
| + set_bit(NF_FLOW_TEARDOWN, &flow->flags); |
| |
| - e = container_of(flow, struct flow_offload_entry, flow); |
| - flow_offload_fixup_ct_state(e->ct); |
| + flow_offload_fixup_ct_state(flow->ct); |
| } |
| EXPORT_SYMBOL_GPL(flow_offload_teardown); |
| |
| @@ -304,7 +379,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table, |
| { |
| struct flow_offload_tuple_rhash *tuplehash; |
| struct flow_offload *flow; |
| - struct flow_offload_entry *e; |
| int dir; |
| |
| tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple, |
| @@ -314,19 +388,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table, |
| |
| dir = tuplehash->tuple.dir; |
| flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); |
| - if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)) |
| + if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) |
| return NULL; |
| |
| - e = container_of(flow, struct flow_offload_entry, flow); |
| - if (unlikely(nf_ct_is_dying(e->ct))) |
| + if (unlikely(nf_ct_is_dying(flow->ct))) |
| return NULL; |
| |
| return tuplehash; |
| } |
| EXPORT_SYMBOL_GPL(flow_offload_lookup); |
| |
| -static int |
| -nf_flow_table_iterate(struct nf_flowtable *flow_table, |
| +int nf_flow_table_iterate(struct nf_flowtable *flow_table, |
| void (*iter)(struct flow_offload *flow, void *data), |
| void *data) |
| { |
| @@ -339,7 +411,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, |
| rhashtable_walk_start(&hti); |
| |
| while ((tuplehash = rhashtable_walk_next(&hti))) { |
| - |
| if (IS_ERR(tuplehash)) { |
| if (PTR_ERR(tuplehash) != -EAGAIN) { |
| err = PTR_ERR(tuplehash); |
| @@ -359,23 +430,49 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, |
| |
| return err; |
| } |
| +EXPORT_SYMBOL_GPL(nf_flow_table_iterate); |
| |
| -static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) |
| +static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple) |
| { |
| - struct nf_flowtable *flow_table = data; |
| - struct flow_offload_entry *e; |
| - bool teardown; |
| + struct dst_entry *dst; |
| |
| - e = container_of(flow, struct flow_offload_entry, flow); |
| + if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH || |
| + tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) { |
| + dst = tuple->dst_cache; |
| + if (!dst_check(dst, tuple->dst_cookie)) |
| + return true; |
| + } |
| |
| - teardown = flow->flags & (FLOW_OFFLOAD_DYING | |
| - FLOW_OFFLOAD_TEARDOWN); |
| + return false; |
| +} |
| |
| - if (!teardown) |
| - nf_ct_offload_timeout(flow); |
| +static bool nf_flow_has_stale_dst(struct flow_offload *flow) |
| +{ |
| + return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) || |
| + flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple); |
| +} |
| |
| - if (nf_flow_has_expired(flow) || teardown) |
| - flow_offload_del(flow_table, flow); |
| +static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) |
| +{ |
| + struct nf_flowtable *flow_table = data; |
| + |
| + if (nf_flow_has_expired(flow) || |
| + nf_ct_is_dying(flow->ct) || |
| + nf_flow_has_stale_dst(flow)) |
| + set_bit(NF_FLOW_TEARDOWN, &flow->flags); |
| + |
| + if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { |
| + if (test_bit(NF_FLOW_HW, &flow->flags)) { |
| + if (!test_bit(NF_FLOW_HW_DYING, &flow->flags)) |
| + nf_flow_offload_del(flow_table, flow); |
| + else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags)) |
| + flow_offload_del(flow_table, flow); |
| + } else { |
| + flow_offload_del(flow_table, flow); |
| + } |
| + } else if (test_bit(NF_FLOW_HW, &flow->flags)) { |
| + nf_flow_offload_stats(flow_table, flow); |
| + } |
| } |
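| |
| For hardware-offloaded entries the GC step above tears a flow down in two passes: it queues the driver delete first (NF_FLOW_HW_DYING) and frees the software entry only after the driver acknowledges (NF_FLOW_HW_DEAD). A toy model of that handshake; plain flags replace the atomic bitops and the asynchronous work items are collapsed into direct calls: |
| |
| #include <stdio.h> |
| |
| enum { TEARDOWN = 1, HW = 2, HW_DYING = 4, HW_DEAD = 8 }; |
| |
| static void gc_step(int *flags) |
| { |
|         if (!(*flags & TEARDOWN)) |
|                 return; |
|         if (!(*flags & HW)) { |
|                 printf("software-only flow freed\n"); |
|         } else if (!(*flags & HW_DYING)) { |
|                 *flags |= HW_DYING;     /* queue FLOW_CLS_DESTROY to the driver */ |
|                 printf("hardware delete queued\n"); |
|         } else if (*flags & HW_DEAD) { |
|                 printf("hardware flow freed\n"); |
|         } |
| } |
| |
| int main(void) |
| { |
|         int flags = TEARDOWN | HW; |
| |
|         gc_step(&flags);        /* first pass: ask the driver to remove it */ |
|         flags |= HW_DEAD;       /* the driver's work item completed */ |
|         gc_step(&flags);        /* second pass: safe to free */ |
|         return 0; |
| } |
| |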
| |
| static void nf_flow_offload_work_gc(struct work_struct *work) |
| @@ -387,30 +484,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work) |
| queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); |
| } |
| |
| -static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, |
| - __be16 port, __be16 new_port) |
| +static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, |
| + __be16 port, __be16 new_port) |
| { |
| struct tcphdr *tcph; |
| |
| - if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || |
| - skb_try_make_writable(skb, thoff + sizeof(*tcph))) |
| - return -1; |
| - |
| tcph = (void *)(skb_network_header(skb) + thoff); |
| inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false); |
| - |
| - return 0; |
| } |
| |
| -static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, |
| - __be16 port, __be16 new_port) |
| +static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, |
| + __be16 port, __be16 new_port) |
| { |
| struct udphdr *udph; |
| |
| - if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || |
| - skb_try_make_writable(skb, thoff + sizeof(*udph))) |
| - return -1; |
| - |
| udph = (void *)(skb_network_header(skb) + thoff); |
| if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { |
| inet_proto_csum_replace2(&udph->check, skb, port, |
| @@ -418,38 +505,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, |
| if (!udph->check) |
| udph->check = CSUM_MANGLED_0; |
| } |
| - |
| - return 0; |
| } |
| |
| -static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, |
| - u8 protocol, __be16 port, __be16 new_port) |
| +static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, |
| + u8 protocol, __be16 port, __be16 new_port) |
| { |
| switch (protocol) { |
| case IPPROTO_TCP: |
| - if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0) |
| - return NF_DROP; |
| + nf_flow_nat_port_tcp(skb, thoff, port, new_port); |
| break; |
| case IPPROTO_UDP: |
| - if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0) |
| - return NF_DROP; |
| + nf_flow_nat_port_udp(skb, thoff, port, new_port); |
| break; |
| } |
| - |
| - return 0; |
| } |
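| |
| The helpers above rely on inet_proto_csum_replace2(), which folds the old/new port difference into the existing transport checksum (RFC 1624) instead of recomputing it over the payload. Stand-alone demonstration of the update rule, plus the UDP convention that a resulting zero is stored as CSUM_MANGLED_0 (0xffff) because zero means "no checksum" on the wire: |
| |
| #include <stdio.h> |
| #include <stdint.h> |
| |
| /* RFC 1624: HC' = ~(~HC + ~m + m'), in one's-complement arithmetic */ |
| static uint16_t csum_replace2(uint16_t check, uint16_t old16, uint16_t new16) |
| { |
|         uint32_t sum = (uint16_t)~check + (uint16_t)~old16 + new16; |
| |
|         while (sum >> 16) |
|                 sum = (sum & 0xffff) + (sum >> 16); |
|         return ~sum; |
| } |
| |
| int main(void) |
| { |
|         uint16_t check = csum_replace2(0x1c46, 80, 8080); |
| |
|         if (check == 0)         /* UDP only: CSUM_MANGLED_0 */ |
|                 check = 0xffff; |
|         printf("updated checksum: 0x%04x\n", check); |
|         return 0; |
| } |
| |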
| |
| -int nf_flow_snat_port(const struct flow_offload *flow, |
| - struct sk_buff *skb, unsigned int thoff, |
| - u8 protocol, enum flow_offload_tuple_dir dir) |
| +void nf_flow_snat_port(const struct flow_offload *flow, |
| + struct sk_buff *skb, unsigned int thoff, |
| + u8 protocol, enum flow_offload_tuple_dir dir) |
| { |
| struct flow_ports *hdr; |
| __be16 port, new_port; |
| |
| - if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || |
| - skb_try_make_writable(skb, thoff + sizeof(*hdr))) |
| - return -1; |
| - |
| hdr = (void *)(skb_network_header(skb) + thoff); |
| |
| switch (dir) { |
| @@ -463,25 +540,19 @@ int nf_flow_snat_port(const struct flow_offload *flow, |
| new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; |
| hdr->dest = new_port; |
| break; |
| - default: |
| - return -1; |
| } |
| |
| - return nf_flow_nat_port(skb, thoff, protocol, port, new_port); |
| + nf_flow_nat_port(skb, thoff, protocol, port, new_port); |
| } |
| EXPORT_SYMBOL_GPL(nf_flow_snat_port); |
| |
| -int nf_flow_dnat_port(const struct flow_offload *flow, |
| - struct sk_buff *skb, unsigned int thoff, |
| - u8 protocol, enum flow_offload_tuple_dir dir) |
| +void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb, |
| + unsigned int thoff, u8 protocol, |
| + enum flow_offload_tuple_dir dir) |
| { |
| struct flow_ports *hdr; |
| __be16 port, new_port; |
| |
| - if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || |
| - skb_try_make_writable(skb, thoff + sizeof(*hdr))) |
| - return -1; |
| - |
| hdr = (void *)(skb_network_header(skb) + thoff); |
| |
| switch (dir) { |
| @@ -495,11 +566,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow, |
| new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; |
| hdr->source = new_port; |
| break; |
| - default: |
| - return -1; |
| } |
| |
| - return nf_flow_nat_port(skb, thoff, protocol, port, new_port); |
| + nf_flow_nat_port(skb, thoff, protocol, port, new_port); |
| } |
| EXPORT_SYMBOL_GPL(nf_flow_dnat_port); |
| |
| @@ -507,7 +576,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable) |
| { |
| int err; |
| |
| - INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); |
| + INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); |
| + flow_block_init(&flowtable->flow_block); |
| + init_rwsem(&flowtable->flow_block_lock); |
| |
| err = rhashtable_init(&flowtable->rhashtable, |
| &nf_flow_offload_rhash_params); |
| @@ -528,25 +599,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init); |
| static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) |
| { |
| struct net_device *dev = data; |
| - struct flow_offload_entry *e; |
| - |
| - e = container_of(flow, struct flow_offload_entry, flow); |
| |
| if (!dev) { |
| flow_offload_teardown(flow); |
| return; |
| } |
| - if (net_eq(nf_ct_net(e->ct), dev_net(dev)) && |
| + |
| + if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) && |
| (flow->tuplehash[0].tuple.iifidx == dev->ifindex || |
| flow->tuplehash[1].tuple.iifidx == dev->ifindex)) |
| - flow_offload_dead(flow); |
| + flow_offload_teardown(flow); |
| } |
| |
| -static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, |
| - struct net_device *dev) |
| +void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable, |
| + struct net_device *dev) |
| { |
| nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); |
| flush_delayed_work(&flowtable->gc_work); |
| + nf_flow_table_offload_flush(flowtable); |
| } |
| |
| void nf_flow_table_cleanup(struct net_device *dev) |
| @@ -555,7 +625,7 @@ void nf_flow_table_cleanup(struct net_device *dev) |
| |
| mutex_lock(&flowtable_lock); |
| list_for_each_entry(flowtable, &flowtables, list) |
| - nf_flow_table_iterate_cleanup(flowtable, dev); |
| + nf_flow_table_gc_cleanup(flowtable, dev); |
| mutex_unlock(&flowtable_lock); |
| } |
| EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); |
| @@ -565,9 +635,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) |
| mutex_lock(&flowtable_lock); |
| list_del(&flow_table->list); |
| mutex_unlock(&flowtable_lock); |
| + |
| cancel_delayed_work_sync(&flow_table->gc_work); |
| nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); |
| nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); |
| + nf_flow_table_offload_flush(flow_table); |
| + if (nf_flowtable_hw_offload(flow_table)) |
| + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, |
| + flow_table); |
| rhashtable_destroy(&flow_table->rhashtable); |
| } |
| EXPORT_SYMBOL_GPL(nf_flow_table_free); |
| @@ -591,12 +666,23 @@ static struct notifier_block flow_offload_netdev_notifier = { |
| |
| static int __init nf_flow_table_module_init(void) |
| { |
| - return register_netdevice_notifier(&flow_offload_netdev_notifier); |
| + int ret; |
| + |
| + ret = nf_flow_table_offload_init(); |
| + if (ret) |
| + return ret; |
| + |
| + ret = register_netdevice_notifier(&flow_offload_netdev_notifier); |
| + if (ret) |
| + nf_flow_table_offload_exit(); |
| + |
| + return ret; |
| } |
| |
| static void __exit nf_flow_table_module_exit(void) |
| { |
| unregister_netdevice_notifier(&flow_offload_netdev_notifier); |
| + nf_flow_table_offload_exit(); |
| } |
| |
| module_init(nf_flow_table_module_init); |
| @@ -604,3 +690,4 @@ module_exit(nf_flow_table_module_exit); |
| |
| MODULE_LICENSE("GPL"); |
| MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); |
| +MODULE_DESCRIPTION("Netfilter flow table module"); |
| diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c |
| index 397129b2..6257d87c 100644 |
| --- a/net/netfilter/nf_flow_table_ip.c |
| +++ b/net/netfilter/nf_flow_table_ip.c |
| @@ -7,11 +7,13 @@ |
| #include <linux/ip.h> |
| #include <linux/ipv6.h> |
| #include <linux/netdevice.h> |
| +#include <linux/if_ether.h> |
| #include <net/ip.h> |
| #include <net/ipv6.h> |
| #include <net/ip6_route.h> |
| #include <net/neighbour.h> |
| #include <net/netfilter/nf_flow_table.h> |
| +#include <net/netfilter/nf_conntrack_acct.h> |
| /* For layer 4 checksum field offset. */ |
| #include <linux/tcp.h> |
| #include <linux/udp.h> |
| @@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto, |
| if (proto != IPPROTO_TCP) |
| return 0; |
| |
| - if (!pskb_may_pull(skb, thoff + sizeof(*tcph))) |
| - return -1; |
| - |
| tcph = (void *)(skb_network_header(skb) + thoff); |
| if (unlikely(tcph->fin || tcph->rst)) { |
| flow_offload_teardown(flow); |
| @@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto, |
| return 0; |
| } |
| |
| -static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, |
| - __be32 addr, __be32 new_addr) |
| +static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, |
| + __be32 addr, __be32 new_addr) |
| { |
| struct tcphdr *tcph; |
| |
| - if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || |
| - skb_try_make_writable(skb, thoff + sizeof(*tcph))) |
| - return -1; |
| - |
| tcph = (void *)(skb_network_header(skb) + thoff); |
| inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); |
| - |
| - return 0; |
| } |
| |
| -static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, |
| - __be32 addr, __be32 new_addr) |
| +static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, |
| + __be32 addr, __be32 new_addr) |
| { |
| struct udphdr *udph; |
| |
| - if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || |
| - skb_try_make_writable(skb, thoff + sizeof(*udph))) |
| - return -1; |
| - |
| udph = (void *)(skb_network_header(skb) + thoff); |
| if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { |
| inet_proto_csum_replace4(&udph->check, skb, addr, |
| @@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, |
| if (!udph->check) |
| udph->check = CSUM_MANGLED_0; |
| } |
| - |
| - return 0; |
| } |
| |
| -static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, |
| - unsigned int thoff, __be32 addr, |
| - __be32 new_addr) |
| +static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, |
| + unsigned int thoff, __be32 addr, |
| + __be32 new_addr) |
| { |
| switch (iph->protocol) { |
| case IPPROTO_TCP: |
| - if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0) |
| - return NF_DROP; |
| + nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr); |
| break; |
| case IPPROTO_UDP: |
| - if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0) |
| - return NF_DROP; |
| + nf_flow_nat_ip_udp(skb, thoff, addr, new_addr); |
| break; |
| } |
| - |
| - return 0; |
| } |
| |
| -static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, |
| - struct iphdr *iph, unsigned int thoff, |
| - enum flow_offload_tuple_dir dir) |
| +static void nf_flow_snat_ip(const struct flow_offload *flow, |
| + struct sk_buff *skb, struct iphdr *iph, |
| + unsigned int thoff, enum flow_offload_tuple_dir dir) |
| { |
| __be32 addr, new_addr; |
| |
| @@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, |
| new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; |
| iph->daddr = new_addr; |
| break; |
| - default: |
| - return -1; |
| } |
| csum_replace4(&iph->check, addr, new_addr); |
| |
| - return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); |
| + nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); |
| } |
| |
| -static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, |
| - struct iphdr *iph, unsigned int thoff, |
| - enum flow_offload_tuple_dir dir) |
| +static void nf_flow_dnat_ip(const struct flow_offload *flow, |
| + struct sk_buff *skb, struct iphdr *iph, |
| + unsigned int thoff, enum flow_offload_tuple_dir dir) |
| { |
| __be32 addr, new_addr; |
| |
| @@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, |
| new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; |
| iph->saddr = new_addr; |
| break; |
| - default: |
| - return -1; |
| } |
| csum_replace4(&iph->check, addr, new_addr); |
| |
| - return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); |
| + nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); |
| } |
| |
| -static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, |
| - unsigned int thoff, enum flow_offload_tuple_dir dir) |
| +static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, |
| + unsigned int thoff, enum flow_offload_tuple_dir dir, |
| + struct iphdr *iph) |
| { |
| - struct iphdr *iph = ip_hdr(skb); |
| - |
| - if (flow->flags & FLOW_OFFLOAD_SNAT && |
| - (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 || |
| - nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0)) |
| - return -1; |
| - if (flow->flags & FLOW_OFFLOAD_DNAT && |
| - (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 || |
| - nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0)) |
| - return -1; |
| - |
| - return 0; |
| + if (test_bit(NF_FLOW_SNAT, &flow->flags)) { |
| + nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir); |
| + nf_flow_snat_ip(flow, skb, iph, thoff, dir); |
| + } |
| + if (test_bit(NF_FLOW_DNAT, &flow->flags)) { |
| + nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir); |
| + nf_flow_dnat_ip(flow, skb, iph, thoff, dir); |
| + } |
| } |
| |
| static bool ip_has_options(unsigned int thoff) |
| @@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff) |
| return thoff != sizeof(struct iphdr); |
| } |
| |
| +static void nf_flow_tuple_encap(struct sk_buff *skb, |
| + struct flow_offload_tuple *tuple) |
| +{ |
| + struct vlan_ethhdr *veth; |
| + struct pppoe_hdr *phdr; |
| + int i = 0; |
| + |
| + if (skb_vlan_tag_present(skb)) { |
| + tuple->encap[i].id = skb_vlan_tag_get(skb); |
| + tuple->encap[i].proto = skb->vlan_proto; |
| + i++; |
| + } |
| + switch (skb->protocol) { |
| + case htons(ETH_P_8021Q): |
| + veth = (struct vlan_ethhdr *)skb_mac_header(skb); |
| + tuple->encap[i].id = ntohs(veth->h_vlan_TCI); |
| + tuple->encap[i].proto = skb->protocol; |
| + break; |
| + case htons(ETH_P_PPP_SES): |
| + phdr = (struct pppoe_hdr *)skb_mac_header(skb); |
| + tuple->encap[i].id = ntohs(phdr->sid); |
| + tuple->encap[i].proto = skb->protocol; |
| + break; |
| + } |
| +} |
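| |
| nf_flow_tuple_encap() keys PPPoE traffic on the session id found right behind the Ethernet header (via skb_mac_header()). A userspace sketch of the same extraction; the simplified header layout follows RFC 2516 and is not the kernel's struct pppoe_hdr: |
| |
| #include <stdio.h> |
| #include <stdint.h> |
| #include <string.h> |
| #include <arpa/inet.h> |
| |
| struct pppoe_hdr { |
|         uint8_t  vertype;       /* version 1, type 1 -> 0x11 */ |
|         uint8_t  code;          /* 0x00 for session-stage data */ |
|         uint16_t sid;           /* session id, network byte order */ |
|         uint16_t length; |
| } __attribute__((packed)); |
| |
| int main(void) |
| { |
|         /* a session frame carrying session id 42 */ |
|         uint8_t frame[6] = { 0x11, 0x00, 0x00, 0x2a, 0x00, 0x00 }; |
|         struct pppoe_hdr hdr; |
| |
|         memcpy(&hdr, frame, sizeof(hdr)); |
|         printf("encap: proto=0x8864 (ETH_P_PPP_SES) id=%u\n", ntohs(hdr.sid)); |
|         return 0; |
| } |
| |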
| + |
| static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, |
| - struct flow_offload_tuple *tuple) |
| + struct flow_offload_tuple *tuple, u32 *hdrsize, |
| + u32 offset) |
| { |
| struct flow_ports *ports; |
| unsigned int thoff; |
| struct iphdr *iph; |
| |
| - if (!pskb_may_pull(skb, sizeof(*iph))) |
| + if (!pskb_may_pull(skb, sizeof(*iph) + offset)) |
| return -1; |
| |
| - iph = ip_hdr(skb); |
| - thoff = iph->ihl * 4; |
| + iph = (struct iphdr *)(skb_network_header(skb) + offset); |
| + thoff = (iph->ihl * 4); |
| |
| if (ip_is_fragment(iph) || |
| unlikely(ip_has_options(thoff))) |
| return -1; |
| |
| - if (iph->protocol != IPPROTO_TCP && |
| - iph->protocol != IPPROTO_UDP) |
| + thoff += offset; |
| + |
| + switch (iph->protocol) { |
| + case IPPROTO_TCP: |
| + *hdrsize = sizeof(struct tcphdr); |
| + break; |
| + case IPPROTO_UDP: |
| + *hdrsize = sizeof(struct udphdr); |
| + break; |
| + default: |
| return -1; |
| + } |
| |
| if (iph->ttl <= 1) |
| return -1; |
| |
| - thoff = iph->ihl * 4; |
| - if (!pskb_may_pull(skb, thoff + sizeof(*ports))) |
| + if (!pskb_may_pull(skb, thoff + *hdrsize)) |
| return -1; |
| |
| - iph = ip_hdr(skb); |
| + iph = (struct iphdr *)(skb_network_header(skb) + offset); |
| ports = (struct flow_ports *)(skb_network_header(skb) + thoff); |
| |
| tuple->src_v4.s_addr = iph->saddr; |
| @@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, |
| tuple->l3proto = AF_INET; |
| tuple->l4proto = iph->protocol; |
| tuple->iifidx = dev->ifindex; |
| + nf_flow_tuple_encap(skb, tuple); |
| |
| return 0; |
| } |
| @@ -225,6 +237,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, |
| return NF_STOLEN; |
| } |
| |
| +static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, |
| + u32 *offset) |
| +{ |
| + struct vlan_ethhdr *veth; |
| + |
| + switch (skb->protocol) { |
| + case htons(ETH_P_8021Q): |
| + veth = (struct vlan_ethhdr *)skb_mac_header(skb); |
| + if (veth->h_vlan_encapsulated_proto == proto) { |
| + *offset += VLAN_HLEN; |
| + return true; |
| + } |
| + break; |
| + case htons(ETH_P_PPP_SES): |
| + if (nf_flow_pppoe_proto(skb) == proto) { |
| + *offset += PPPOE_SES_HLEN; |
| + return true; |
| + } |
| + break; |
| + } |
| + |
| + return false; |
| +} |
| + |
| +static void nf_flow_encap_pop(struct sk_buff *skb, |
| + struct flow_offload_tuple_rhash *tuplehash) |
| +{ |
| + struct vlan_hdr *vlan_hdr; |
| + int i; |
| + |
| + for (i = 0; i < tuplehash->tuple.encap_num; i++) { |
| + if (skb_vlan_tag_present(skb)) { |
| + __vlan_hwaccel_clear_tag(skb); |
| + continue; |
| + } |
| + switch (skb->protocol) { |
| + case htons(ETH_P_8021Q): |
| + vlan_hdr = (struct vlan_hdr *)skb->data; |
| + __skb_pull(skb, VLAN_HLEN); |
| + vlan_set_encap_proto(skb, vlan_hdr); |
| + skb_reset_network_header(skb); |
| + break; |
| + case htons(ETH_P_PPP_SES): |
| + skb->protocol = nf_flow_pppoe_proto(skb); |
| + skb_pull(skb, PPPOE_SES_HLEN); |
| + skb_reset_network_header(skb); |
| + break; |
| + } |
| + } |
| +} |
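| |
| nf_flow_encap_pop() strips the recorded encapsulations before NAT and transmit: hardware-accelerated tags are merely cleared from the skb metadata, while on-wire 802.1Q or PPPoE headers are pulled and the inner protocol is inherited. A pointer-arithmetic model of pulling one VLAN header (illustrative only): |
| |
| #include <stdio.h> |
| #include <stdint.h> |
| #include <string.h> |
| #include <arpa/inet.h> |
| |
| #define VLAN_HLEN 4 |
| |
| int main(void) |
| { |
|         /* TCI 0x0064 (VID 100), inner proto 0x0800, then an IPv4 header */ |
|         uint8_t pkt[] = { 0x00, 0x64, 0x08, 0x00, 0x45, 0x00 }; |
|         uint8_t *data = pkt; |
|         uint16_t proto = 0x8100, inner; |
| |
|         if (proto == 0x8100) { |
|                 memcpy(&inner, data + 2, 2);    /* vlan_set_encap_proto() */ |
|                 proto = ntohs(inner); |
|                 data += VLAN_HLEN;              /* __skb_pull() */ |
|         } |
|         printf("proto=0x%04x first payload byte=0x%02x\n", proto, data[0]); |
|         return 0; |
| } |
| |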
| + |
| +static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, |
| + const struct flow_offload_tuple_rhash *tuplehash, |
| + unsigned short type) |
| +{ |
| + struct net_device *outdev; |
| + |
| + outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx); |
| + if (!outdev) |
| + return NF_DROP; |
| + |
| + skb->dev = outdev; |
| + dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest, |
| + tuplehash->tuple.out.h_source, skb->len); |
| + dev_queue_xmit(skb); |
| + |
| + return NF_STOLEN; |
| +} |
| + |
| unsigned int |
| nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, |
| const struct nf_hook_state *state) |
| @@ -235,15 +316,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, |
| enum flow_offload_tuple_dir dir; |
| struct flow_offload *flow; |
| struct net_device *outdev; |
| + u32 hdrsize, offset = 0; |
| + unsigned int thoff, mtu; |
| struct rtable *rt; |
| - unsigned int thoff; |
| struct iphdr *iph; |
| __be32 nexthop; |
| + int ret; |
| |
| - if (skb->protocol != htons(ETH_P_IP)) |
| + if (skb->protocol != htons(ETH_P_IP) && |
| + !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset)) |
| return NF_ACCEPT; |
| |
| - if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0) |
| + if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0) |
| return NF_ACCEPT; |
| |
| tuplehash = flow_offload_lookup(flow_table, &tuple); |
| @@ -252,75 +336,80 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, |
| |
| dir = tuplehash->tuple.dir; |
| flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); |
| - rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache; |
| - outdev = rt->dst.dev; |
| - |
| - if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) |
| - return NF_ACCEPT; |
| |
| - if (skb_try_make_writable(skb, sizeof(*iph))) |
| - return NF_DROP; |
| - |
| - thoff = ip_hdr(skb)->ihl * 4; |
| - if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff)) |
| + mtu = flow->tuplehash[dir].tuple.mtu + offset; |
| + if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) |
| return NF_ACCEPT; |
| |
| - if (!dst_check(&rt->dst, 0)) { |
| - flow_offload_teardown(flow); |
| + iph = (struct iphdr *)(skb_network_header(skb) + offset); |
| + thoff = (iph->ihl * 4) + offset; |
| + if (nf_flow_state_check(flow, iph->protocol, skb, thoff)) |
| return NF_ACCEPT; |
| - } |
| |
| - if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0) |
| + if (skb_try_make_writable(skb, thoff + hdrsize)) |
| return NF_DROP; |
| |
| - flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; |
| + flow_offload_refresh(flow_table, flow); |
| + |
| + nf_flow_encap_pop(skb, tuplehash); |
| + thoff -= offset; |
| + |
| iph = ip_hdr(skb); |
| + nf_flow_nat_ip(flow, skb, thoff, dir, iph); |
| + |
| ip_decrease_ttl(iph); |
| skb->tstamp = 0; |
| |
| - if (unlikely(dst_xfrm(&rt->dst))) { |
| + if (flow_table->flags & NF_FLOWTABLE_COUNTER) |
| + nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); |
| + |
| + if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { |
| + rt = (struct rtable *)tuplehash->tuple.dst_cache; |
| memset(skb->cb, 0, sizeof(struct inet_skb_parm)); |
| IPCB(skb)->iif = skb->dev->ifindex; |
| IPCB(skb)->flags = IPSKB_FORWARDED; |
| return nf_flow_xmit_xfrm(skb, state, &rt->dst); |
| } |
| |
| - skb->dev = outdev; |
| - nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); |
| - skb_dst_set_noref(skb, &rt->dst); |
| - neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); |
| + switch (tuplehash->tuple.xmit_type) { |
| + case FLOW_OFFLOAD_XMIT_NEIGH: |
| + rt = (struct rtable *)tuplehash->tuple.dst_cache; |
| + outdev = rt->dst.dev; |
| + skb->dev = outdev; |
| + nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); |
| + skb_dst_set_noref(skb, &rt->dst); |
| + neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); |
| + ret = NF_STOLEN; |
| + break; |
| + case FLOW_OFFLOAD_XMIT_DIRECT: |
| + ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP); |
| + if (ret == NF_DROP) |
| + flow_offload_teardown(flow); |
| + break; |
| + } |
| |
| - return NF_STOLEN; |
| + return ret; |
| } |
| EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); |
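| |
| Before transmit the hook calls ip_decrease_ttl(), which also patches the IPv4 header checksum incrementally: the TTL is the high byte of its 16-bit word, so decrementing it adds 0x0100 to the stored check field. A simplified demonstration; the end-around-carry fold differs cosmetically from the kernel's and byte order is ignored: |
| |
| #include <stdio.h> |
| #include <stdint.h> |
| |
| static uint16_t decrease_ttl(uint8_t *ttl, uint16_t check) |
| { |
|         uint32_t sum = (uint32_t)check + 0x0100; |
| |
|         (*ttl)--; |
|         return sum + (sum >> 16);       /* fold the end-around carry */ |
| } |
| |
| int main(void) |
| { |
|         uint8_t ttl = 64; |
|         uint16_t check = 0xb861;        /* arbitrary header checksum */ |
| |
|         check = decrease_ttl(&ttl, check); |
|         printf("ttl=%u check=0x%04x\n", ttl, check); |
|         return 0; |
| } |
| |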
| |
| -static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, |
| - struct in6_addr *addr, |
| - struct in6_addr *new_addr) |
| +static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, |
| + struct in6_addr *addr, |
| + struct in6_addr *new_addr, |
| + struct ipv6hdr *ip6h) |
| { |
| struct tcphdr *tcph; |
| |
| - if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || |
| - skb_try_make_writable(skb, thoff + sizeof(*tcph))) |
| - return -1; |
| - |
| tcph = (void *)(skb_network_header(skb) + thoff); |
| inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32, |
| new_addr->s6_addr32, true); |
| - |
| - return 0; |
| } |
| |
| -static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, |
| - struct in6_addr *addr, |
| - struct in6_addr *new_addr) |
| +static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, |
| + struct in6_addr *addr, |
| + struct in6_addr *new_addr) |
| { |
| struct udphdr *udph; |
| |
| - if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || |
| - skb_try_make_writable(skb, thoff + sizeof(*udph))) |
| - return -1; |
| - |
| udph = (void *)(skb_network_header(skb) + thoff); |
| if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { |
| inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32, |
| @@ -328,32 +417,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, |
| if (!udph->check) |
| udph->check = CSUM_MANGLED_0; |
| } |
| - |
| - return 0; |
| } |
| |
| -static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, |
| - unsigned int thoff, struct in6_addr *addr, |
| - struct in6_addr *new_addr) |
| +static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, |
| + unsigned int thoff, struct in6_addr *addr, |
| + struct in6_addr *new_addr) |
| { |
| switch (ip6h->nexthdr) { |
| case IPPROTO_TCP: |
| - if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0) |
| - return NF_DROP; |
| + nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h); |
| break; |
| case IPPROTO_UDP: |
| - if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0) |
| - return NF_DROP; |
| + nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr); |
| break; |
| } |
| - |
| - return 0; |
| } |
| |
| -static int nf_flow_snat_ipv6(const struct flow_offload *flow, |
| - struct sk_buff *skb, struct ipv6hdr *ip6h, |
| - unsigned int thoff, |
| - enum flow_offload_tuple_dir dir) |
| +static void nf_flow_snat_ipv6(const struct flow_offload *flow, |
| + struct sk_buff *skb, struct ipv6hdr *ip6h, |
| + unsigned int thoff, |
| + enum flow_offload_tuple_dir dir) |
| { |
| struct in6_addr addr, new_addr; |
| |
| @@ -368,17 +451,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow, |
| new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6; |
| ip6h->daddr = new_addr; |
| break; |
| - default: |
| - return -1; |
| } |
| |
| - return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); |
| + nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); |
| } |
| |
| -static int nf_flow_dnat_ipv6(const struct flow_offload *flow, |
| - struct sk_buff *skb, struct ipv6hdr *ip6h, |
| - unsigned int thoff, |
| - enum flow_offload_tuple_dir dir) |
| +static void nf_flow_dnat_ipv6(const struct flow_offload *flow, |
| + struct sk_buff *skb, struct ipv6hdr *ip6h, |
| + unsigned int thoff, |
| + enum flow_offload_tuple_dir dir) |
| { |
| struct in6_addr addr, new_addr; |
| |
| @@ -393,56 +474,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow, |
| new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6; |
| ip6h->saddr = new_addr; |
| break; |
| - default: |
| - return -1; |
| } |
| |
| - return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); |
| + nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); |
| } |
| |
| -static int nf_flow_nat_ipv6(const struct flow_offload *flow, |
| - struct sk_buff *skb, |
| - enum flow_offload_tuple_dir dir) |
| +static void nf_flow_nat_ipv6(const struct flow_offload *flow, |
| + struct sk_buff *skb, |
| + enum flow_offload_tuple_dir dir, |
| + struct ipv6hdr *ip6h) |
| { |
| - struct ipv6hdr *ip6h = ipv6_hdr(skb); |
| unsigned int thoff = sizeof(*ip6h); |
| |
| - if (flow->flags & FLOW_OFFLOAD_SNAT && |
| - (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || |
| - nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) |
| - return -1; |
| - if (flow->flags & FLOW_OFFLOAD_DNAT && |
| - (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || |
| - nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) |
| - return -1; |
| - |
| - return 0; |
| + if (test_bit(NF_FLOW_SNAT, &flow->flags)) { |
| + nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir); |
| + nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir); |
| + } |
| + if (test_bit(NF_FLOW_DNAT, &flow->flags)) { |
| + nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir); |
| + nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir); |
| + } |
| } |
| |
| static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, |
| - struct flow_offload_tuple *tuple) |
| + struct flow_offload_tuple *tuple, u32 *hdrsize, |
| + u32 offset) |
| { |
| struct flow_ports *ports; |
| struct ipv6hdr *ip6h; |
| unsigned int thoff; |
| |
| - if (!pskb_may_pull(skb, sizeof(*ip6h))) |
| + thoff = sizeof(*ip6h) + offset; |
| + if (!pskb_may_pull(skb, thoff)) |
| return -1; |
| |
| - ip6h = ipv6_hdr(skb); |
| + ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); |
| |
| - if (ip6h->nexthdr != IPPROTO_TCP && |
| - ip6h->nexthdr != IPPROTO_UDP) |
| + switch (ip6h->nexthdr) { |
| + case IPPROTO_TCP: |
| + *hdrsize = sizeof(struct tcphdr); |
| + break; |
| + case IPPROTO_UDP: |
| + *hdrsize = sizeof(struct udphdr); |
| + break; |
| + default: |
| return -1; |
| + } |
| |
| if (ip6h->hop_limit <= 1) |
| return -1; |
| |
| - thoff = sizeof(*ip6h); |
| - if (!pskb_may_pull(skb, thoff + sizeof(*ports))) |
| + if (!pskb_may_pull(skb, thoff + *hdrsize)) |
| return -1; |
| |
| - ip6h = ipv6_hdr(skb); |
| + ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); |
| ports = (struct flow_ports *)(skb_network_header(skb) + thoff); |
| |
| tuple->src_v6 = ip6h->saddr; |
| @@ -452,6 +537,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, |
| tuple->l3proto = AF_INET6; |
| tuple->l4proto = ip6h->nexthdr; |
| tuple->iifidx = dev->ifindex; |
| + nf_flow_tuple_encap(skb, tuple); |
| |
| return 0; |
| } |
| @@ -467,13 +553,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, |
| const struct in6_addr *nexthop; |
| struct flow_offload *flow; |
| struct net_device *outdev; |
| + unsigned int thoff, mtu; |
| + u32 hdrsize, offset = 0; |
| struct ipv6hdr *ip6h; |
| struct rt6_info *rt; |
| + int ret; |
| |
| - if (skb->protocol != htons(ETH_P_IPV6)) |
| + if (skb->protocol != htons(ETH_P_IPV6) && |
| + !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset)) |
| return NF_ACCEPT; |
| |
| - if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0) |
| + if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0) |
| return NF_ACCEPT; |
| |
| tuplehash = flow_offload_lookup(flow_table, &tuple); |
| @@ -482,44 +572,57 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, |
| |
| dir = tuplehash->tuple.dir; |
| flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); |
| - rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache; |
| - outdev = rt->dst.dev; |
| |
| - if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) |
| + mtu = flow->tuplehash[dir].tuple.mtu + offset; |
| + if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) |
| return NF_ACCEPT; |
| |
| - if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb, |
| - sizeof(*ip6h))) |
| + ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); |
| + thoff = sizeof(*ip6h) + offset; |
| + if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff)) |
| return NF_ACCEPT; |
| |
| - if (!dst_check(&rt->dst, tuplehash->tuple.dst_cookie)) { |
| - flow_offload_teardown(flow); |
| - return NF_ACCEPT; |
| - } |
| - |
| - if (skb_try_make_writable(skb, sizeof(*ip6h))) |
| + if (skb_try_make_writable(skb, thoff + hdrsize)) |
| return NF_DROP; |
| |
| - if (nf_flow_nat_ipv6(flow, skb, dir) < 0) |
| - return NF_DROP; |
| + flow_offload_refresh(flow_table, flow); |
| + |
| + nf_flow_encap_pop(skb, tuplehash); |
| |
| - flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; |
| ip6h = ipv6_hdr(skb); |
| + nf_flow_nat_ipv6(flow, skb, dir, ip6h); |
| + |
| ip6h->hop_limit--; |
| skb->tstamp = 0; |
| |
| - if (unlikely(dst_xfrm(&rt->dst))) { |
| + if (flow_table->flags & NF_FLOWTABLE_COUNTER) |
| + nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); |
| + |
| + if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { |
| + rt = (struct rt6_info *)tuplehash->tuple.dst_cache; |
| memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); |
| IP6CB(skb)->iif = skb->dev->ifindex; |
| IP6CB(skb)->flags = IP6SKB_FORWARDED; |
| return nf_flow_xmit_xfrm(skb, state, &rt->dst); |
| } |
| |
| - skb->dev = outdev; |
| - nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); |
| - skb_dst_set_noref(skb, &rt->dst); |
| - neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); |
| + switch (tuplehash->tuple.xmit_type) { |
| + case FLOW_OFFLOAD_XMIT_NEIGH: |
| + rt = (struct rt6_info *)tuplehash->tuple.dst_cache; |
| + outdev = rt->dst.dev; |
| + skb->dev = outdev; |
| + nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); |
| + skb_dst_set_noref(skb, &rt->dst); |
| + neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); |
| + ret = NF_STOLEN; |
| + break; |
| + case FLOW_OFFLOAD_XMIT_DIRECT: |
| + ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6); |
| + if (ret == NF_DROP) |
| + flow_offload_teardown(flow); |
| + break; |
| + } |
| |
| - return NF_STOLEN; |
| + return ret; |
| } |
| EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook); |
| diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c |
| new file mode 100644 |
| index 000000000..d94c6fb92 |
| --- /dev/null |
| +++ b/net/netfilter/nf_flow_table_offload.c |
| @@ -0,0 +1,1195 @@ |
| +#include <linux/kernel.h> |
| +#include <linux/init.h> |
| +#include <linux/module.h> |
| +#include <linux/netfilter.h> |
| +#include <linux/rhashtable.h> |
| +#include <linux/netdevice.h> |
| +#include <linux/tc_act/tc_csum.h> |
| +#include <net/flow_offload.h> |
| +#include <net/netfilter/nf_flow_table.h> |
| +#include <net/netfilter/nf_tables.h> |
| +#include <net/netfilter/nf_conntrack.h> |
| +#include <net/netfilter/nf_conntrack_acct.h> |
| +#include <net/netfilter/nf_conntrack_core.h> |
| +#include <net/netfilter/nf_conntrack_tuple.h> |
| + |
| +static struct workqueue_struct *nf_flow_offload_add_wq; |
| +static struct workqueue_struct *nf_flow_offload_del_wq; |
| +static struct workqueue_struct *nf_flow_offload_stats_wq; |
| + |
| +struct flow_offload_work { |
| + struct list_head list; |
| + enum flow_cls_command cmd; |
| + int priority; |
| + struct nf_flowtable *flowtable; |
| + struct flow_offload *flow; |
| + struct work_struct work; |
| +}; |
| + |
| +#define NF_FLOW_DISSECTOR(__match, __type, __field) \ |
| + (__match)->dissector.offset[__type] = \ |
| + offsetof(struct nf_flow_key, __field) |
| + |
| +static void nf_flow_rule_lwt_match(struct nf_flow_match *match, |
| + struct ip_tunnel_info *tun_info) |
| +{ |
| + struct nf_flow_key *mask = &match->mask; |
| + struct nf_flow_key *key = &match->key; |
| + unsigned int enc_keys; |
| + |
| + if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX)) |
| + return; |
| + |
| + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control); |
| + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id); |
| + key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id); |
| + mask->enc_key_id.keyid = 0xffffffff; |
| + enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | |
| + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL); |
| + |
| + if (ip_tunnel_info_af(tun_info) == AF_INET) { |
| + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, |
| + enc_ipv4); |
| + key->enc_ipv4.src = tun_info->key.u.ipv4.dst; |
| + key->enc_ipv4.dst = tun_info->key.u.ipv4.src; |
| + if (key->enc_ipv4.src) |
| + mask->enc_ipv4.src = 0xffffffff; |
| + if (key->enc_ipv4.dst) |
| + mask->enc_ipv4.dst = 0xffffffff; |
| + enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS); |
| + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
| + } else { |
| + memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst, |
| + sizeof(struct in6_addr)); |
| + memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src, |
| + sizeof(struct in6_addr)); |
| + if (memcmp(&key->enc_ipv6.src, &in6addr_any, |
| + sizeof(struct in6_addr))) |
| + memset(&mask->enc_ipv6.src, 0xff, |
| + sizeof(struct in6_addr)); |
| + if (memcmp(&key->enc_ipv6.dst, &in6addr_any, |
| + sizeof(struct in6_addr))) |
| + memset(&mask->enc_ipv6.dst, 0xff, |
| + sizeof(struct in6_addr)); |
| + enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS); |
| + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; |
| + } |
| + |
| + match->dissector.used_keys |= enc_keys; |
| +} |
| + |
| +static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key, |
| + struct flow_dissector_key_vlan *mask, |
| + u16 vlan_id, __be16 proto) |
| +{ |
| + key->vlan_id = vlan_id; |
| + mask->vlan_id = VLAN_VID_MASK; |
| + key->vlan_tpid = proto; |
| + mask->vlan_tpid = 0xffff; |
| +} |
| + |
| +static int nf_flow_rule_match(struct nf_flow_match *match, |
| + const struct flow_offload_tuple *tuple, |
| + struct dst_entry *other_dst) |
| +{ |
| + struct nf_flow_key *mask = &match->mask; |
| + struct nf_flow_key *key = &match->key; |
| + struct ip_tunnel_info *tun_info; |
| + bool vlan_encap = false; |
| + |
| + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta); |
| + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control); |
| + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic); |
| + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); |
| + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); |
| + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp); |
| + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp); |
| + |
| + if (other_dst && other_dst->lwtstate) { |
| + tun_info = lwt_tun_info(other_dst->lwtstate); |
| + nf_flow_rule_lwt_match(match, tun_info); |
| + } |
| + |
| + key->meta.ingress_ifindex = tuple->iifidx; |
| + mask->meta.ingress_ifindex = 0xffffffff; |
| + |
| + if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) && |
| + tuple->encap[0].proto == htons(ETH_P_8021Q)) { |
| + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan); |
| + nf_flow_rule_vlan_match(&key->vlan, &mask->vlan, |
| + tuple->encap[0].id, |
| + tuple->encap[0].proto); |
| + vlan_encap = true; |
| + } |
| + |
| + if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) && |
| + tuple->encap[1].proto == htons(ETH_P_8021Q)) { |
| + if (vlan_encap) { |
| + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN, |
| + cvlan); |
| + nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan, |
| + tuple->encap[1].id, |
| + tuple->encap[1].proto); |
| + } else { |
| + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, |
| + vlan); |
| + nf_flow_rule_vlan_match(&key->vlan, &mask->vlan, |
| + tuple->encap[1].id, |
| + tuple->encap[1].proto); |
| + } |
| + } |
| + |
| + switch (tuple->l3proto) { |
| + case AF_INET: |
| + key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
| + key->basic.n_proto = htons(ETH_P_IP); |
| + key->ipv4.src = tuple->src_v4.s_addr; |
| + mask->ipv4.src = 0xffffffff; |
| + key->ipv4.dst = tuple->dst_v4.s_addr; |
| + mask->ipv4.dst = 0xffffffff; |
| + break; |
| + case AF_INET6: |
| + key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; |
| + key->basic.n_proto = htons(ETH_P_IPV6); |
| + key->ipv6.src = tuple->src_v6; |
| + memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src)); |
| + key->ipv6.dst = tuple->dst_v6; |
| + memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst)); |
| + break; |
| + default: |
| + return -EOPNOTSUPP; |
| + } |
| + mask->control.addr_type = 0xffff; |
| + match->dissector.used_keys |= BIT(key->control.addr_type); |
| + mask->basic.n_proto = 0xffff; |
| + |
| + switch (tuple->l4proto) { |
| + case IPPROTO_TCP: |
| + key->tcp.flags = 0; |
| + mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16); |
| + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP); |
| + break; |
| + case IPPROTO_UDP: |
| + break; |
| + default: |
| + return -EOPNOTSUPP; |
| + } |
| + |
| + key->basic.ip_proto = tuple->l4proto; |
| + mask->basic.ip_proto = 0xff; |
| + |
| + key->tp.src = tuple->src_port; |
| + mask->tp.src = 0xffff; |
| + key->tp.dst = tuple->dst_port; |
| + mask->tp.dst = 0xffff; |
| + |
| + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) | |
| + BIT(FLOW_DISSECTOR_KEY_CONTROL) | |
| + BIT(FLOW_DISSECTOR_KEY_BASIC) | |
| + BIT(FLOW_DISSECTOR_KEY_PORTS); |
| + return 0; |
| +} |
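| |
| The routine above emits one flow dissector key/mask pair per matched field; a consumer accepts a packet when (field & mask) == (key & mask), and the flowtable requests exact all-ones masks for addresses and ports. A tiny demonstration of those semantics with an IPv4 source address: |
| |
| #include <stdio.h> |
| #include <stdint.h> |
| |
| int main(void) |
| { |
|         uint32_t key  = 0xc0a80001;     /* 192.168.0.1 */ |
|         uint32_t mask = 0xffffffff;     /* exact match, as used above */ |
|         uint32_t pkt1 = 0xc0a80001; |
|         uint32_t pkt2 = 0xc0a80002; |
| |
|         printf("pkt1 matches: %d\n", (pkt1 & mask) == (key & mask)); |
|         printf("pkt2 matches: %d\n", (pkt2 & mask) == (key & mask)); |
|         return 0; |
| } |
| |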
| + |
| +static void flow_offload_mangle(struct flow_action_entry *entry, |
| + enum flow_action_mangle_base htype, u32 offset, |
| + const __be32 *value, const __be32 *mask) |
| +{ |
| + entry->id = FLOW_ACTION_MANGLE; |
| + entry->mangle.htype = htype; |
| + entry->mangle.offset = offset; |
| + memcpy(&entry->mangle.mask, mask, sizeof(u32)); |
| + memcpy(&entry->mangle.val, value, sizeof(u32)); |
| +} |
| + |
| +static inline struct flow_action_entry * |
| +flow_action_entry_next(struct nf_flow_rule *flow_rule) |
| +{ |
| + int i = flow_rule->rule->action.num_entries++; |
| + |
| + return &flow_rule->rule->action.entries[i]; |
| +} |
| + |
| +static int flow_offload_eth_src(struct net *net, |
| + const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule); |
| + struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule); |
| + const struct flow_offload_tuple *other_tuple, *this_tuple; |
| + struct net_device *dev = NULL; |
| + const unsigned char *addr; |
| + u32 mask, val; |
| + u16 val16; |
| + |
| + this_tuple = &flow->tuplehash[dir].tuple; |
| + |
| + switch (this_tuple->xmit_type) { |
| + case FLOW_OFFLOAD_XMIT_DIRECT: |
| + addr = this_tuple->out.h_source; |
| + break; |
| + case FLOW_OFFLOAD_XMIT_NEIGH: |
| + other_tuple = &flow->tuplehash[!dir].tuple; |
| + dev = dev_get_by_index(net, other_tuple->iifidx); |
| + if (!dev) |
| + return -ENOENT; |
| + |
| + addr = dev->dev_addr; |
| + break; |
| + default: |
| + return -EOPNOTSUPP; |
| + } |
| + |
| + mask = ~0xffff0000; |
| + memcpy(&val16, addr, 2); |
| + val = val16 << 16; |
| + flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4, |
| + &val, &mask); |
| + |
| + mask = ~0xffffffff; |
| + memcpy(&val, addr + 2, 4); |
| + flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8, |
| + &val, &mask); |
| + |
| + if (dev) |
| + dev_put(dev); |
| + |
| + return 0; |
| +} |
| + |
| +static int flow_offload_eth_dst(struct net *net, |
| + const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule); |
| + struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule); |
| + const struct flow_offload_tuple *other_tuple, *this_tuple; |
| + const struct dst_entry *dst_cache; |
| + unsigned char ha[ETH_ALEN]; |
| + struct neighbour *n; |
| + const void *daddr; |
| + u32 mask, val; |
| + u8 nud_state; |
| + u16 val16; |
| + |
| + this_tuple = &flow->tuplehash[dir].tuple; |
| + |
| + switch (this_tuple->xmit_type) { |
| + case FLOW_OFFLOAD_XMIT_DIRECT: |
| + ether_addr_copy(ha, this_tuple->out.h_dest); |
| + break; |
| + case FLOW_OFFLOAD_XMIT_NEIGH: |
| + other_tuple = &flow->tuplehash[!dir].tuple; |
| + daddr = &other_tuple->src_v4; |
| + dst_cache = this_tuple->dst_cache; |
| + n = dst_neigh_lookup(dst_cache, daddr); |
| + if (!n) |
| + return -ENOENT; |
| + |
| + read_lock_bh(&n->lock); |
| + nud_state = n->nud_state; |
| + ether_addr_copy(ha, n->ha); |
| + read_unlock_bh(&n->lock); |
| + neigh_release(n); |
| + |
| + if (!(nud_state & NUD_VALID)) |
| + return -ENOENT; |
| + break; |
| + default: |
| + return -EOPNOTSUPP; |
| + } |
| + |
| + mask = ~0xffffffff; |
| + memcpy(&val, ha, 4); |
| + flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0, |
| + &val, &mask); |
| + |
| + mask = ~0x0000ffff; |
| + memcpy(&val16, ha + 4, 2); |
| + val = val16; |
| + flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4, |
| + &val, &mask); |
| + |
| + return 0; |
| +} |
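| |
| Ethernet rewrites are expressed as 32-bit mangle words: the destination MAC is a full word at offset 0 plus the low half of the word at offset 4, and the source MAC is the high half at offset 4 plus a full word at offset 8, with mask bits set wherever the neighbouring field must survive. A demonstration of the packing, assuming the common (old & mask) | val mangle semantics and ignoring endianness details: |
| |
| #include <stdio.h> |
| #include <stdint.h> |
| #include <string.h> |
| |
| int main(void) |
| { |
|         uint8_t ha[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }; |
|         uint32_t old = 0xdeadbeef, val, mask; |
|         uint16_t val16; |
| |
|         memcpy(&val, ha, 4);            /* bytes 0..3 -> word at offset 0 */ |
|         mask = ~0xffffffffu;            /* all bits rewritten */ |
|         printf("offset 0: %08x\n", (old & mask) | val); |
| |
|         memcpy(&val16, ha + 4, 2);      /* bytes 4..5 -> low half of offset 4 */ |
|         val = val16; |
|         mask = ~0x0000ffffu;            /* high half (source MAC start) kept */ |
|         printf("offset 4: %08x\n", (old & mask) | val); |
|         return 0; |
| } |
| |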
| + |
| +static void flow_offload_ipv4_snat(struct net *net, |
| + const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + struct flow_action_entry *entry = flow_action_entry_next(flow_rule); |
| + u32 mask = ~htonl(0xffffffff); |
| + __be32 addr; |
| + u32 offset; |
| + |
| + switch (dir) { |
| + case FLOW_OFFLOAD_DIR_ORIGINAL: |
| + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; |
| + offset = offsetof(struct iphdr, saddr); |
| + break; |
| + case FLOW_OFFLOAD_DIR_REPLY: |
| + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; |
| + offset = offsetof(struct iphdr, daddr); |
| + break; |
| + default: |
| + return; |
| + } |
| + |
| + flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, |
| + &addr, &mask); |
| +} |
| + |
| +static void flow_offload_ipv4_dnat(struct net *net, |
| + const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + struct flow_action_entry *entry = flow_action_entry_next(flow_rule); |
| + u32 mask = ~htonl(0xffffffff); |
| + __be32 addr; |
| + u32 offset; |
| + |
| + switch (dir) { |
| + case FLOW_OFFLOAD_DIR_ORIGINAL: |
| + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; |
| + offset = offsetof(struct iphdr, daddr); |
| + break; |
| + case FLOW_OFFLOAD_DIR_REPLY: |
| + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; |
| + offset = offsetof(struct iphdr, saddr); |
| + break; |
| + default: |
| + return; |
| + } |
| + |
| + flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, |
| + &addr, &mask); |
| +} |
| + |
| +static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, |
| + unsigned int offset, |
| + const __be32 *addr, const __be32 *mask) |
| +{ |
| + struct flow_action_entry *entry; |
| + int i, j; |
| + |
| + for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) { |
| + entry = flow_action_entry_next(flow_rule); |
| + flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, |
| + offset + i, &addr[j], mask); |
| + } |
| +} |
| + |
| +static void flow_offload_ipv6_snat(struct net *net, |
| + const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + u32 mask = ~htonl(0xffffffff); |
| + const __be32 *addr; |
| + u32 offset; |
| + |
| + switch (dir) { |
| + case FLOW_OFFLOAD_DIR_ORIGINAL: |
| + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32; |
| + offset = offsetof(struct ipv6hdr, saddr); |
| + break; |
| + case FLOW_OFFLOAD_DIR_REPLY: |
| + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32; |
| + offset = offsetof(struct ipv6hdr, daddr); |
| + break; |
| + default: |
| + return; |
| + } |
| + |
| + flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); |
| +} |
| + |
| +static void flow_offload_ipv6_dnat(struct net *net, |
| + const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + u32 mask = ~htonl(0xffffffff); |
| + const __be32 *addr; |
| + u32 offset; |
| + |
| + switch (dir) { |
| + case FLOW_OFFLOAD_DIR_ORIGINAL: |
| + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32; |
| + offset = offsetof(struct ipv6hdr, daddr); |
| + break; |
| + case FLOW_OFFLOAD_DIR_REPLY: |
| + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32; |
| + offset = offsetof(struct ipv6hdr, saddr); |
| + break; |
| + default: |
| + return; |
| + } |
| + |
| + flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); |
| +} |
| + |
| +static int flow_offload_l4proto(const struct flow_offload *flow) |
| +{ |
| + u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto; |
| + u8 type = 0; |
| + |
| + switch (protonum) { |
| + case IPPROTO_TCP: |
| + type = FLOW_ACT_MANGLE_HDR_TYPE_TCP; |
| + break; |
| + case IPPROTO_UDP: |
| + type = FLOW_ACT_MANGLE_HDR_TYPE_UDP; |
| + break; |
| + default: |
| + break; |
| + } |
| + |
| + return type; |
| +} |
| + |
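| +/* L4 ports are mangled as one 32-bit word at offset 0 of the transport |
| + * header: the source port sits in the upper 16 bits, the destination |
| + * port in the lower 16, hence the shifts and 0xffff0000/0xffff masks. |
| + */ |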
| +static void flow_offload_port_snat(struct net *net, |
| + const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + struct flow_action_entry *entry = flow_action_entry_next(flow_rule); |
| + u32 mask, port; |
| + u32 offset; |
| + |
| + switch (dir) { |
| + case FLOW_OFFLOAD_DIR_ORIGINAL: |
| + port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port); |
| + offset = 0; /* offsetof(struct tcphdr, source); */ |
| + port = htonl(port << 16); |
| + mask = ~htonl(0xffff0000); |
| + break; |
| + case FLOW_OFFLOAD_DIR_REPLY: |
| + port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port); |
| + offset = 0; /* offsetof(struct tcphdr, dest); */ |
| + port = htonl(port); |
| + mask = ~htonl(0xffff); |
| + break; |
| + default: |
| + return; |
| + } |
| + |
| + flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, |
| + &port, &mask); |
| +} |
| + |
| +static void flow_offload_port_dnat(struct net *net, |
| + const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + struct flow_action_entry *entry = flow_action_entry_next(flow_rule); |
| + u32 mask, port; |
| + u32 offset; |
| + |
| + switch (dir) { |
| + case FLOW_OFFLOAD_DIR_ORIGINAL: |
| + port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port); |
| + offset = 0; /* offsetof(struct tcphdr, dest); */ |
| + port = htonl(port); |
| + mask = ~htonl(0xffff); |
| + break; |
| + case FLOW_OFFLOAD_DIR_REPLY: |
| + port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port); |
| + offset = 0; /* offsetof(struct tcphdr, source); */ |
| + port = htonl(port << 16); |
| + mask = ~htonl(0xffff0000); |
| + break; |
| + default: |
| + return; |
| + } |
| + |
| + flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, |
| + &port, &mask); |
| +} |
| + |
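| +/* After a NAT mangle, request recomputation of the IPv4 header checksum |
| + * and, for TCP or UDP, of the transport checksum as well. |
| + */ |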
| +static void flow_offload_ipv4_checksum(struct net *net, |
| + const struct flow_offload *flow, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto; |
| + struct flow_action_entry *entry = flow_action_entry_next(flow_rule); |
| + |
| + entry->id = FLOW_ACTION_CSUM; |
| + entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR; |
| + |
| + switch (protonum) { |
| + case IPPROTO_TCP: |
| + entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP; |
| + break; |
| + case IPPROTO_UDP: |
| + entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP; |
| + break; |
| + } |
| +} |
| + |
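| +/* Select the egress device: the hardware ifindex of this tuple for |
| + * direct xmit, otherwise the ingress ifindex of the opposite tuple. |
| + * The reference taken here is dropped in __nf_flow_offload_destroy(). |
| + */ |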
| +static void flow_offload_redirect(struct net *net, |
| + const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + const struct flow_offload_tuple *this_tuple, *other_tuple; |
| + struct flow_action_entry *entry; |
| + struct net_device *dev; |
| + int ifindex; |
| + |
| + this_tuple = &flow->tuplehash[dir].tuple; |
| + switch (this_tuple->xmit_type) { |
| + case FLOW_OFFLOAD_XMIT_DIRECT: |
| + this_tuple = &flow->tuplehash[dir].tuple; |
| + ifindex = this_tuple->out.hw_ifidx; |
| + break; |
| + case FLOW_OFFLOAD_XMIT_NEIGH: |
| + other_tuple = &flow->tuplehash[!dir].tuple; |
| + ifindex = other_tuple->iifidx; |
| + break; |
| + default: |
| + return; |
| + } |
| + |
| + dev = dev_get_by_index(net, ifindex); |
| + if (!dev) |
| + return; |
| + |
| + entry = flow_action_entry_next(flow_rule); |
| + entry->id = FLOW_ACTION_REDIRECT; |
| + entry->dev = dev; |
| +} |
| + |
| +static void flow_offload_encap_tunnel(const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + const struct flow_offload_tuple *this_tuple; |
| + struct flow_action_entry *entry; |
| + struct dst_entry *dst; |
| + |
| + this_tuple = &flow->tuplehash[dir].tuple; |
| + if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) |
| + return; |
| + |
| + dst = this_tuple->dst_cache; |
| + if (dst && dst->lwtstate) { |
| + struct ip_tunnel_info *tun_info; |
| + |
| + tun_info = lwt_tun_info(dst->lwtstate); |
| + if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { |
| + entry = flow_action_entry_next(flow_rule); |
| + entry->id = FLOW_ACTION_TUNNEL_ENCAP; |
| + entry->tunnel = tun_info; |
| + } |
| + } |
| +} |
| + |
| +static void flow_offload_decap_tunnel(const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + const struct flow_offload_tuple *other_tuple; |
| + struct flow_action_entry *entry; |
| + struct dst_entry *dst; |
| + |
| + other_tuple = &flow->tuplehash[!dir].tuple; |
| + if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) |
| + return; |
| + |
| + dst = other_tuple->dst_cache; |
| + if (dst && dst->lwtstate) { |
| + struct ip_tunnel_info *tun_info; |
| + |
| + tun_info = lwt_tun_info(dst->lwtstate); |
| + if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { |
| + entry = flow_action_entry_next(flow_rule); |
| + entry->id = FLOW_ACTION_TUNNEL_DECAP; |
| + } |
| + } |
| +} |
| + |
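| +/* Family-independent part of the action list: tunnel decap/encap, |
| + * Ethernet header rewrite, popping the VLAN tags seen on ingress and |
| + * pushing the VLAN/PPPoE encapsulation required on egress (taken from |
| + * the opposite tuple). |
| + */ |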
| +static int |
| +nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + const struct flow_offload_tuple *other_tuple; |
| + const struct flow_offload_tuple *tuple; |
| + int i; |
| + |
| + flow_offload_decap_tunnel(flow, dir, flow_rule); |
| + flow_offload_encap_tunnel(flow, dir, flow_rule); |
| + |
| + if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || |
| + flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) |
| + return -1; |
| + |
| + tuple = &flow->tuplehash[dir].tuple; |
| + |
| + for (i = 0; i < tuple->encap_num; i++) { |
| + struct flow_action_entry *entry; |
| + |
| + if (tuple->in_vlan_ingress & BIT(i)) |
| + continue; |
| + |
| + if (tuple->encap[i].proto == htons(ETH_P_8021Q)) { |
| + entry = flow_action_entry_next(flow_rule); |
| + entry->id = FLOW_ACTION_VLAN_POP; |
| + } |
| + } |
| + |
| + other_tuple = &flow->tuplehash[!dir].tuple; |
| + |
| + for (i = 0; i < other_tuple->encap_num; i++) { |
| + struct flow_action_entry *entry; |
| + |
| + if (other_tuple->in_vlan_ingress & BIT(i)) |
| + continue; |
| + |
| + entry = flow_action_entry_next(flow_rule); |
| + |
| + switch (other_tuple->encap[i].proto) { |
| + case htons(ETH_P_PPP_SES): |
| + entry->id = FLOW_ACTION_PPPOE_PUSH; |
| + entry->pppoe.sid = other_tuple->encap[i].id; |
| + break; |
| + case htons(ETH_P_8021Q): |
| + entry->id = FLOW_ACTION_VLAN_PUSH; |
| + entry->vlan.vid = other_tuple->encap[i].id; |
| + entry->vlan.proto = other_tuple->encap[i].proto; |
| + break; |
| + } |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0) |
| + return -1; |
| + |
| + if (test_bit(NF_FLOW_SNAT, &flow->flags)) { |
| + flow_offload_ipv4_snat(net, flow, dir, flow_rule); |
| + flow_offload_port_snat(net, flow, dir, flow_rule); |
| + } |
| + if (test_bit(NF_FLOW_DNAT, &flow->flags)) { |
| + flow_offload_ipv4_dnat(net, flow, dir, flow_rule); |
| + flow_offload_port_dnat(net, flow, dir, flow_rule); |
| + } |
| + if (test_bit(NF_FLOW_SNAT, &flow->flags) || |
| + test_bit(NF_FLOW_DNAT, &flow->flags)) |
| + flow_offload_ipv4_checksum(net, flow, flow_rule); |
| + |
| + flow_offload_redirect(net, flow, dir, flow_rule); |
| + |
| + return 0; |
| +} |
| +EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4); |
| + |
| +int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0) |
| + return -1; |
| + |
| + if (test_bit(NF_FLOW_SNAT, &flow->flags)) { |
| + flow_offload_ipv6_snat(net, flow, dir, flow_rule); |
| + flow_offload_port_snat(net, flow, dir, flow_rule); |
| + } |
| + if (test_bit(NF_FLOW_DNAT, &flow->flags)) { |
| + flow_offload_ipv6_dnat(net, flow, dir, flow_rule); |
| + flow_offload_port_dnat(net, flow, dir, flow_rule); |
| + } |
| + |
| + flow_offload_redirect(net, flow, dir, flow_rule); |
| + |
| + return 0; |
| +} |
| +EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6); |
| + |
| +#define NF_FLOW_RULE_ACTION_MAX 16 |
| + |
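| +/* Build the offload rule for one flow direction: set up the match from |
| + * the tuple and fill the action list through the flowtable type's |
| + * ->action() callback, bounded by NF_FLOW_RULE_ACTION_MAX entries. |
| + */ |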
| +static struct nf_flow_rule * |
| +nf_flow_offload_rule_alloc(struct net *net, |
| + const struct flow_offload_work *offload, |
| + enum flow_offload_tuple_dir dir) |
| +{ |
| + const struct nf_flowtable *flowtable = offload->flowtable; |
| + const struct flow_offload_tuple *tuple, *other_tuple; |
| + const struct flow_offload *flow = offload->flow; |
| + struct dst_entry *other_dst = NULL; |
| + struct nf_flow_rule *flow_rule; |
| + int err = -ENOMEM; |
| + |
| + flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL); |
| + if (!flow_rule) |
| + goto err_flow; |
| + |
| + flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX); |
| + if (!flow_rule->rule) |
| + goto err_flow_rule; |
| + |
| + flow_rule->rule->match.dissector = &flow_rule->match.dissector; |
| + flow_rule->rule->match.mask = &flow_rule->match.mask; |
| + flow_rule->rule->match.key = &flow_rule->match.key; |
| + |
| + tuple = &flow->tuplehash[dir].tuple; |
| + other_tuple = &flow->tuplehash[!dir].tuple; |
| + if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) |
| + other_dst = other_tuple->dst_cache; |
| + |
| + err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst); |
| + if (err < 0) |
| + goto err_flow_match; |
| + |
| + flow_rule->rule->action.num_entries = 0; |
| + if (flowtable->type->action(net, flow, dir, flow_rule) < 0) |
| + goto err_flow_match; |
| + |
| + return flow_rule; |
| + |
| +err_flow_match: |
| + kfree(flow_rule->rule); |
| +err_flow_rule: |
| + kfree(flow_rule); |
| +err_flow: |
| + return NULL; |
| +} |
| + |
| +static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule) |
| +{ |
| + struct flow_action_entry *entry; |
| + int i; |
| + |
| + for (i = 0; i < flow_rule->rule->action.num_entries; i++) { |
| + entry = &flow_rule->rule->action.entries[i]; |
| + if (entry->id != FLOW_ACTION_REDIRECT) |
| + continue; |
| + |
| + dev_put(entry->dev); |
| + } |
| + kfree(flow_rule->rule); |
| + kfree(flow_rule); |
| +} |
| + |
| +static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[]) |
| +{ |
| + int i; |
| + |
| + for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++) |
| + __nf_flow_offload_destroy(flow_rule[i]); |
| +} |
| + |
| +static int nf_flow_offload_alloc(const struct flow_offload_work *offload, |
| + struct nf_flow_rule *flow_rule[]) |
| +{ |
| + struct net *net = read_pnet(&offload->flowtable->net); |
| + |
| + flow_rule[0] = nf_flow_offload_rule_alloc(net, offload, |
| + FLOW_OFFLOAD_DIR_ORIGINAL); |
| + if (!flow_rule[0]) |
| + return -ENOMEM; |
| + |
| + flow_rule[1] = nf_flow_offload_rule_alloc(net, offload, |
| + FLOW_OFFLOAD_DIR_REPLY); |
| + if (!flow_rule[1]) { |
| + __nf_flow_offload_destroy(flow_rule[0]); |
| + return -ENOMEM; |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +static void nf_flow_offload_init(struct flow_cls_offload *cls_flow, |
| + __be16 proto, int priority, |
| + enum flow_cls_command cmd, |
| + const struct flow_offload_tuple *tuple, |
| + struct netlink_ext_ack *extack) |
| +{ |
| + cls_flow->common.protocol = proto; |
| + cls_flow->common.prio = priority; |
| + cls_flow->common.extack = extack; |
| + cls_flow->command = cmd; |
| + cls_flow->cookie = (unsigned long)tuple; |
| +} |
| + |
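| +/* Run one classifier command (replace, destroy or stats) against every |
| + * callback bound to the flowtable block; returns how many callbacks |
| + * accepted the command. |
| + */ |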
| +static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, |
| + struct flow_offload *flow, |
| + struct nf_flow_rule *flow_rule, |
| + enum flow_offload_tuple_dir dir, |
| + int priority, int cmd, |
| + struct flow_stats *stats, |
| + struct list_head *block_cb_list) |
| +{ |
| + struct flow_cls_offload cls_flow = {}; |
| + struct flow_block_cb *block_cb; |
| + struct netlink_ext_ack extack; |
| + __be16 proto = ETH_P_ALL; |
| + int err, i = 0; |
| + |
| + nf_flow_offload_init(&cls_flow, proto, priority, cmd, |
| + &flow->tuplehash[dir].tuple, &extack); |
| + if (cmd == FLOW_CLS_REPLACE) |
| + cls_flow.rule = flow_rule->rule; |
| + |
| + down_write(&flowtable->flow_block_lock); |
| + list_for_each_entry(block_cb, block_cb_list, list) { |
| + err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, |
| + block_cb->cb_priv); |
| + if (err < 0) |
| + continue; |
| + |
| + i++; |
| + } |
| + up_write(&flowtable->flow_block_lock); |
| + |
| + if (cmd == FLOW_CLS_STATS) |
| + memcpy(stats, &cls_flow.stats, sizeof(*stats)); |
| + |
| + return i; |
| +} |
| + |
| +static int flow_offload_tuple_add(struct flow_offload_work *offload, |
| + struct nf_flow_rule *flow_rule, |
| + enum flow_offload_tuple_dir dir) |
| +{ |
| + return nf_flow_offload_tuple(offload->flowtable, offload->flow, |
| + flow_rule, dir, offload->priority, |
| + FLOW_CLS_REPLACE, NULL, |
| + &offload->flowtable->flow_block.cb_list); |
| +} |
| + |
| +static void flow_offload_tuple_del(struct flow_offload_work *offload, |
| + enum flow_offload_tuple_dir dir) |
| +{ |
| + nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, |
| + offload->priority, FLOW_CLS_DESTROY, NULL, |
| + &offload->flowtable->flow_block.cb_list); |
| +} |
| + |
| +static int flow_offload_rule_add(struct flow_offload_work *offload, |
| + struct nf_flow_rule *flow_rule[]) |
| +{ |
| + int ok_count = 0; |
| + |
| + ok_count += flow_offload_tuple_add(offload, flow_rule[0], |
| + FLOW_OFFLOAD_DIR_ORIGINAL); |
| + ok_count += flow_offload_tuple_add(offload, flow_rule[1], |
| + FLOW_OFFLOAD_DIR_REPLY); |
| + if (ok_count == 0) |
| + return -ENOENT; |
| + |
| + return 0; |
| +} |
| + |
| +static void flow_offload_work_add(struct flow_offload_work *offload) |
| +{ |
| + struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX]; |
| + int err; |
| + |
| + err = nf_flow_offload_alloc(offload, flow_rule); |
| + if (err < 0) |
| + return; |
| + |
| + err = flow_offload_rule_add(offload, flow_rule); |
| + if (err < 0) |
| + goto out; |
| + |
| + set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status); |
| + |
| +out: |
| + nf_flow_offload_destroy(flow_rule); |
| +} |
| + |
| +static void flow_offload_work_del(struct flow_offload_work *offload) |
| +{ |
| + clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status); |
| + flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL); |
| + flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY); |
| + set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags); |
| +} |
| + |
| +static void flow_offload_tuple_stats(struct flow_offload_work *offload, |
| + enum flow_offload_tuple_dir dir, |
| + struct flow_stats *stats) |
| +{ |
| + nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, |
| + offload->priority, FLOW_CLS_STATS, stats, |
| + &offload->flowtable->flow_block.cb_list); |
| +} |
| + |
| +static void flow_offload_work_stats(struct flow_offload_work *offload) |
| +{ |
| + struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {}; |
| + u64 lastused; |
| + |
| + flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]); |
| + flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]); |
| + |
| + lastused = max_t(u64, stats[0].lastused, stats[1].lastused); |
| + offload->flow->timeout = max_t(u64, offload->flow->timeout, |
| + lastused + flow_offload_get_timeout(offload->flow)); |
| + |
| + if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) { |
| + if (stats[0].pkts) |
| + nf_ct_acct_add(offload->flow->ct, |
| + FLOW_OFFLOAD_DIR_ORIGINAL, |
| + stats[0].pkts, stats[0].bytes); |
| + if (stats[1].pkts) |
| + nf_ct_acct_add(offload->flow->ct, |
| + FLOW_OFFLOAD_DIR_REPLY, |
| + stats[1].pkts, stats[1].bytes); |
| + } |
| +} |
| + |
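| +/* Work item dispatcher: each flow_offload_work carries one of |
| + * FLOW_CLS_REPLACE, FLOW_CLS_DESTROY or FLOW_CLS_STATS and is processed |
| + * on the workqueue matching its command. |
| + */ |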
| +static void flow_offload_work_handler(struct work_struct *work) |
| +{ |
| + struct flow_offload_work *offload; |
| + |
| + offload = container_of(work, struct flow_offload_work, work); |
| + switch (offload->cmd) { |
| + case FLOW_CLS_REPLACE: |
| + flow_offload_work_add(offload); |
| + break; |
| + case FLOW_CLS_DESTROY: |
| + flow_offload_work_del(offload); |
| + break; |
| + case FLOW_CLS_STATS: |
| + flow_offload_work_stats(offload); |
| + break; |
| + default: |
| + WARN_ON_ONCE(1); |
| + } |
| + |
| + clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags); |
| + kfree(offload); |
| +} |
| + |
| +static void flow_offload_queue_work(struct flow_offload_work *offload) |
| +{ |
| + if (offload->cmd == FLOW_CLS_REPLACE) |
| + queue_work(nf_flow_offload_add_wq, &offload->work); |
| + else if (offload->cmd == FLOW_CLS_DESTROY) |
| + queue_work(nf_flow_offload_del_wq, &offload->work); |
| + else |
| + queue_work(nf_flow_offload_stats_wq, &offload->work); |
| +} |
| + |
| +static struct flow_offload_work * |
| +nf_flow_offload_work_alloc(struct nf_flowtable *flowtable, |
| + struct flow_offload *flow, unsigned int cmd) |
| +{ |
| + struct flow_offload_work *offload; |
| + |
| + if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags)) |
| + return NULL; |
| + |
| + offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC); |
| + if (!offload) { |
| + clear_bit(NF_FLOW_HW_PENDING, &flow->flags); |
| + return NULL; |
| + } |
| + |
| + offload->cmd = cmd; |
| + offload->flow = flow; |
| + offload->priority = flowtable->priority; |
| + offload->flowtable = flowtable; |
| + INIT_WORK(&offload->work, flow_offload_work_handler); |
| + |
| + return offload; |
| +} |
| + |
| +void nf_flow_offload_add(struct nf_flowtable *flowtable, |
| + struct flow_offload *flow) |
| +{ |
| + struct flow_offload_work *offload; |
| + |
| + offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE); |
| + if (!offload) |
| + return; |
| + |
| + flow_offload_queue_work(offload); |
| +} |
| + |
| +void nf_flow_offload_del(struct nf_flowtable *flowtable, |
| + struct flow_offload *flow) |
| +{ |
| + struct flow_offload_work *offload; |
| + |
| + offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY); |
| + if (!offload) |
| + return; |
| + |
| + set_bit(NF_FLOW_HW_DYING, &flow->flags); |
| + flow_offload_queue_work(offload); |
| +} |
| + |
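| +/* Skip the hardware counter query while 90% or more of the flow's |
| + * timeout budget remains; only flows approaching expiry are refreshed. |
| + */ |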
| +void nf_flow_offload_stats(struct nf_flowtable *flowtable, |
| + struct flow_offload *flow) |
| +{ |
| + struct flow_offload_work *offload; |
| + __s32 delta; |
| + |
| + delta = nf_flow_timeout_delta(flow->timeout); |
| + if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10)) |
| + return; |
| + |
| + offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS); |
| + if (!offload) |
| + return; |
| + |
| + flow_offload_queue_work(offload); |
| +} |
| + |
| +void nf_flow_table_offload_flush(struct nf_flowtable *flowtable) |
| +{ |
| + if (nf_flowtable_hw_offload(flowtable)) { |
| + flush_workqueue(nf_flow_offload_add_wq); |
| + flush_workqueue(nf_flow_offload_del_wq); |
| + flush_workqueue(nf_flow_offload_stats_wq); |
| + } |
| +} |
| + |
| +static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, |
| + struct flow_block_offload *bo, |
| + enum flow_block_command cmd) |
| +{ |
| + struct flow_block_cb *block_cb, *next; |
| + int err = 0; |
| + |
| + down_read(&flowtable->flow_block_lock); |
| + |
| + switch (cmd) { |
| + case FLOW_BLOCK_BIND: |
| + list_splice(&bo->cb_list, &flowtable->flow_block.cb_list); |
| + break; |
| + case FLOW_BLOCK_UNBIND: |
| + list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { |
| + list_del(&block_cb->list); |
| + flow_block_cb_free(block_cb); |
| + } |
| + break; |
| + default: |
| + WARN_ON_ONCE(1); |
| + err = -EOPNOTSUPP; |
| + } |
| + |
| + up_read(&flowtable->flow_block_lock); |
| + |
| + return err; |
| +} |
| + |
| +static void nf_flow_table_block_offload_init(struct flow_block_offload *bo, |
| + struct net *net, |
| + enum flow_block_command cmd, |
| + struct nf_flowtable *flowtable, |
| + struct netlink_ext_ack *extack) |
| +{ |
| + memset(bo, 0, sizeof(*bo)); |
| + bo->net = net; |
| + bo->block = &flowtable->flow_block; |
| + bo->command = cmd; |
| + bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; |
| + bo->extack = extack; |
| + INIT_LIST_HEAD(&bo->cb_list); |
| +} |
| + |
| +static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo, |
| + struct nf_flowtable *flowtable, |
| + struct net_device *dev, |
| + enum flow_block_command cmd, |
| + struct netlink_ext_ack *extack) |
| +{ |
| + nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, |
| + extack); |
| + flow_indr_block_call(dev, bo, cmd); |
| + |
| + if (list_empty(&bo->cb_list)) |
| + return -EOPNOTSUPP; |
| + |
| + return 0; |
| +} |
| + |
| +static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, |
| + struct nf_flowtable *flowtable, |
| + struct net_device *dev, |
| + enum flow_block_command cmd, |
| + struct netlink_ext_ack *extack) |
| +{ |
| + int err; |
| + |
| + nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, |
| + extack); |
| + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); |
| + if (err < 0) |
| + return err; |
| + |
| + return 0; |
| +} |
| + |
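| +/* Bind or unbind the flowtable's flow block on a device: prefer the |
| + * device's own ndo_setup_tc(TC_SETUP_FT) path and fall back to the |
| + * indirect block infrastructure when the device has none. |
| + */ |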
| +int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, |
| + struct net_device *dev, |
| + enum flow_block_command cmd) |
| +{ |
| + struct netlink_ext_ack extack = {}; |
| + struct flow_block_offload bo; |
| + int err; |
| + |
| + if (!nf_flowtable_hw_offload(flowtable)) |
| + return 0; |
| + |
| + if (dev->netdev_ops->ndo_setup_tc) |
| + err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, |
| + &extack); |
| + else |
| + err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd, |
| + &extack); |
| + if (err < 0) |
| + return err; |
| + |
| + return nf_flow_table_block_setup(flowtable, &bo, cmd); |
| +} |
| +EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup); |
| + |
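| +/* Dedicated unbound workqueues, one per command type, so that a burst |
| + * of one command type does not hold up the others. |
| + */ |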
| +int nf_flow_table_offload_init(void) |
| +{ |
| + nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add", |
| + WQ_UNBOUND | WQ_SYSFS, 0); |
| + if (!nf_flow_offload_add_wq) |
| + return -ENOMEM; |
| + |
| + nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del", |
| + WQ_UNBOUND | WQ_SYSFS, 0); |
| + if (!nf_flow_offload_del_wq) |
| + goto err_del_wq; |
| + |
| + nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats", |
| + WQ_UNBOUND | WQ_SYSFS, 0); |
| + if (!nf_flow_offload_stats_wq) |
| + goto err_stats_wq; |
| + |
| + return 0; |
| + |
| +err_stats_wq: |
| + destroy_workqueue(nf_flow_offload_del_wq); |
| +err_del_wq: |
| + destroy_workqueue(nf_flow_offload_add_wq); |
| + return -ENOMEM; |
| +} |
| + |
| +void nf_flow_table_offload_exit(void) |
| +{ |
| + destroy_workqueue(nf_flow_offload_add_wq); |
| + destroy_workqueue(nf_flow_offload_del_wq); |
| + destroy_workqueue(nf_flow_offload_stats_wq); |
| +} |
| diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c |
| new file mode 100644 |
| index 000000000..ae1eb2656 |
| --- /dev/null |
| +++ b/net/netfilter/xt_FLOWOFFLOAD.c |
| @@ -0,0 +1,776 @@ |
| +/* |
| + * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name> |
| + * |
| + * This program is free software; you can redistribute it and/or modify |
| + * it under the terms of the GNU General Public License version 2 as |
| + * published by the Free Software Foundation. |
| + */ |
| +#include <linux/module.h> |
| +#include <linux/init.h> |
| +#include <linux/netfilter.h> |
| +#include <linux/netfilter/xt_FLOWOFFLOAD.h> |
| +#include <linux/if_vlan.h> |
| +#include <net/ip.h> |
| +#include <net/netfilter/nf_conntrack.h> |
| +#include <net/netfilter/nf_conntrack_extend.h> |
| +#include <net/netfilter/nf_conntrack_helper.h> |
| +#include <net/netfilter/nf_flow_table.h> |
| + |
| +struct xt_flowoffload_hook { |
| + struct hlist_node list; |
| + struct nf_hook_ops ops; |
| + struct net *net; |
| + bool registered; |
| + bool used; |
| +}; |
| + |
| +struct xt_flowoffload_table { |
| + struct nf_flowtable ft; |
| + struct hlist_head hooks; |
| + struct delayed_work work; |
| +}; |
| + |
| +struct nf_forward_info { |
| + const struct net_device *indev; |
| + const struct net_device *outdev; |
| + const struct net_device *hw_outdev; |
| + struct id { |
| + __u16 id; |
| + __be16 proto; |
| + } encap[NF_FLOW_TABLE_ENCAP_MAX]; |
| + u8 num_encaps; |
| + u8 ingress_vlans; |
| + u8 h_source[ETH_ALEN]; |
| + u8 h_dest[ETH_ALEN]; |
| + enum flow_offload_xmit_type xmit_type; |
| +}; |
| + |
| +static DEFINE_SPINLOCK(hooks_lock); |
| + |
| +static struct xt_flowoffload_table flowtable[2]; |
| + |
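| +/* Ingress hook: look through VLAN and PPPoE encapsulation to find the |
| + * network protocol, then hand the skb to the IPv4 or IPv6 flowtable |
| + * hook. |
| + */ |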
| +static unsigned int |
| +xt_flowoffload_net_hook(void *priv, struct sk_buff *skb, |
| + const struct nf_hook_state *state) |
| +{ |
| + struct vlan_ethhdr *veth; |
| + __be16 proto; |
| + |
| + switch (skb->protocol) { |
| + case htons(ETH_P_8021Q): |
| + veth = (struct vlan_ethhdr *)skb_mac_header(skb); |
| + proto = veth->h_vlan_encapsulated_proto; |
| + break; |
| + case htons(ETH_P_PPP_SES): |
| + proto = nf_flow_pppoe_proto(skb); |
| + break; |
| + default: |
| + proto = skb->protocol; |
| + break; |
| + } |
| + |
| + switch (proto) { |
| + case htons(ETH_P_IP): |
| + return nf_flow_offload_ip_hook(priv, skb, state); |
| + case htons(ETH_P_IPV6): |
| + return nf_flow_offload_ipv6_hook(priv, skb, state); |
| + } |
| + |
| + return NF_ACCEPT; |
| +} |
| + |
| +static int |
| +xt_flowoffload_create_hook(struct xt_flowoffload_table *table, |
| + struct net_device *dev) |
| +{ |
| + struct xt_flowoffload_hook *hook; |
| + struct nf_hook_ops *ops; |
| + |
| + hook = kzalloc(sizeof(*hook), GFP_ATOMIC); |
| + if (!hook) |
| + return -ENOMEM; |
| + |
| + ops = &hook->ops; |
| + ops->pf = NFPROTO_NETDEV; |
| + ops->hooknum = NF_NETDEV_INGRESS; |
| + ops->priority = 10; |
| + ops->priv = &table->ft; |
| + ops->hook = xt_flowoffload_net_hook; |
| + ops->dev = dev; |
| + |
| + hlist_add_head(&hook->list, &table->hooks); |
| + mod_delayed_work(system_power_efficient_wq, &table->work, 0); |
| + |
| + return 0; |
| +} |
| + |
| +static struct xt_flowoffload_hook * |
| +flow_offload_lookup_hook(struct xt_flowoffload_table *table, |
| + struct net_device *dev) |
| +{ |
| + struct xt_flowoffload_hook *hook; |
| + |
| + hlist_for_each_entry(hook, &table->hooks, list) { |
| + if (hook->ops.dev == dev) |
| + return hook; |
| + } |
| + |
| + return NULL; |
| +} |
| + |
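| +/* Mark the hook on this device as in use, creating one if the device |
| + * has none yet; newly created hooks are registered from the delayed |
| + * work. |
| + */ |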
| +static void |
| +xt_flowoffload_check_device(struct xt_flowoffload_table *table, |
| + struct net_device *dev) |
| +{ |
| + struct xt_flowoffload_hook *hook; |
| + |
| + if (!dev) |
| + return; |
| + |
| + spin_lock_bh(&hooks_lock); |
| + hook = flow_offload_lookup_hook(table, dev); |
| + if (hook) |
| + hook->used = true; |
| + else |
| + xt_flowoffload_create_hook(table, dev); |
| + spin_unlock_bh(&hooks_lock); |
| +} |
| + |
| +static void |
| +xt_flowoffload_register_hooks(struct xt_flowoffload_table *table) |
| +{ |
| + struct xt_flowoffload_hook *hook; |
| + |
| +restart: |
| + hlist_for_each_entry(hook, &table->hooks, list) { |
| + if (hook->registered) |
| + continue; |
| + |
| + hook->registered = true; |
| + hook->net = dev_net(hook->ops.dev); |
| + spin_unlock_bh(&hooks_lock); |
| + nf_register_net_hook(hook->net, &hook->ops); |
| + if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD) |
| + table->ft.type->setup(&table->ft, hook->ops.dev, |
| + FLOW_BLOCK_BIND); |
| + spin_lock_bh(&hooks_lock); |
| + goto restart; |
| + } |
| +} |
| + |
| +static bool |
| +xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table) |
| +{ |
| + struct xt_flowoffload_hook *hook; |
| + bool active = false; |
| + |
| +restart: |
| + spin_lock_bh(&hooks_lock); |
| + hlist_for_each_entry(hook, &table->hooks, list) { |
| + if (hook->used || !hook->registered) { |
| + active = true; |
| + continue; |
| + } |
| + |
| + hlist_del(&hook->list); |
| + spin_unlock_bh(&hooks_lock); |
| + if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD) |
| + table->ft.type->setup(&table->ft, hook->ops.dev, |
| + FLOW_BLOCK_UNBIND); |
| + nf_unregister_net_hook(hook->net, &hook->ops); |
| + kfree(hook); |
| + goto restart; |
| + } |
| + spin_unlock_bh(&hooks_lock); |
| + |
| + return active; |
| +} |
| + |
| +static void |
| +xt_flowoffload_check_hook(struct flow_offload *flow, void *data) |
| +{ |
| + struct xt_flowoffload_table *table = data; |
| + struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple; |
| + struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple; |
| + struct xt_flowoffload_hook *hook; |
| + |
| + spin_lock_bh(&hooks_lock); |
| + hlist_for_each_entry(hook, &table->hooks, list) { |
| + if (hook->ops.dev->ifindex != tuple0->iifidx && |
| + hook->ops.dev->ifindex != tuple1->iifidx) |
| + continue; |
| + |
| + hook->used = true; |
| + } |
| + spin_unlock_bh(&hooks_lock); |
| +} |
| + |
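| +/* Hook maintenance work: register hooks added since the last run, mark |
| + * the hooks still referenced by live flows and tear down unused ones; |
| + * re-arms itself every second for as long as hooks remain. |
| + */ |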
| +static void |
| +xt_flowoffload_hook_work(struct work_struct *work) |
| +{ |
| + struct xt_flowoffload_table *table; |
| + struct xt_flowoffload_hook *hook; |
| + int err; |
| + |
| + table = container_of(work, struct xt_flowoffload_table, work.work); |
| + |
| + spin_lock_bh(&hooks_lock); |
| + xt_flowoffload_register_hooks(table); |
| + hlist_for_each_entry(hook, &table->hooks, list) |
| + hook->used = false; |
| + spin_unlock_bh(&hooks_lock); |
| + |
| + err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook, |
| + table); |
| + if (err && err != -EAGAIN) |
| + goto out; |
| + |
| + if (!xt_flowoffload_cleanup_hooks(table)) |
| + return; |
| + |
| +out: |
| + queue_delayed_work(system_power_efficient_wq, &table->work, HZ); |
| +} |
| + |
| +static bool |
| +xt_flowoffload_skip(struct sk_buff *skb, int family) |
| +{ |
| + if (skb_sec_path(skb)) |
| + return true; |
| + |
| + if (family == NFPROTO_IPV4) { |
| + const struct ip_options *opt = &(IPCB(skb)->opt); |
| + |
| + if (unlikely(opt->optlen)) |
| + return true; |
| + } |
| + |
| + return false; |
| +} |
| + |
| +static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst) |
| +{ |
| + if (dst_xfrm(dst)) |
| + return FLOW_OFFLOAD_XMIT_XFRM; |
| + |
| + return FLOW_OFFLOAD_XMIT_NEIGH; |
| +} |
| + |
| +static void nf_default_forward_path(struct nf_flow_route *route, |
| + struct dst_entry *dst_cache, |
| + enum ip_conntrack_dir dir, |
| + struct net_device **dev) |
| +{ |
| + route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex; |
| + route->tuple[dir].dst = dst_cache; |
| + route->tuple[dir].xmit_type = nf_xmit_type(dst_cache); |
| +} |
| + |
| +static bool nf_is_valid_ether_device(const struct net_device *dev) |
| +{ |
| + if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || |
| + dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr)) |
| + return false; |
| + |
| + return true; |
| +} |
| + |
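| +/* Flatten a dev_fill_forward_path() stack into nf_forward_info: record |
| + * VLAN/PPPoE encapsulations, bridge VLAN adjustments and the resulting |
| + * source and destination MAC addresses; a DSA hop ends the walk. |
| + */ |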
| +static void nf_dev_path_info(const struct net_device_path_stack *stack, |
| + struct nf_forward_info *info, |
| + unsigned char *ha) |
| +{ |
| + const struct net_device_path *path; |
| + int i; |
| + |
| + memcpy(info->h_dest, ha, ETH_ALEN); |
| + |
| + for (i = 0; i < stack->num_paths; i++) { |
| + path = &stack->path[i]; |
| + |
| + info->indev = path->dev; |
| + |
| + switch (path->type) { |
| + case DEV_PATH_ETHERNET: |
| + case DEV_PATH_DSA: |
| + case DEV_PATH_VLAN: |
| + case DEV_PATH_PPPOE: |
| + if (is_zero_ether_addr(info->h_source)) |
| + memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN); |
| + |
| + if (path->type == DEV_PATH_ETHERNET) |
| + break; |
| + if (path->type == DEV_PATH_DSA) { |
| + i = stack->num_paths; |
| + break; |
| + } |
| + |
| + /* DEV_PATH_VLAN and DEV_PATH_PPPOE */ |
| + if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) { |
| + info->indev = NULL; |
| + break; |
| + } |
| + if (!info->outdev) |
| + info->outdev = path->dev; |
| + info->encap[info->num_encaps].id = path->encap.id; |
| + info->encap[info->num_encaps].proto = path->encap.proto; |
| + info->num_encaps++; |
| + if (path->type == DEV_PATH_PPPOE) |
| + memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN); |
| + break; |
| + case DEV_PATH_BRIDGE: |
| + if (is_zero_ether_addr(info->h_source)) |
| + memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN); |
| + |
| + switch (path->bridge.vlan_mode) { |
| + case DEV_PATH_BR_VLAN_UNTAG_HW: |
| + info->ingress_vlans |= BIT(info->num_encaps - 1); |
| + break; |
| + case DEV_PATH_BR_VLAN_TAG: |
| + info->encap[info->num_encaps].id = path->bridge.vlan_id; |
| + info->encap[info->num_encaps].proto = path->bridge.vlan_proto; |
| + info->num_encaps++; |
| + break; |
| + case DEV_PATH_BR_VLAN_UNTAG: |
| + info->num_encaps--; |
| + break; |
| + case DEV_PATH_BR_VLAN_KEEP: |
| + break; |
| + } |
| + break; |
| + default: |
| + break; |
| + } |
| + } |
| + if (!info->outdev) |
| + info->outdev = info->indev; |
| + |
| + info->hw_outdev = info->indev; |
| + |
| + if (nf_is_valid_ether_device(info->indev)) |
| + info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT; |
| +} |
| + |
| +static int nf_dev_fill_forward_path(const struct nf_flow_route *route, |
| + const struct dst_entry *dst_cache, |
| + const struct nf_conn *ct, |
| + enum ip_conntrack_dir dir, u8 *ha, |
| + struct net_device_path_stack *stack) |
| +{ |
| + const void *daddr = &ct->tuplehash[!dir].tuple.src.u3; |
| + struct net_device *dev = dst_cache->dev; |
| + struct neighbour *n; |
| + u8 nud_state; |
| + |
| + if (!nf_is_valid_ether_device(dev)) |
| + goto out; |
| + |
| + n = dst_neigh_lookup(dst_cache, daddr); |
| + if (!n) |
| + return -1; |
| + |
| + read_lock_bh(&n->lock); |
| + nud_state = n->nud_state; |
| + ether_addr_copy(ha, n->ha); |
| + read_unlock_bh(&n->lock); |
| + neigh_release(n); |
| + |
| + if (!(nud_state & NUD_VALID)) |
| + return -1; |
| + |
| +out: |
| + return dev_fill_forward_path(dev, ha, stack); |
| +} |
| + |
| +static int nf_dev_forward_path(struct nf_flow_route *route, |
| + const struct nf_conn *ct, |
| + enum ip_conntrack_dir dir, |
| + struct net_device **devs) |
| +{ |
| + const struct dst_entry *dst = route->tuple[dir].dst; |
| + struct net_device_path_stack stack; |
| + struct nf_forward_info info = {}; |
| + unsigned char ha[ETH_ALEN]; |
| + int i; |
| + |
| + if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0) |
| + nf_dev_path_info(&stack, &info, ha); |
| + |
| + devs[!dir] = (struct net_device *)info.indev; |
| + if (!info.indev) |
| + return -1; |
| + |
| + route->tuple[!dir].in.ifindex = info.indev->ifindex; |
| + for (i = 0; i < info.num_encaps; i++) { |
| + route->tuple[!dir].in.encap[i].id = info.encap[i].id; |
| + route->tuple[!dir].in.encap[i].proto = info.encap[i].proto; |
| + } |
| + route->tuple[!dir].in.num_encaps = info.num_encaps; |
| + route->tuple[!dir].in.ingress_vlans = info.ingress_vlans; |
| + |
| + if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) { |
| + memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN); |
| + memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN); |
| + route->tuple[dir].out.ifindex = info.outdev->ifindex; |
| + route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex; |
| + route->tuple[dir].xmit_type = info.xmit_type; |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +static int |
| +xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct, |
| + enum ip_conntrack_dir dir, |
| + const struct xt_action_param *par, int ifindex, |
| + struct net_device **devs) |
| +{ |
| + struct dst_entry *dst = NULL; |
| + struct flowi fl; |
| + |
| + memset(&fl, 0, sizeof(fl)); |
| + switch (xt_family(par)) { |
| + case NFPROTO_IPV4: |
| + fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.src.u3.ip; |
| + fl.u.ip4.flowi4_oif = ifindex; |
| + break; |
| + case NFPROTO_IPV6: |
| + fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6; |
| + fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.src.u3.in6; |
| + fl.u.ip6.flowi6_oif = ifindex; |
| + break; |
| + } |
| + |
| + nf_route(xt_net(par), &dst, &fl, false, xt_family(par)); |
| + if (!dst) |
| + return -ENOENT; |
| + |
| + nf_default_forward_path(route, dst, dir, devs); |
| + |
| + return 0; |
| +} |
| + |
| +static int |
| +xt_flowoffload_route_nat(struct sk_buff *skb, const struct nf_conn *ct, |
| + const struct xt_action_param *par, |
| + struct nf_flow_route *route, enum ip_conntrack_dir dir, |
| + struct net_device **devs) |
| +{ |
| + struct dst_entry *this_dst = skb_dst(skb); |
| + struct dst_entry *other_dst = NULL; |
| + struct flowi fl; |
| + |
| + memset(&fl, 0, sizeof(fl)); |
| + switch (xt_family(par)) { |
| + case NFPROTO_IPV4: |
| + fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip; |
| + fl.u.ip4.flowi4_oif = xt_in(par)->ifindex; |
| + break; |
| + case NFPROTO_IPV6: |
| + fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6; |
| + fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; |
| + fl.u.ip6.flowi6_oif = xt_in(par)->ifindex; |
| + break; |
| + } |
| + |
| + nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par)); |
| + if (!other_dst) |
| + return -ENOENT; |
| + |
| + nf_default_forward_path(route, this_dst, dir, devs); |
| + nf_default_forward_path(route, other_dst, !dir, devs); |
| + |
| + if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH && |
| + route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) { |
| + if (nf_dev_forward_path(route, ct, dir, devs)) |
| + return -1; |
| + if (nf_dev_forward_path(route, ct, !dir, devs)) |
| + return -1; |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +static int |
| +xt_flowoffload_route_bridge(struct sk_buff *skb, const struct nf_conn *ct, |
| + const struct xt_action_param *par, |
| + struct nf_flow_route *route, enum ip_conntrack_dir dir, |
| + struct net_device **devs) |
| +{ |
| + int ret; |
| + |
| + ret = xt_flowoffload_route_dir(route, ct, dir, par, |
| + devs[dir]->ifindex, |
| + devs); |
| + if (ret) |
| + return ret; |
| + |
| + ret = xt_flowoffload_route_dir(route, ct, !dir, par, |
| + devs[!dir]->ifindex, |
| + devs); |
| + if (ret) |
| + goto err_route_dir1; |
| + |
| + if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH && |
| + route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) { |
| + if (nf_dev_forward_path(route, ct, dir, devs) || |
| + nf_dev_forward_path(route, ct, !dir, devs)) { |
| + ret = -1; |
| + goto err_route_dir2; |
| + } |
| + } |
| + |
| + return 0; |
| + |
| +err_route_dir2: |
| + dst_release(route->tuple[!dir].dst); |
| +err_route_dir1: |
| + dst_release(route->tuple[dir].dst); |
| + return ret; |
| +} |
| + |
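| +/* The FLOWOFFLOAD target: for confirmed TCP (established) and UDP |
| + * conntrack entries, resolve the forwarding path in both directions and |
| + * add a flow_offload entry to the software flowtable, or to the |
| + * hardware one when XT_FLOWOFFLOAD_HW is set. |
| + */ |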
| +static unsigned int |
| +flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par) |
| +{ |
| + struct xt_flowoffload_table *table; |
| + const struct xt_flowoffload_target_info *info = par->targinfo; |
| + struct tcphdr _tcph, *tcph = NULL; |
| + enum ip_conntrack_info ctinfo; |
| + enum ip_conntrack_dir dir; |
| + struct nf_flow_route route = {}; |
| + struct flow_offload *flow = NULL; |
| + struct net_device *devs[2] = {}; |
| + struct nf_conn *ct; |
| + struct net *net; |
| + |
| + if (xt_flowoffload_skip(skb, xt_family(par))) |
| + return XT_CONTINUE; |
| + |
| + ct = nf_ct_get(skb, &ctinfo); |
| +	if (!ct) |
| + return XT_CONTINUE; |
| + |
| + switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { |
| + case IPPROTO_TCP: |
| + if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) |
| + return XT_CONTINUE; |
| + |
| + tcph = skb_header_pointer(skb, par->thoff, |
| + sizeof(_tcph), &_tcph); |
| + if (unlikely(!tcph || tcph->fin || tcph->rst)) |
| + return XT_CONTINUE; |
| + break; |
| + case IPPROTO_UDP: |
| + break; |
| + default: |
| + return XT_CONTINUE; |
| + } |
| + |
| + if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) || |
| + ct->status & IPS_SEQ_ADJUST) |
| + return XT_CONTINUE; |
| + |
| + if (!nf_ct_is_confirmed(ct)) |
| + return XT_CONTINUE; |
| + |
| +	dir = CTINFO2DIR(ctinfo); |
| + |
| +	devs[dir] = xt_out(par); |
| +	devs[!dir] = xt_in(par); |
| + |
| +	if (!devs[dir] || !devs[!dir]) |
| +		return XT_CONTINUE; |
| + |
| +	if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) |
| +		return XT_CONTINUE; |
| + |
| + if (ct->status & IPS_NAT_MASK) { |
| + if (xt_flowoffload_route_nat(skb, ct, par, &route, dir, devs) < 0) |
| + goto err_flow_route; |
| + } else { |
| + if (xt_flowoffload_route_bridge(skb, ct, par, &route, dir, devs) < 0) |
| + goto err_flow_route; |
| + } |
| + |
| + flow = flow_offload_alloc(ct); |
| + if (!flow) |
| + goto err_flow_alloc; |
| + |
| + if (flow_offload_route_init(flow, &route) < 0) |
| + goto err_flow_add; |
| + |
| + if (tcph) { |
| + ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; |
| + ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; |
| + } |
| + |
| + table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)]; |
| + |
| + net = read_pnet(&table->ft.net); |
| + if (!net) |
| + write_pnet(&table->ft.net, xt_net(par)); |
| + |
| + if (flow_offload_add(&table->ft, flow) < 0) |
| + goto err_flow_add; |
| + |
| + xt_flowoffload_check_device(table, devs[0]); |
| + xt_flowoffload_check_device(table, devs[1]); |
| + |
| + if (!(ct->status & IPS_NAT_MASK)) |
| + dst_release(route.tuple[dir].dst); |
| + dst_release(route.tuple[!dir].dst); |
| + |
| + return XT_CONTINUE; |
| + |
| +err_flow_add: |
| + flow_offload_free(flow); |
| +err_flow_alloc: |
| + if (!(ct->status & IPS_NAT_MASK)) |
| + dst_release(route.tuple[dir].dst); |
| + dst_release(route.tuple[!dir].dst); |
| +err_flow_route: |
| + clear_bit(IPS_OFFLOAD_BIT, &ct->status); |
| + |
| + return XT_CONTINUE; |
| +} |
| + |
| +static int flowoffload_chk(const struct xt_tgchk_param *par) |
| +{ |
| + struct xt_flowoffload_target_info *info = par->targinfo; |
| + |
| + if (info->flags & ~XT_FLOWOFFLOAD_MASK) |
| + return -EINVAL; |
| + |
| + return 0; |
| +} |
| + |
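| +/* Example rule, assuming the matching libxt_FLOWOFFLOAD userspace |
| + * extension (where --hw sets XT_FLOWOFFLOAD_HW): |
| + * |
| + *   iptables -A FORWARD -j FLOWOFFLOAD --hw |
| + */ |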
| +static struct xt_target offload_tg_reg __read_mostly = { |
| + .family = NFPROTO_UNSPEC, |
| + .name = "FLOWOFFLOAD", |
| + .revision = 0, |
| + .targetsize = sizeof(struct xt_flowoffload_target_info), |
| + .usersize = sizeof(struct xt_flowoffload_target_info), |
| + .checkentry = flowoffload_chk, |
| + .target = flowoffload_tg, |
| + .me = THIS_MODULE, |
| +}; |
| + |
| +static int flow_offload_netdev_event(struct notifier_block *this, |
| + unsigned long event, void *ptr) |
| +{ |
| + struct xt_flowoffload_hook *hook0, *hook1; |
| + struct net_device *dev = netdev_notifier_info_to_dev(ptr); |
| + |
| + if (event != NETDEV_UNREGISTER) |
| + return NOTIFY_DONE; |
| + |
| + spin_lock_bh(&hooks_lock); |
| + hook0 = flow_offload_lookup_hook(&flowtable[0], dev); |
| + if (hook0) |
| + hlist_del(&hook0->list); |
| + |
| + hook1 = flow_offload_lookup_hook(&flowtable[1], dev); |
| + if (hook1) |
| + hlist_del(&hook1->list); |
| + spin_unlock_bh(&hooks_lock); |
| + |
| + if (hook0) { |
| + nf_unregister_net_hook(hook0->net, &hook0->ops); |
| + kfree(hook0); |
| + } |
| + |
| + if (hook1) { |
| + nf_unregister_net_hook(hook1->net, &hook1->ops); |
| + kfree(hook1); |
| + } |
| + |
| + nf_flow_table_cleanup(dev); |
| + |
| + return NOTIFY_DONE; |
| +} |
| + |
| +static struct notifier_block flow_offload_netdev_notifier = { |
| + .notifier_call = flow_offload_netdev_event, |
| +}; |
| + |
| +static int nf_flow_rule_route_inet(struct net *net, |
| + const struct flow_offload *flow, |
| + enum flow_offload_tuple_dir dir, |
| + struct nf_flow_rule *flow_rule) |
| +{ |
| + const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; |
| + int err; |
| + |
| + switch (flow_tuple->l3proto) { |
| + case NFPROTO_IPV4: |
| + err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule); |
| + break; |
| + case NFPROTO_IPV6: |
| + err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule); |
| + break; |
| + default: |
| + err = -1; |
| + break; |
| + } |
| + |
| + return err; |
| +} |
| + |
| +static struct nf_flowtable_type flowtable_inet = { |
| + .family = NFPROTO_INET, |
| + .init = nf_flow_table_init, |
| + .setup = nf_flow_table_offload_setup, |
| + .action = nf_flow_rule_route_inet, |
| + .free = nf_flow_table_free, |
| + .hook = xt_flowoffload_net_hook, |
| + .owner = THIS_MODULE, |
| +}; |
| + |
| +static int init_flowtable(struct xt_flowoffload_table *tbl) |
| +{ |
| + INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work); |
| + tbl->ft.type = &flowtable_inet; |
| + |
| + return nf_flow_table_init(&tbl->ft); |
| +} |
| + |
| +static int __init xt_flowoffload_tg_init(void) |
| +{ |
| +	int ret; |
| + |
| +	register_netdevice_notifier(&flow_offload_netdev_notifier); |
| + |
| +	ret = init_flowtable(&flowtable[0]); |
| +	if (ret) |
| +		goto cleanup_notifier; |
| + |
| +	ret = init_flowtable(&flowtable[1]); |
| +	if (ret) |
| +		goto cleanup; |
| + |
| +	flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD; |
| + |
| +	ret = xt_register_target(&offload_tg_reg); |
| +	if (ret) |
| +		goto cleanup2; |
| + |
| +	return 0; |
| + |
| +cleanup2: |
| +	nf_flow_table_free(&flowtable[1].ft); |
| +cleanup: |
| +	nf_flow_table_free(&flowtable[0].ft); |
| +cleanup_notifier: |
| +	unregister_netdevice_notifier(&flow_offload_netdev_notifier); |
| +	return ret; |
| +} |
| + |
| +static void __exit xt_flowoffload_tg_exit(void) |
| +{ |
| + xt_unregister_target(&offload_tg_reg); |
| + unregister_netdevice_notifier(&flow_offload_netdev_notifier); |
| + nf_flow_table_free(&flowtable[0].ft); |
| + nf_flow_table_free(&flowtable[1].ft); |
| +} |
| + |
| +MODULE_LICENSE("GPL"); |
| +module_init(xt_flowoffload_tg_init); |
| +module_exit(xt_flowoffload_tg_exit); |
| -- |
| 2.18.0 |
| |