blob: 9eb9347db04bd12a3312a2f7ef0c50462a716c0c [file] [log] [blame]
developer4f0d2ba2023-08-21 17:33:25 +08001--- a/include/linux/netdevice.h
2+++ b/include/linux/netdevice.h
3@@ -919,6 +919,10 @@ struct xfrmdev_ops {
4 bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
5 struct xfrm_state *x);
6 void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
7+ void (*xdo_dev_state_update_curlft) (struct xfrm_state *x);
8+ int (*xdo_dev_policy_add) (struct xfrm_policy *x);
9+ void (*xdo_dev_policy_delete) (struct xfrm_policy *x);
10+ void (*xdo_dev_policy_free) (struct xfrm_policy *x);
11 };
12 #endif
13
14--- a/include/net/xfrm.h
15+++ b/include/net/xfrm.h
16@@ -125,11 +125,25 @@ struct xfrm_state_walk {
17 struct xfrm_address_filter *filter;
18 };
19
20+enum {
21+ XFRM_DEV_OFFLOAD_IN = 1,
22+ XFRM_DEV_OFFLOAD_OUT,
23+ XFRM_DEV_OFFLOAD_FWD,
24+};
25+
26+enum {
27+ XFRM_DEV_OFFLOAD_UNSPECIFIED,
28+ XFRM_DEV_OFFLOAD_CRYPTO,
29+ XFRM_DEV_OFFLOAD_PACKET,
30+};
31+
32 struct xfrm_state_offload {
33 struct net_device *dev;
34 unsigned long offload_handle;
35 unsigned int num_exthdrs;
36 u8 flags;
37+ u8 dir : 2;
38+ u8 type : 2;
39 };
40
41 struct xfrm_mode {
42@@ -527,6 +541,8 @@ struct xfrm_policy {
43 struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH];
44 struct hlist_node bydst_inexact_list;
45 struct rcu_head rcu;
46+
47+ struct xfrm_state_offload xdo;
48 };
49
50 static inline struct net *xp_net(const struct xfrm_policy *xp)
51@@ -1084,6 +1100,29 @@ xfrm_state_addr_cmp(const struct xfrm_tm
52 }
53
54 #ifdef CONFIG_XFRM
55+static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
56+{
57+ struct sec_path *sp = skb_sec_path(skb);
58+
59+ return sp->xvec[sp->len - 1];
60+}
61+#endif
62+
63+static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
64+{
65+#ifdef CONFIG_XFRM
66+ struct sec_path *sp = skb_sec_path(skb);
67+
68+ if (!sp || !sp->olen || sp->len != sp->olen)
69+ return NULL;
70+
71+ return &sp->ovec[sp->olen - 1];
72+#else
73+ return NULL;
74+#endif
75+}
76+
77+#ifdef CONFIG_XFRM
78 int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb,
79 unsigned short family);
80
81@@ -1093,10 +1132,19 @@ static inline int __xfrm_policy_check2(s
82 {
83 struct net *net = dev_net(skb->dev);
84 int ndir = dir | (reverse ? XFRM_POLICY_MASK + 1 : 0);
85+ struct xfrm_offload *xo = xfrm_offload(skb);
86+ struct xfrm_state *x;
87
88 if (sk && sk->sk_policy[XFRM_POLICY_IN])
89 return __xfrm_policy_check(sk, ndir, skb, family);
90
91+ if (xo) {
92+ x = xfrm_input_state(skb);
93+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
94+ return (xo->flags & CRYPTO_DONE) &&
95+ (xo->status & CRYPTO_SUCCESS);
96+ }
97+
98 return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) ||
99 (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
100 __xfrm_policy_check(sk, ndir, skb, family);
101@@ -1490,6 +1538,23 @@ struct xfrm_state *xfrm_stateonly_find(s
102 struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
103 unsigned short family);
104 int xfrm_state_check_expire(struct xfrm_state *x);
105+#ifdef CONFIG_XFRM_OFFLOAD
106+static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x)
107+{
108+ struct xfrm_state_offload *xdo = &x->xso;
109+ struct net_device *dev = xdo->dev;
110+
111+ if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
112+ return;
113+
114+ if (dev && dev->xfrmdev_ops &&
115+ dev->xfrmdev_ops->xdo_dev_state_update_curlft)
116+ dev->xfrmdev_ops->xdo_dev_state_update_curlft(x);
117+
118+}
119+#else
120+static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) {}
121+#endif
122 void xfrm_state_insert(struct xfrm_state *x);
123 int xfrm_state_add(struct xfrm_state *x);
124 int xfrm_state_update(struct xfrm_state *x);
125@@ -1539,6 +1604,8 @@ struct xfrm_state *xfrm_find_acq_byseq(s
126 int xfrm_state_delete(struct xfrm_state *x);
127 int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
128 int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
129+int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
130+ bool task_valid);
131 void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
132 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
133 u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
134@@ -1820,29 +1887,6 @@ static inline void xfrm_states_delete(st
135 }
136 #endif
137
138-#ifdef CONFIG_XFRM
139-static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
140-{
141- struct sec_path *sp = skb_sec_path(skb);
142-
143- return sp->xvec[sp->len - 1];
144-}
145-#endif
146-
147-static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
148-{
149-#ifdef CONFIG_XFRM
150- struct sec_path *sp = skb_sec_path(skb);
151-
152- if (!sp || !sp->olen || sp->len != sp->olen)
153- return NULL;
154-
155- return &sp->ovec[sp->olen - 1];
156-#else
157- return NULL;
158-#endif
159-}
160-
161 void __init xfrm_dev_init(void);
162
163 #ifdef CONFIG_XFRM_OFFLOAD
164@@ -1851,6 +1895,9 @@ void xfrm_dev_backlog(struct softnet_dat
165 struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again);
166 int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
167 struct xfrm_user_offload *xuo);
168+int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
169+ struct xfrm_user_offload *xuo, u8 dir,
170+ struct netlink_ext_ack *extack);
171 bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
172
173 static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
174@@ -1899,6 +1946,27 @@ static inline void xfrm_dev_state_free(s
175 dev_put(dev);
176 }
177 }
178+
179+static inline void xfrm_dev_policy_delete(struct xfrm_policy *x)
180+{
181+ struct xfrm_state_offload *xdo = &x->xdo;
182+ struct net_device *dev = xdo->dev;
183+
184+ if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_policy_delete)
185+ dev->xfrmdev_ops->xdo_dev_policy_delete(x);
186+}
187+
188+static inline void xfrm_dev_policy_free(struct xfrm_policy *x)
189+{
190+ struct xfrm_state_offload *xdo = &x->xdo;
191+ struct net_device *dev = xdo->dev;
192+
193+ if (dev && dev->xfrmdev_ops) {
194+ if (dev->xfrmdev_ops->xdo_dev_policy_free)
195+ dev->xfrmdev_ops->xdo_dev_policy_free(x);
196+ xdo->dev = NULL;
197+ }
198+}
199 #else
200 static inline void xfrm_dev_resume(struct sk_buff *skb)
201 {
202@@ -1931,6 +1999,21 @@ static inline bool xfrm_dev_offload_ok(s
203 return false;
204 }
205
206+static inline int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
207+ struct xfrm_user_offload *xuo, u8 dir,
208+ struct netlink_ext_ack *extack)
209+{
210+ return 0;
211+}
212+
213+static inline void xfrm_dev_policy_delete(struct xfrm_policy *x)
214+{
215+}
216+
217+static inline void xfrm_dev_policy_free(struct xfrm_policy *x)
218+{
219+}
220+
221 static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
222 {
223 }
224--- a/include/uapi/linux/xfrm.h
225+++ b/include/uapi/linux/xfrm.h
226@@ -512,6 +512,12 @@ struct xfrm_user_offload {
227 */
228 #define XFRM_OFFLOAD_IPV6 1
229 #define XFRM_OFFLOAD_INBOUND 2
230+/* Two bits above are relevant for state path only, while
231+ * offload is used for both policy and state flows.
232+ *
233+ * In policy offload mode, they are free and can be safely reused.
234+ */
235+#define XFRM_OFFLOAD_PACKET 4
236
237 #ifndef __KERNEL__
238 /* backwards compatibility for userspace */
239--- a/net/xfrm/xfrm_device.c
240+++ b/net/xfrm/xfrm_device.c
241@@ -80,6 +80,7 @@ struct sk_buff *validate_xmit_xfrm(struc
242 struct softnet_data *sd;
243 netdev_features_t esp_features = features;
244 struct xfrm_offload *xo = xfrm_offload(skb);
245+ struct net_device *dev = skb->dev;
246 struct sec_path *sp;
247
248 if (!xo || (xo->flags & XFRM_XMIT))
249@@ -93,6 +94,16 @@ struct sk_buff *validate_xmit_xfrm(struc
250 if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
251 return skb;
252 
253+ /* The packet was sent to HW IPsec packet offload engine,
254+ * but to wrong device. Drop the packet, so it won't skip
255+ * XFRM stack.
256+ */
257+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->xso.dev != dev) {
258+ kfree_skb(skb);
260+ atomic_long_inc(&dev->tx_dropped);
261+ return NULL;
262+ }
263+
264 local_irq_save(flags);
265 sd = this_cpu_ptr(&softnet_data);
266 err = !skb_queue_empty(&sd->xfrm_backlog);
267@@ -198,6 +210,7 @@ int xfrm_dev_state_add(struct net *net,
268 struct xfrm_state_offload *xso = &x->xso;
269 xfrm_address_t *saddr;
270 xfrm_address_t *daddr;
271+ bool is_packet_offload;
272
273 if (!x->type_offload)
274 return -EINVAL;
275@@ -206,9 +219,11 @@ int xfrm_dev_state_add(struct net *net,
276 if (x->encap || x->tfcpad)
277 return -EINVAL;
278
279- if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND))
280+ if (xuo->flags &
281+ ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND | XFRM_OFFLOAD_PACKET))
282 return -EINVAL;
283
284+ is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET;
285 dev = dev_get_by_index(net, xuo->ifindex);
286 if (!dev) {
287 if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
288@@ -223,7 +238,7 @@ int xfrm_dev_state_add(struct net *net,
289 x->props.family,
290 xfrm_smark_get(0, x));
291 if (IS_ERR(dst))
292- return 0;
293+ return (is_packet_offload) ? -EINVAL : 0;
294
295 dev = dst->dev;
296
297@@ -234,7 +249,7 @@ int xfrm_dev_state_add(struct net *net,
298 if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) {
299 xso->dev = NULL;
300 dev_put(dev);
301- return 0;
302+ return (is_packet_offload) ? -EINVAL : 0;
303 }
304
305 if (x->props.flags & XFRM_STATE_ESN &&
306@@ -249,14 +264,28 @@ int xfrm_dev_state_add(struct net *net,
307 /* Don't forward bit that is not implemented */
308 xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6;
309
310+ if (is_packet_offload)
311+ xso->type = XFRM_DEV_OFFLOAD_PACKET;
312+ else
313+ xso->type = XFRM_DEV_OFFLOAD_CRYPTO;
314+
315 err = dev->xfrmdev_ops->xdo_dev_state_add(x);
316 if (err) {
317 xso->num_exthdrs = 0;
318 xso->flags = 0;
319 xso->dev = NULL;
320 dev_put(dev);
321+ xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
322
323- if (err != -EOPNOTSUPP)
324+ /* User explicitly requested packet offload mode and configured
325+ * policy in addition to the XFRM state. So be civil to users,
326+ * and return an error instead of taking fallback path.
327+ *
328+ * This WARN_ON() can be seen as a documentation for driver
329+ * authors to do not return -EOPNOTSUPP in packet offload mode.
330+ */
331+ WARN_ON(err == -EOPNOTSUPP && is_packet_offload);
332+ if (err != -EOPNOTSUPP || is_packet_offload)
333 return err;
334 }
335
336@@ -264,6 +293,65 @@ int xfrm_dev_state_add(struct net *net,
337 }
338 EXPORT_SYMBOL_GPL(xfrm_dev_state_add);
339
340+int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
341+ struct xfrm_user_offload *xuo, u8 dir,
342+ struct netlink_ext_ack *extack)
343+{
344+ struct xfrm_state_offload *xdo = &xp->xdo;
345+ struct net_device *dev;
346+ int err;
347+
348+ if (!xuo->flags || xuo->flags & ~XFRM_OFFLOAD_PACKET) {
349+ /* We support only packet offload mode and it means
350+ * that user must set XFRM_OFFLOAD_PACKET bit.
351+ */
352+ NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request");
353+ return -EINVAL;
354+ }
355+
356+ dev = dev_get_by_index(net, xuo->ifindex);
357+ if (!dev)
358+ return -EINVAL;
359+
360+ if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_policy_add) {
361+ xdo->dev = NULL;
362+ dev_put(dev);
363+ NL_SET_ERR_MSG(extack, "Policy offload is not supported");
364+ return -EINVAL;
365+ }
366+
367+ xdo->dev = dev;
368+ xdo->type = XFRM_DEV_OFFLOAD_PACKET;
369+ switch (dir) {
370+ case XFRM_POLICY_IN:
371+ xdo->dir = XFRM_DEV_OFFLOAD_IN;
372+ break;
373+ case XFRM_POLICY_OUT:
374+ xdo->dir = XFRM_DEV_OFFLOAD_OUT;
375+ break;
376+ case XFRM_POLICY_FWD:
377+ xdo->dir = XFRM_DEV_OFFLOAD_FWD;
378+ break;
379+ default:
380+ xdo->dev = NULL;
381+ dev_put(dev);
382+ NL_SET_ERR_MSG(extack, "Unrecognized offload direction");
383+ return -EINVAL;
384+ }
385+
386+ err = dev->xfrmdev_ops->xdo_dev_policy_add(xp);
387+ if (err) {
388+ xdo->dev = NULL;
389+ xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
390+ xdo->dir = 0;
391+ NL_SET_ERR_MSG(extack, "Device failed to offload this policy");
392+ return err;
393+ }
394+
395+ return 0;
396+}
397+EXPORT_SYMBOL_GPL(xfrm_dev_policy_add);
398+
399 bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
400 {
401 int mtu;
402@@ -274,8 +362,9 @@ bool xfrm_dev_offload_ok(struct sk_buff
403 if (!x->type_offload || x->encap)
404 return false;
405
406- if ((!dev || (dev == xfrm_dst_path(dst)->dev)) &&
407- (!xdst->child->xfrm)) {
408+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET ||
409+ ((!dev || (dev == xfrm_dst_path(dst)->dev)) &&
410+ !xdst->child->xfrm)) {
411 mtu = xfrm_state_mtu(x, xdst->child_mtu_cached);
412 if (skb->len <= mtu)
413 goto ok;
414@@ -376,8 +465,10 @@ static int xfrm_dev_feat_change(struct n
415
416 static int xfrm_dev_down(struct net_device *dev)
417 {
418- if (dev->features & NETIF_F_HW_ESP)
419+ if (dev->features & NETIF_F_HW_ESP) {
420 xfrm_dev_state_flush(dev_net(dev), dev, true);
421+ xfrm_dev_policy_flush(dev_net(dev), dev, true);
422+ }
423
424 return NOTIFY_DONE;
425 }
426--- a/net/xfrm/xfrm_output.c
427+++ b/net/xfrm/xfrm_output.c
428@@ -410,7 +410,7 @@ static int xfrm_output_one(struct sk_buf
429 struct xfrm_state *x = dst->xfrm;
430 struct net *net = xs_net(x);
431
432- if (err <= 0)
433+ if (err <= 0 || x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
434 goto resume;
435
436 do {
437@@ -568,6 +568,16 @@ int xfrm_output(struct sock *sk, struct
438 struct xfrm_state *x = skb_dst(skb)->xfrm;
439 int err;
440
441+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
442+ if (!xfrm_dev_offload_ok(skb, x)) {
443+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
444+ kfree_skb(skb);
445+ return -EHOSTUNREACH;
446+ }
447+
448+ return xfrm_output_resume(skb, 0);
449+ }
450+
451 secpath_reset(skb);
452
453 if (xfrm_dev_offload_ok(skb, x)) {
454--- a/net/xfrm/xfrm_policy.c
455+++ b/net/xfrm/xfrm_policy.c
456@@ -423,6 +423,7 @@ void xfrm_policy_destroy(struct xfrm_pol
457 if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
458 BUG();
459
460+ xfrm_dev_policy_free(policy);
461 call_rcu(&policy->rcu, xfrm_policy_destroy_rcu);
462 }
463 EXPORT_SYMBOL(xfrm_policy_destroy);
464@@ -533,7 +534,7 @@ redo:
465 __get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
466 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
467 pol->family, nhashmask, dbits, sbits);
468- if (!entry0) {
469+ if (!entry0 || pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
470 hlist_del_rcu(&pol->bydst);
471 hlist_add_head_rcu(&pol->bydst, ndsttable + h);
472 h0 = h;
473@@ -864,7 +865,7 @@ static void xfrm_policy_inexact_list_rei
474 break;
475 }
476
477- if (newpos)
478+ if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
479 hlist_add_behind_rcu(&policy->bydst, newpos);
480 else
481 hlist_add_head_rcu(&policy->bydst, &n->hhead);
482@@ -1345,7 +1346,7 @@ static void xfrm_hash_rebuild(struct wor
483 else
484 break;
485 }
486- if (newpos)
487+ if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
488 hlist_add_behind_rcu(&policy->bydst, newpos);
489 else
490 hlist_add_head_rcu(&policy->bydst, chain);
491@@ -1522,7 +1523,7 @@ static void xfrm_policy_insert_inexact_l
492 break;
493 }
494
495- if (newpos)
496+ if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
497 hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos);
498 else
499 hlist_add_head_rcu(&policy->bydst_inexact_list, chain);
500@@ -1559,9 +1560,12 @@ static struct xfrm_policy *xfrm_policy_i
501 break;
502 }
503
504- if (newpos)
505+ if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
506 hlist_add_behind_rcu(&policy->bydst, &newpos->bydst);
507 else
508+ /* Packet offload policies enter to the head
509+ * to speed-up lookups.
510+ */
511 hlist_add_head_rcu(&policy->bydst, chain);
512
513 return delpol;
514@@ -1767,12 +1771,41 @@ xfrm_policy_flush_secctx_check(struct ne
515 }
516 return err;
517 }
518+
519+static inline int xfrm_dev_policy_flush_secctx_check(struct net *net,
520+ struct net_device *dev,
521+ bool task_valid)
522+{
523+ struct xfrm_policy *pol;
524+ int err = 0;
525+
526+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
527+ if (pol->walk.dead ||
528+ xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX ||
529+ pol->xdo.dev != dev)
530+ continue;
531+
532+ err = security_xfrm_policy_delete(pol->security);
533+ if (err) {
534+ xfrm_audit_policy_delete(pol, 0, task_valid);
535+ return err;
536+ }
537+ }
538+ return err;
539+}
540 #else
541 static inline int
542 xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
543 {
544 return 0;
545 }
546+
547+static inline int xfrm_dev_policy_flush_secctx_check(struct net *net,
548+ struct net_device *dev,
549+ bool task_valid)
550+{
551+ return 0;
552+}
553 #endif
554
555 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
556@@ -1812,6 +1845,44 @@ out:
557 }
558 EXPORT_SYMBOL(xfrm_policy_flush);
559
560+int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
561+ bool task_valid)
562+{
563+ int dir, err = 0, cnt = 0;
564+ struct xfrm_policy *pol;
565+
566+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
567+
568+ err = xfrm_dev_policy_flush_secctx_check(net, dev, task_valid);
569+ if (err)
570+ goto out;
571+
572+again:
573+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
574+ dir = xfrm_policy_id2dir(pol->index);
575+ if (pol->walk.dead ||
576+ dir >= XFRM_POLICY_MAX ||
577+ pol->xdo.dev != dev)
578+ continue;
579+
580+ __xfrm_policy_unlink(pol, dir);
581+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
582+ cnt++;
583+ xfrm_audit_policy_delete(pol, 1, task_valid);
584+ xfrm_policy_kill(pol);
585+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
586+ goto again;
587+ }
588+ if (cnt)
589+ __xfrm_policy_inexact_flush(net);
590+ else
591+ err = -ESRCH;
592+out:
593+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
594+ return err;
595+}
596+EXPORT_SYMBOL(xfrm_dev_policy_flush);
597+
598 int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
599 int (*func)(struct xfrm_policy *, int, int, void*),
600 void *data)
601@@ -2113,6 +2184,9 @@ static struct xfrm_policy *xfrm_policy_l
602 break;
603 }
604 }
605+ if (ret && ret->xdo.type == XFRM_DEV_OFFLOAD_PACKET)
606+ goto skip_inexact;
607+
608 bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
609 if (!bin || !xfrm_policy_find_inexact_candidates(&cand, bin, saddr,
610 daddr))
611@@ -2246,6 +2320,7 @@ int xfrm_policy_delete(struct xfrm_polic
612 pol = __xfrm_policy_unlink(pol, dir);
613 spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
614 if (pol) {
615+ xfrm_dev_policy_delete(pol);
616 xfrm_policy_kill(pol);
617 return 0;
618 }
619--- a/net/xfrm/xfrm_state.c
620+++ b/net/xfrm/xfrm_state.c
621@@ -78,6 +78,25 @@ xfrm_spi_hash(struct net *net, const xfr
622 return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
623 }
624
625+#define XFRM_STATE_INSERT(by, _n, _h, _type) \
626+ { \
627+ struct xfrm_state *_x = NULL; \
628+ \
629+ if (_type != XFRM_DEV_OFFLOAD_PACKET) { \
630+ hlist_for_each_entry_rcu(_x, _h, by) { \
631+ if (_x->xso.type == XFRM_DEV_OFFLOAD_PACKET) \
632+ continue; \
633+ break; \
634+ } \
635+ } \
636+ \
637+ if (!_x || _x->xso.type == XFRM_DEV_OFFLOAD_PACKET) \
638+ /* SAD is empty or consist from HW SAs only */ \
639+ hlist_add_head_rcu(_n, _h); \
640+ else \
641+ hlist_add_before_rcu(_n, &_x->by); \
642+ }
643+
644 static void xfrm_hash_transfer(struct hlist_head *list,
645 struct hlist_head *ndsttable,
646 struct hlist_head *nsrctable,
647@@ -93,18 +112,19 @@ static void xfrm_hash_transfer(struct hl
648 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
649 x->props.reqid, x->props.family,
650 nhashmask);
651- hlist_add_head_rcu(&x->bydst, ndsttable + h);
652+ XFRM_STATE_INSERT(bydst, &x->bydst, ndsttable + h, x->xso.type);
653 
654 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
655 x->props.family,
656 nhashmask);
657- hlist_add_head_rcu(&x->bysrc, nsrctable + h);
658+ XFRM_STATE_INSERT(bysrc, &x->bysrc, nsrctable + h, x->xso.type);
659 
660 if (x->id.spi) {
661 h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
662 x->id.proto, x->props.family,
663 nhashmask);
664- hlist_add_head_rcu(&x->byspi, nspitable + h);
665+ XFRM_STATE_INSERT(byspi, &x->byspi, nspitable + h,
666+ x->xso.type);
667 }
668 }
669 }
670@@ -527,6 +548,8 @@ static enum hrtimer_restart xfrm_timer_h
671 int err = 0;
672
673 spin_lock(&x->lock);
674+ xfrm_dev_state_update_curlft(x);
675+
676 if (x->km.state == XFRM_STATE_DEAD)
677 goto out;
678 if (x->km.state == XFRM_STATE_EXPIRED)
679@@ -923,6 +946,49 @@ xfrm_init_tempstate(struct xfrm_state *x
680 x->props.family = tmpl->encap_family;
681 }
682
683+static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
684+ const xfrm_address_t *daddr,
685+ __be32 spi, u8 proto,
686+ unsigned short family,
687+ struct xfrm_state_offload *xdo)
688+{
689+ unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
690+ struct xfrm_state *x;
691+
692+ hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
693+#ifdef CONFIG_XFRM_OFFLOAD
694+ if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) {
695+ if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
696+ /* HW states are in the head of list, there is
697+ * no need to iterate further.
698+ */
699+ break;
700+
701+ /* Packet offload: both policy and SA should
702+ * have same device.
703+ */
704+ if (xdo->dev != x->xso.dev)
705+ continue;
706+ } else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
707+ /* Skip HW policy for SW lookups */
708+ continue;
709+#endif
710+ if (x->props.family != family ||
711+ x->id.spi != spi ||
712+ x->id.proto != proto ||
713+ !xfrm_addr_equal(&x->id.daddr, daddr, family))
714+ continue;
715+
716+ if ((mark & x->mark.m) != x->mark.v)
717+ continue;
718+ if (!xfrm_state_hold_rcu(x))
719+ continue;
720+ return x;
721+ }
722+
723+ return NULL;
724+}
725+
726 static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
727 const xfrm_address_t *daddr,
728 __be32 spi, u8 proto,
729@@ -1062,6 +1128,23 @@ xfrm_state_find(const xfrm_address_t *da
730 rcu_read_lock();
731 h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
732 hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
733+#ifdef CONFIG_XFRM_OFFLOAD
734+ if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
735+ if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
736+ /* HW states are in the head of list, there is
737+ * no need to iterate further.
738+ */
739+ break;
740+
741+ /* Packet offload: both policy and SA should
742+ * have same device.
743+ */
744+ if (pol->xdo.dev != x->xso.dev)
745+ continue;
746+ } else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
747+ /* Skip HW policy for SW lookups */
748+ continue;
749+#endif
750 if (x->props.family == encap_family &&
751 x->props.reqid == tmpl->reqid &&
752 (mark & x->mark.m) == x->mark.v &&
753@@ -1079,6 +1162,23 @@ xfrm_state_find(const xfrm_address_t *da
754
755 h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
756 hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
757+#ifdef CONFIG_XFRM_OFFLOAD
758+ if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
759+ if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
760+ /* HW states are in the head of list, there is
761+ * no need to iterate further.
762+ */
763+ break;
764+
765+ /* Packet offload: both policy and SA should
766+ * have same device.
767+ */
768+ if (pol->xdo.dev != x->xso.dev)
769+ continue;
770+ } else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
771+ /* Skip HW policy for SW lookups */
772+ continue;
773+#endif
774 if (x->props.family == encap_family &&
775 x->props.reqid == tmpl->reqid &&
776 (mark & x->mark.m) == x->mark.v &&
777@@ -1096,8 +1196,10 @@ found:
778 x = best;
779 if (!x && !error && !acquire_in_progress) {
780 if (tmpl->id.spi &&
781- (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
782- tmpl->id.proto, encap_family)) != NULL) {
783+ (x0 = __xfrm_state_lookup_all(net, mark, daddr,
784+ tmpl->id.spi, tmpl->id.proto,
785+ encap_family,
786+ &pol->xdo)) != NULL) {
787 to_put = x0;
788 error = -EEXIST;
789 goto out;
790@@ -1131,17 +1233,42 @@ found:
791 x = NULL;
792 goto out;
793 }
794-
795+#ifdef CONFIG_XFRM_OFFLOAD
796+ if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
797+ struct xfrm_state_offload *xdo = &pol->xdo;
798+ struct xfrm_state_offload *xso = &x->xso;
799+
800+ xso->type = XFRM_DEV_OFFLOAD_PACKET;
801+ xso->dir = xdo->dir;
802+ xso->dev = xdo->dev;
803+ error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x);
804+ if (error) {
805+ xso->dir = 0;
806+ xso->dev = NULL;
807+ xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
808+ x->km.state = XFRM_STATE_DEAD;
809+ to_put = x;
810+ x = NULL;
811+ goto out;
812+ }
813+ }
814+#endif
815 if (km_query(x, tmpl, pol) == 0) {
816 spin_lock_bh(&net->xfrm.xfrm_state_lock);
817 x->km.state = XFRM_STATE_ACQ;
818 list_add(&x->km.all, &net->xfrm.state_all);
819- hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
820+ XFRM_STATE_INSERT(bydst, &x->bydst,
821+ net->xfrm.state_bydst + h,
822+ x->xso.type);
823 h = xfrm_src_hash(net, daddr, saddr, encap_family);
824- hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
825+ XFRM_STATE_INSERT(bysrc, &x->bysrc,
826+ net->xfrm.state_bysrc + h,
827+ x->xso.type);
828 if (x->id.spi) {
829 h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
830- hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
831+ XFRM_STATE_INSERT(byspi, &x->byspi,
832+ net->xfrm.state_byspi + h,
833+ x->xso.type);
834 }
835 x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
836 hrtimer_start(&x->mtimer,
837@@ -1151,6 +1278,16 @@ found:
838 xfrm_hash_grow_check(net, x->bydst.next != NULL);
839 spin_unlock_bh(&net->xfrm.xfrm_state_lock);
840 } else {
841+#ifdef CONFIG_XFRM_OFFLOAD
842+ struct xfrm_state_offload *xso = &x->xso;
843+
844+ if (xso->type == XFRM_DEV_OFFLOAD_PACKET) {
845+ xso->dev->xfrmdev_ops->xdo_dev_state_delete(x);
846+ xso->dir = 0;
847+ xso->dev = NULL;
848+ xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
849+ }
850+#endif
851 x->km.state = XFRM_STATE_DEAD;
852 to_put = x;
853 x = NULL;
854@@ -1246,16 +1383,19 @@ static void __xfrm_state_insert(struct x
855
856 h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
857 x->props.reqid, x->props.family);
858- hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
859+ XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
860+ x->xso.type);
861
862 h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
863- hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
864+ XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
865+ x->xso.type);
866
867 if (x->id.spi) {
868 h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
869 x->props.family);
870
871- hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
872+ XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h,
873+ x->xso.type);
874 }
875
876 hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
877@@ -1369,9 +1509,11 @@ static struct xfrm_state *__find_acq_cor
878 ktime_set(net->xfrm.sysctl_acq_expires, 0),
879 HRTIMER_MODE_REL_SOFT);
880 list_add(&x->km.all, &net->xfrm.state_all);
881- hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
882+ XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
883+ x->xso.type);
884 h = xfrm_src_hash(net, daddr, saddr, family);
885- hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
886+ XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
887+ x->xso.type);
888
889 net->xfrm.state_num++;
890
891@@ -1742,6 +1884,8 @@ EXPORT_SYMBOL(xfrm_state_update);
892
893 int xfrm_state_check_expire(struct xfrm_state *x)
894 {
895+ xfrm_dev_state_update_curlft(x);
896+
897 if (!x->curlft.use_time)
898 x->curlft.use_time = ktime_get_real_seconds();
899
900@@ -2043,7 +2187,8 @@ int xfrm_alloc_spi(struct xfrm_state *x,
901 spin_lock_bh(&net->xfrm.xfrm_state_lock);
902 x->id.spi = newspi;
903 h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
904- hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
905+ XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h,
906+ x->xso.type);
907 spin_unlock_bh(&net->xfrm.xfrm_state_lock);
908
909 err = 0;
910--- a/net/xfrm/xfrm_user.c
911+++ b/net/xfrm/xfrm_user.c
912@@ -844,6 +844,8 @@ static int copy_user_offload(struct xfrm
913 memset(xuo, 0, sizeof(*xuo));
914 xuo->ifindex = xso->dev->ifindex;
915 xuo->flags = xso->flags;
916+ if (xso->type == XFRM_DEV_OFFLOAD_PACKET)
917+ xuo->flags |= XFRM_OFFLOAD_PACKET;
918
919 return 0;
920 }
921@@ -1634,6 +1636,15 @@ static struct xfrm_policy *xfrm_policy_c
922 if (attrs[XFRMA_IF_ID])
923 xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
924
925+ /* configure the hardware if offload is requested */
926+ if (attrs[XFRMA_OFFLOAD_DEV]) {
927+ err = xfrm_dev_policy_add(net, xp,
928+ nla_data(attrs[XFRMA_OFFLOAD_DEV]),
929+ p->dir, NULL);
930+ if (err)
931+ goto error;
932+ }
933+
934 return xp;
935 error:
936 *errp = err;
937@@ -1672,6 +1683,7 @@ static int xfrm_add_policy(struct sk_buf
938 xfrm_audit_policy_add(xp, err ? 0 : 1, true);
939
940 if (err) {
941+ xfrm_dev_policy_delete(xp);
942 security_xfrm_policy_free(xp->security);
943 kfree(xp);
944 return err;
945@@ -1783,6 +1795,8 @@ static int dump_one_policy(struct xfrm_p
946 err = xfrm_mark_put(skb, &xp->mark);
947 if (!err)
948 err = xfrm_if_id_put(skb, xp->if_id);
949+ if (!err && xp->xdo.dev)
950+ err = copy_user_offload(&xp->xdo, skb);
951 if (err) {
952 nlmsg_cancel(skb, nlh);
953 return err;
954@@ -2958,6 +2972,8 @@ static int build_acquire(struct sk_buff
955 err = xfrm_mark_put(skb, &xp->mark);
956 if (!err)
957 err = xfrm_if_id_put(skb, xp->if_id);
958+ if (!err && xp->xdo.dev)
959+ err = copy_user_offload(&xp->xdo, skb);
960 if (err) {
961 nlmsg_cancel(skb, nlh);
962 return err;
963@@ -3076,6 +3092,8 @@ static int build_polexpire(struct sk_buf
964 err = xfrm_mark_put(skb, &xp->mark);
965 if (!err)
966 err = xfrm_if_id_put(skb, xp->if_id);
967+ if (!err && xp->xdo.dev)
968+ err = copy_user_offload(&xp->xdo, skb);
969 if (err) {
970 nlmsg_cancel(skb, nlh);
971 return err;
972@@ -3159,6 +3177,8 @@ static int xfrm_notify_policy(struct xfr
973 err = xfrm_mark_put(skb, &xp->mark);
974 if (!err)
975 err = xfrm_if_id_put(skb, xp->if_id);
976+ if (!err && xp->xdo.dev)
977+ err = copy_user_offload(&xp->xdo, skb);
978 if (err)
979 goto out_free_skb;
980