From 7e50f84c94b82c3b2d23ac8878012b3b60ea0e96 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 31 Jan 2013 12:40:38 +0200 Subject: pf_key/xfrm_algo: prepare pf_key and xfrm_algo for new algorithms without pfkey support Mark existing algorithms as pfkey supported and make pfkey only use algorithms that have pfkey_supported set. Signed-off-by: Jussi Kivilinna Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 421f764794d5..814a1baa175c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1320,6 +1320,7 @@ struct xfrm_algo_desc { char *name; char *compat; u8 available:1; + u8 pfkey_supported:1; union { struct xfrm_algo_aead_info aead; struct xfrm_algo_auth_info auth; @@ -1561,8 +1562,8 @@ extern void xfrm_input_init(void); extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq); extern void xfrm_probe_algs(void); -extern int xfrm_count_auth_supported(void); -extern int xfrm_count_enc_supported(void); +extern int xfrm_count_pfkey_auth_supported(void); +extern int xfrm_count_pfkey_enc_supported(void); extern struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx); extern struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx); extern struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id); -- cgit v1.2.3-59-g8ed1b From a0073fe18e718a1c815fe8b0120f1ac3c60284ba Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 5 Feb 2013 12:52:55 +0100 Subject: xfrm: Add a state resolution packet queue As the default, we blackhole packets until the key manager resolves the states. This patch implements a packet queue where IPsec packets are queued until the states are resolved. We generate a dummy xfrm bundle, the output routine of the returned route enqueues the packet to a per policy queue and arms a timer that checks for state resolution when dst_output() is called. Once the states are resolved, the packets are sent out of the queue. If the states are not resolved after some time, the queue is flushed. This patch keeps the defaut behaviour to blackhole packets as long as we have no states. To enable the packet queue the sysctl xfrm_larval_drop must be switched off. Signed-off-by: Steffen Klassert --- include/net/dst.h | 1 + include/net/xfrm.h | 7 ++ net/xfrm/xfrm_policy.c | 229 ++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 233 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 9a7881066fb3..3da47e0a4a1f 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -61,6 +61,7 @@ struct dst_entry { #define DST_NOPEER 0x0040 #define DST_FAKE_RTABLE 0x0080 #define DST_XFRM_TUNNEL 0x0100 +#define DST_XFRM_QUEUE 0x0200 unsigned short pending_confirm; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 814a1baa175c..30f3e5b362ee 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -501,6 +501,12 @@ struct xfrm_policy_walk { u32 seq; }; +struct xfrm_policy_queue { + struct sk_buff_head hold_queue; + struct timer_list hold_timer; + unsigned long timeout; +}; + struct xfrm_policy { #ifdef CONFIG_NET_NS struct net *xp_net; @@ -522,6 +528,7 @@ struct xfrm_policy { struct xfrm_lifetime_cfg lft; struct xfrm_lifetime_cur curlft; struct xfrm_policy_walk_entry walk; + struct xfrm_policy_queue polq; u8 type; u8 action; u8 flags; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 41eabc46f110..456b11b0f049 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -35,6 +35,10 @@ #include "xfrm_hash.h" +#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10)) +#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) +#define XFRM_MAX_QUEUE_LEN 100 + DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -51,7 +55,7 @@ static struct kmem_cache *xfrm_dst_cache __read_mostly; static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); - +static void xfrm_policy_queue_process(unsigned long arg); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); @@ -287,8 +291,11 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); atomic_set(&policy->refcnt, 1); + skb_queue_head_init(&policy->polq.hold_queue); setup_timer(&policy->timer, xfrm_policy_timer, (unsigned long)policy); + setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, + (unsigned long)policy); policy->flo.ops = &xfrm_policy_fc_ops; } return policy; @@ -309,6 +316,16 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) } EXPORT_SYMBOL(xfrm_policy_destroy); +static void xfrm_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(list)) != NULL) { + dev_put(skb->dev); + kfree_skb(skb); + } +} + /* Rule must be locked. Release descentant resources, announce * entry dead. The rule must be unlinked from lists to the moment. */ @@ -319,6 +336,9 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) atomic_inc(&policy->genid); + del_timer(&policy->polq.hold_timer); + xfrm_queue_purge(&policy->polq.hold_queue); + if (del_timer(&policy->timer)) xfrm_pol_put(policy); @@ -562,6 +582,31 @@ static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s return 0; } +static void xfrm_policy_requeue(struct xfrm_policy *old, + struct xfrm_policy *new) +{ + struct xfrm_policy_queue *pq = &old->polq; + struct sk_buff_head list; + + __skb_queue_head_init(&list); + + spin_lock_bh(&pq->hold_queue.lock); + skb_queue_splice_init(&pq->hold_queue, &list); + del_timer(&pq->hold_timer); + spin_unlock_bh(&pq->hold_queue.lock); + + if (skb_queue_empty(&list)) + return; + + pq = &new->polq; + + spin_lock_bh(&pq->hold_queue.lock); + skb_queue_splice(&list, &pq->hold_queue); + pq->timeout = XFRM_QUEUE_TMO_MIN; + mod_timer(&pq->hold_timer, jiffies); + spin_unlock_bh(&pq->hold_queue.lock); +} + int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { struct net *net = xp_net(policy); @@ -603,8 +648,10 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) net->xfrm.policy_count[dir]++; atomic_inc(&flow_cache_genid); rt_genid_bump(net); - if (delpol) + if (delpol) { + xfrm_policy_requeue(delpol, policy); __xfrm_policy_unlink(delpol, dir); + } policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = get_seconds(); @@ -1115,11 +1162,15 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } - if (old_pol) + if (old_pol) { + if (pol) + xfrm_policy_requeue(old_pol, pol); + /* Unlinking succeeds always. This is the only function * allowed to delete or replace socket policy. */ __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); + } write_unlock_bh(&xfrm_policy_lock); if (old_pol) { @@ -1310,6 +1361,8 @@ static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *f * It means we need to try again resolving. */ if (xdst->num_xfrms > 0) return NULL; + } else if (dst->flags & DST_XFRM_QUEUE) { + return NULL; } else { /* Real bundle */ if (stale_bundle(dst)) @@ -1673,6 +1726,171 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, return xdst; } +static void xfrm_policy_queue_process(unsigned long arg) +{ + int err = 0; + struct sk_buff *skb; + struct sock *sk; + struct dst_entry *dst; + struct net_device *dev; + struct xfrm_policy *pol = (struct xfrm_policy *)arg; + struct xfrm_policy_queue *pq = &pol->polq; + struct flowi fl; + struct sk_buff_head list; + + spin_lock(&pq->hold_queue.lock); + skb = skb_peek(&pq->hold_queue); + dst = skb_dst(skb); + sk = skb->sk; + xfrm_decode_session(skb, &fl, dst->ops->family); + spin_unlock(&pq->hold_queue.lock); + + dst_hold(dst->path); + dst = xfrm_lookup(xp_net(pol), dst->path, &fl, + sk, 0); + if (IS_ERR(dst)) + goto purge_queue; + + if (dst->flags & DST_XFRM_QUEUE) { + dst_release(dst); + + if (pq->timeout >= XFRM_QUEUE_TMO_MAX) + goto purge_queue; + + pq->timeout = pq->timeout << 1; + mod_timer(&pq->hold_timer, jiffies + pq->timeout); + return; + } + + dst_release(dst); + + __skb_queue_head_init(&list); + + spin_lock(&pq->hold_queue.lock); + pq->timeout = 0; + skb_queue_splice_init(&pq->hold_queue, &list); + spin_unlock(&pq->hold_queue.lock); + + while (!skb_queue_empty(&list)) { + skb = __skb_dequeue(&list); + + xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); + dst_hold(skb_dst(skb)->path); + dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, + &fl, skb->sk, 0); + if (IS_ERR(dst)) { + dev_put(skb->dev); + kfree_skb(skb); + continue; + } + + nf_reset(skb); + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + dev = skb->dev; + err = dst_output(skb); + dev_put(dev); + } + + return; + +purge_queue: + pq->timeout = 0; + xfrm_queue_purge(&pq->hold_queue); +} + +static int xdst_queue_output(struct sk_buff *skb) +{ + unsigned long sched_next; + struct dst_entry *dst = skb_dst(skb); + struct xfrm_dst *xdst = (struct xfrm_dst *) dst; + struct xfrm_policy_queue *pq = &xdst->pols[0]->polq; + + if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { + kfree_skb(skb); + return -EAGAIN; + } + + skb_dst_force(skb); + dev_hold(skb->dev); + + spin_lock_bh(&pq->hold_queue.lock); + + if (!pq->timeout) + pq->timeout = XFRM_QUEUE_TMO_MIN; + + sched_next = jiffies + pq->timeout; + + if (del_timer(&pq->hold_timer)) { + if (time_before(pq->hold_timer.expires, sched_next)) + sched_next = pq->hold_timer.expires; + } + + __skb_queue_tail(&pq->hold_queue, skb); + mod_timer(&pq->hold_timer, sched_next); + + spin_unlock_bh(&pq->hold_queue.lock); + + return 0; +} + +static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, + struct dst_entry *dst, + const struct flowi *fl, + int num_xfrms, + u16 family) +{ + int err; + struct net_device *dev; + struct dst_entry *dst1; + struct xfrm_dst *xdst; + + xdst = xfrm_alloc_dst(net, family); + if (IS_ERR(xdst)) + return xdst; + + if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 || + (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP)) + return xdst; + + dst1 = &xdst->u.dst; + dst_hold(dst); + xdst->route = dst; + + dst_copy_metrics(dst1, dst); + + dst1->obsolete = DST_OBSOLETE_FORCE_CHK; + dst1->flags |= DST_HOST | DST_XFRM_QUEUE; + dst1->lastuse = jiffies; + + dst1->input = dst_discard; + dst1->output = xdst_queue_output; + + dst_hold(dst); + dst1->child = dst; + dst1->path = dst; + + xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); + + err = -ENODEV; + dev = dst->dev; + if (!dev) + goto free_dst; + + err = xfrm_fill_dst(xdst, dev, fl); + if (err) + goto free_dst; + +out: + return xdst; + +free_dst: + dst_release(dst1); + xdst = ERR_PTR(err); + goto out; +} + static struct flow_cache_object * xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *oldflo, void *ctx) @@ -1751,7 +1969,7 @@ make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: * either because the policy blocks, has no transformations or * we could not build template (no xfrm_states).*/ - xdst = xfrm_alloc_dst(net, family); + xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); return ERR_CAST(xdst); @@ -2359,6 +2577,9 @@ static int xfrm_bundle_ok(struct xfrm_dst *first) (dst->dev && !netif_running(dst->dev))) return 0; + if (dst->flags & DST_XFRM_QUEUE) + return 1; + last = NULL; do { -- cgit v1.2.3-59-g8ed1b From 8d068875caca3b507ffa8a57d521483fd4eebcc7 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Wed, 6 Feb 2013 10:46:33 +0100 Subject: xfrm: make gc_thresh configurable in all namespaces The xfrm gc threshold can be configured via xfrm{4,6}_gc_thresh sysctl but currently only in init_net, other namespaces always use the default value. This can substantially limit the number of IPsec tunnels that can be effectively used. Signed-off-by: Michal Kubecek Signed-off-by: Steffen Klassert --- include/net/netns/ipv4.h | 1 + include/net/netns/ipv6.h | 1 + net/ipv4/xfrm4_policy.c | 49 ++++++++++++++++++++++++++++++++++++++++++--- net/ipv6/xfrm6_policy.c | 52 +++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 95 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9b78862014a4..2ba9de89e8ec 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -22,6 +22,7 @@ struct netns_ipv4 { struct ctl_table_header *frags_hdr; struct ctl_table_header *ipv4_hdr; struct ctl_table_header *route_hdr; + struct ctl_table_header *xfrm4_hdr; #endif struct ipv4_devconf *devconf_all; struct ipv4_devconf *devconf_dflt; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 214cb0a53359..1242f371718b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -16,6 +16,7 @@ struct netns_sysctl_ipv6 { struct ctl_table_header *route_hdr; struct ctl_table_header *icmp_hdr; struct ctl_table_header *frags_hdr; + struct ctl_table_header *xfrm6_hdr; #endif int bindv6only; int flush_delay; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0e28383c096f..9a459be24af7 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -262,7 +262,51 @@ static struct ctl_table xfrm4_policy_table[] = { { } }; -static struct ctl_table_header *sysctl_hdr; +static int __net_init xfrm4_net_init(struct net *net) +{ + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = xfrm4_policy_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(xfrm4_policy_table), GFP_KERNEL); + if (!table) + goto err_alloc; + + table[0].data = &net->xfrm.xfrm4_dst_ops.gc_thresh; + } + + hdr = register_net_sysctl(net, "net/ipv4", table); + if (!hdr) + goto err_reg; + + net->ipv4.xfrm4_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit xfrm4_net_exit(struct net *net) +{ + struct ctl_table *table; + + if (net->ipv4.xfrm4_hdr == NULL) + return; + + table = net->ipv4.xfrm4_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv4.xfrm4_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct pernet_operations __net_initdata xfrm4_net_ops = { + .init = xfrm4_net_init, + .exit = xfrm4_net_exit, +}; #endif static void __init xfrm4_policy_init(void) @@ -277,8 +321,7 @@ void __init xfrm4_init(void) xfrm4_state_init(); xfrm4_policy_init(); #ifdef CONFIG_SYSCTL - sysctl_hdr = register_net_sysctl(&init_net, "net/ipv4", - xfrm4_policy_table); + register_pernet_subsys(&xfrm4_net_ops); #endif } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 128273744332..4ef7bdb65440 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -320,7 +320,51 @@ static struct ctl_table xfrm6_policy_table[] = { { } }; -static struct ctl_table_header *sysctl_hdr; +static int __net_init xfrm6_net_init(struct net *net) +{ + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = xfrm6_policy_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(xfrm6_policy_table), GFP_KERNEL); + if (!table) + goto err_alloc; + + table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh; + } + + hdr = register_net_sysctl(net, "net/ipv6", table); + if (!hdr) + goto err_reg; + + net->ipv6.sysctl.xfrm6_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit xfrm6_net_exit(struct net *net) +{ + struct ctl_table *table; + + if (net->ipv6.sysctl.xfrm6_hdr == NULL) + return; + + table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv6.sysctl.xfrm6_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct pernet_operations xfrm6_net_ops = { + .init = xfrm6_net_init, + .exit = xfrm6_net_exit, +}; #endif int __init xfrm6_init(void) @@ -339,8 +383,7 @@ int __init xfrm6_init(void) goto out_policy; #ifdef CONFIG_SYSCTL - sysctl_hdr = register_net_sysctl(&init_net, "net/ipv6", - xfrm6_policy_table); + register_pernet_subsys(&xfrm6_net_ops); #endif out: return ret; @@ -352,8 +395,7 @@ out_policy: void xfrm6_fini(void) { #ifdef CONFIG_SYSCTL - if (sysctl_hdr) - unregister_net_sysctl_table(sysctl_hdr); + unregister_pernet_subsys(&xfrm6_net_ops); #endif xfrm6_policy_fini(); xfrm6_state_fini(); -- cgit v1.2.3-59-g8ed1b