From 63283dd21ed2bf25a71909a820ed3e8fe412e15d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 27 Jun 2014 18:51:39 +0200 Subject: netfilter: nf_tables: skip transaction if no update flags in tables Skip transaction handling for table updates with no changes in the flags. This fixes a crash when passing the table flag with all bits unset. Reported-by: Ana Rey Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ab4566cfcbe4..da5dc37a7402 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -407,6 +407,9 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags & ~NFT_TABLE_F_DORMANT) return -EINVAL; + if (flags == ctx->table->flags) + return 0; + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) -- cgit v1.2.3-59-g8ed1b From e688a7f8c6cb7a18aae7e55ccdd175f0ad9e69c0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 1 Jul 2014 11:49:18 +0200 Subject: netfilter: nf_tables: safe RCU iteration on list when dumping The dump operation through netlink is not protected by the nfnl_lock. Thus, a reader process can be dumping any of the existing object lists while another process can be updating the list content. This patch resolves this situation by protecting all the object lists with RCU in the netlink dump path which is the reader side. The updater path is already protected via nfnl_lock, so use list manipulation RCU-safe operations. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 94 ++++++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 41 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index da5dc37a7402..a27a7c56e7c3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -35,7 +35,7 @@ int nft_register_afinfo(struct net *net, struct nft_af_info *afi) { INIT_LIST_HEAD(&afi->tables); nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&afi->list, &net->nft.af_info); + list_add_tail_rcu(&afi->list, &net->nft.af_info); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } @@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(nft_register_afinfo); void nft_unregister_afinfo(struct nft_af_info *afi) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&afi->list); + list_del_rcu(&afi->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_afinfo); @@ -277,11 +277,12 @@ static int nf_tables_dump_tables(struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &net->nft.af_info, list) { + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; - list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry_rcu(table, &afi->tables, list) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -299,6 +300,7 @@ cont: } } done: + rcu_read_unlock(); cb->args[0] = idx; return skb->len; } @@ -517,7 +519,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, module_put(afi->owner); return err; } - list_add_tail(&table->list, &afi->tables); + list_add_tail_rcu(&table->list, &afi->tables); return 0; } @@ -549,7 +551,7 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, if (err < 0) return err; - list_del(&table->list); + list_del_rcu(&table->list); return 0; } @@ -764,12 +766,13 @@ static int 
nf_tables_dump_chains(struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &net->nft.af_info, list) { + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; - list_for_each_entry(table, &afi->tables, list) { - list_for_each_entry(chain, &table->chains, list) { + list_for_each_entry_rcu(table, &afi->tables, list) { + list_for_each_entry_rcu(chain, &table->chains, list) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -787,11 +790,11 @@ cont: } } done: + rcu_read_unlock(); cb->args[0] = idx; return skb->len; } - static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1133,7 +1136,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, goto err2; table->use++; - list_add_tail(&chain->list, &table->chains); + list_add_tail_rcu(&chain->list, &table->chains); return 0; err2: if (!(table->flags & NFT_TABLE_F_DORMANT) && @@ -1183,7 +1186,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, return err; table->use--; - list_del(&chain->list); + list_del_rcu(&chain->list); return 0; } @@ -1202,9 +1205,9 @@ int nft_register_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); if (type->family == NFPROTO_UNSPEC) - list_add_tail(&type->list, &nf_tables_expressions); + list_add_tail_rcu(&type->list, &nf_tables_expressions); else - list_add(&type->list, &nf_tables_expressions); + list_add_rcu(&type->list, &nf_tables_expressions); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } @@ -1219,7 +1222,7 @@ EXPORT_SYMBOL_GPL(nft_register_expr); void nft_unregister_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&type->list); + list_del_rcu(&type->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_expr); @@ -1555,13 +1558,14 @@ static int nf_tables_dump_rules(struct sk_buff *skb, u8 genctr = ACCESS_ONCE(net->nft.genctr); u8 gencursor = ACCESS_ONCE(net->nft.gencursor); - list_for_each_entry(afi, &net->nft.af_info, list) { + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; - list_for_each_entry(table, &afi->tables, list) { - list_for_each_entry(chain, &table->chains, list) { - list_for_each_entry(rule, &chain->rules, list) { + list_for_each_entry_rcu(table, &afi->tables, list) { + list_for_each_entry_rcu(chain, &table->chains, list) { + list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_rule_is_active(net, rule)) goto cont; if (idx < s_idx) @@ -1582,6 +1586,8 @@ cont: } } done: + rcu_read_unlock(); + /* Invalidate this dump, a transition to the new generation happened */ if (gencursor != net->nft.gencursor || genctr != net->nft.genctr) return -EBUSY; @@ -1935,7 +1941,7 @@ static LIST_HEAD(nf_tables_set_ops); int nft_register_set(struct nft_set_ops *ops) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&ops->list, &nf_tables_set_ops); + list_add_tail_rcu(&ops->list, &nf_tables_set_ops); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } @@ -1944,7 +1950,7 @@ EXPORT_SYMBOL_GPL(nft_register_set); void nft_unregister_set(struct nft_set_ops *ops) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&ops->list); + list_del_rcu(&ops->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_set); @@ -2237,7 +2243,8 @@ static int nf_tables_dump_sets_table(struct nft_ctx 
*ctx, struct sk_buff *skb, if (cb->args[1]) return skb->len; - list_for_each_entry(set, &ctx->table->sets, list) { + rcu_read_lock(); + list_for_each_entry_rcu(set, &ctx->table->sets, list) { if (idx < s_idx) goto cont; if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, @@ -2250,6 +2257,7 @@ cont: } cb->args[1] = 1; done: + rcu_read_unlock(); return skb->len; } @@ -2263,7 +2271,8 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, if (cb->args[1]) return skb->len; - list_for_each_entry(table, &ctx->afi->tables, list) { + rcu_read_lock(); + list_for_each_entry_rcu(table, &ctx->afi->tables, list) { if (cur_table) { if (cur_table != table) continue; @@ -2272,7 +2281,7 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, } ctx->table = table; idx = 0; - list_for_each_entry(set, &ctx->table->sets, list) { + list_for_each_entry_rcu(set, &ctx->table->sets, list) { if (idx < s_idx) goto cont; if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, @@ -2287,6 +2296,7 @@ cont: } cb->args[1] = 1; done: + rcu_read_unlock(); return skb->len; } @@ -2303,7 +2313,8 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, if (cb->args[1]) return skb->len; - list_for_each_entry(afi, &net->nft.af_info, list) { + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (cur_family) { if (afi->family != cur_family) continue; @@ -2311,7 +2322,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, cur_family = 0; } - list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry_rcu(table, &afi->tables, list) { if (cur_table) { if (cur_table != table) continue; @@ -2322,7 +2333,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, ctx->table = table; ctx->afi = afi; idx = 0; - list_for_each_entry(set, &ctx->table->sets, list) { + list_for_each_entry_rcu(set, &ctx->table->sets, list) { if (idx < s_idx) goto cont; if (nf_tables_fill_set(skb, ctx, set, @@ -2342,6 +2353,7 @@ cont: } cb->args[1] = 1; done: + rcu_read_unlock(); return skb->len; } @@ -2600,7 +2612,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (err < 0) goto err2; - list_add_tail(&set->list, &table->sets); + list_add_tail_rcu(&set->list, &table->sets); table->use++; return 0; @@ -2620,7 +2632,7 @@ static void nft_set_destroy(struct nft_set *set) static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) { - list_del(&set->list); + list_del_rcu(&set->list); nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC); nft_set_destroy(set); } @@ -2655,7 +2667,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, if (err < 0) return err; - list_del(&set->list); + list_del_rcu(&set->list); ctx.table->use--; return 0; } @@ -2707,14 +2719,14 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, } bind: binding->chain = ctx->chain; - list_add_tail(&binding->list, &set->bindings); + list_add_tail_rcu(&binding->list, &set->bindings); return 0; } void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding) { - list_del(&binding->list); + list_del_rcu(&binding->list); if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS && !(set->flags & NFT_SET_INACTIVE)) @@ -3494,12 +3506,12 @@ static int nf_tables_abort(struct sk_buff *skb) } nft_trans_destroy(trans); } else { - list_del(&trans->ctx.table->list); + list_del_rcu(&trans->ctx.table->list); } break; case 
NFT_MSG_DELTABLE: - list_add_tail(&trans->ctx.table->list, - &trans->ctx.afi->tables); + list_add_tail_rcu(&trans->ctx.table->list, + &trans->ctx.afi->tables); nft_trans_destroy(trans); break; case NFT_MSG_NEWCHAIN: @@ -3510,7 +3522,7 @@ static int nf_tables_abort(struct sk_buff *skb) nft_trans_destroy(trans); } else { trans->ctx.table->use--; - list_del(&trans->ctx.chain->list); + list_del_rcu(&trans->ctx.chain->list); if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) && trans->ctx.chain->flags & NFT_BASE_CHAIN) { nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops, @@ -3520,8 +3532,8 @@ static int nf_tables_abort(struct sk_buff *skb) break; case NFT_MSG_DELCHAIN: trans->ctx.table->use++; - list_add_tail(&trans->ctx.chain->list, - &trans->ctx.table->chains); + list_add_tail_rcu(&trans->ctx.chain->list, + &trans->ctx.table->chains); nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: @@ -3535,12 +3547,12 @@ static int nf_tables_abort(struct sk_buff *skb) break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - list_del(&nft_trans_set(trans)->list); + list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: trans->ctx.table->use++; - list_add_tail(&nft_trans_set(trans)->list, - &trans->ctx.table->sets); + list_add_tail_rcu(&nft_trans_set(trans)->list, + &trans->ctx.table->sets); nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: -- cgit v1.2.3-59-g8ed1b From 38e029f14a9702f71d5953246df9f722bca49017 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 1 Jul 2014 12:23:12 +0200 Subject: netfilter: nf_tables: set NLM_F_DUMP_INTR if netlink dumping is stale An updater may interfer with the dumping of any of the object lists. Fix this by using a per-net generation counter and use the nl_dump_check_consistent() interface so the NLM_F_DUMP_INTR flag is set to notify userspace that it has to restart the dump since an updater has interfered. This patch also replaces the existing consistency checking code in the rule dumping path since it is broken. Basically, the value that the dump callback returns is not propagated to userspace via netlink_dump_start(). 
Signed-off-by: Pablo Neira Ayuso --- include/net/netns/nftables.h | 2 +- net/netfilter/nf_tables_api.c | 30 +++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index 26a394cb91a8..eee608b12cc9 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -13,8 +13,8 @@ struct netns_nftables { struct nft_af_info *inet; struct nft_af_info *arp; struct nft_af_info *bridge; + unsigned int base_seq; u8 gencursor; - u8 genctr; }; #endif diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a27a7c56e7c3..ac03d748360e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -278,6 +278,8 @@ static int nf_tables_dump_tables(struct sk_buff *skb, int family = nfmsg->nfgen_family; rcu_read_lock(); + cb->seq = net->nft.base_seq; + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -295,6 +297,8 @@ static int nf_tables_dump_tables(struct sk_buff *skb, NLM_F_MULTI, afi->family, table) < 0) goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -767,6 +771,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb, int family = nfmsg->nfgen_family; rcu_read_lock(); + cb->seq = net->nft.base_seq; + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -784,6 +790,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb, NLM_F_MULTI, afi->family, table, chain) < 0) goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -1555,10 +1563,10 @@ static int nf_tables_dump_rules(struct sk_buff *skb, unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - u8 genctr = ACCESS_ONCE(net->nft.genctr); - u8 gencursor = ACCESS_ONCE(net->nft.gencursor); rcu_read_lock(); + cb->seq = net->nft.base_seq; + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -1579,6 +1587,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, NLM_F_MULTI | NLM_F_APPEND, afi->family, table, chain, rule) < 0) goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -1588,10 +1598,6 @@ cont: done: rcu_read_unlock(); - /* Invalidate this dump, a transition to the new generation happened */ - if (gencursor != net->nft.gencursor || genctr != net->nft.genctr) - return -EBUSY; - cb->args[0] = idx; return skb->len; } @@ -2244,6 +2250,8 @@ static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb, return skb->len; rcu_read_lock(); + cb->seq = ctx->net->nft.base_seq; + list_for_each_entry_rcu(set, &ctx->table->sets, list) { if (idx < s_idx) goto cont; @@ -2252,6 +2260,7 @@ static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb, cb->args[0] = idx; goto done; } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -2272,6 +2281,8 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, return skb->len; rcu_read_lock(); + cb->seq = ctx->net->nft.base_seq; + list_for_each_entry_rcu(table, &ctx->afi->tables, list) { if (cur_table) { if (cur_table != table) @@ -2290,6 +2301,7 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, cb->args[2] = (unsigned long) table; goto done; } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ 
-2314,6 +2326,8 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, return skb->len; rcu_read_lock(); + cb->seq = net->nft.base_seq; + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (cur_family) { if (afi->family != cur_family) @@ -2344,6 +2358,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, cb->args[3] = afi->family; goto done; } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -3361,7 +3376,7 @@ static int nf_tables_commit(struct sk_buff *skb) struct nft_set *set; /* Bump generation counter, invalidate any dump in progress */ - net->nft.genctr++; + while (++net->nft.base_seq == 0); /* A new generation has just started */ net->nft.gencursor = gencursor_next(net); @@ -3966,6 +3981,7 @@ static int nf_tables_init_net(struct net *net) { INIT_LIST_HEAD(&net->nft.af_info); INIT_LIST_HEAD(&net->nft.commit_list); + net->nft.base_seq = 1; return 0; } -- cgit v1.2.3-59-g8ed1b From ce355e209feb030945dae4c358c02f29a84f3f8b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jul 2014 15:14:06 +0200 Subject: netfilter: nf_tables: 64bit stats need some extra synchronization Use generic u64_stats_sync infrastructure to get proper 64bit stats, even on 32bit arches, at no extra cost for 64bit arches. Without this fix, 32bit arches can have some wrong counters at the time the carry is propagated into upper word. Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++++-- net/netfilter/nf_tables_api.c | 15 +++++++++++---- net/netfilter/nf_tables_core.c | 10 ++++++---- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 713b0b88bd5a..c4d86198d3d6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #define NFT_JUMP_STACK_SIZE 16 @@ -528,8 +529,9 @@ enum nft_chain_type { }; struct nft_stats { - u64 bytes; - u64 pkts; + u64 bytes; + u64 pkts; + struct u64_stats_sync syncp; }; #define NFT_HOOK_OPS_MAX 2 diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ac03d748360e..8746ff9a8357 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -644,13 +644,20 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) { struct nft_stats *cpu_stats, total; struct nlattr *nest; + unsigned int seq; + u64 pkts, bytes; int cpu; memset(&total, 0, sizeof(total)); for_each_possible_cpu(cpu) { cpu_stats = per_cpu_ptr(stats, cpu); - total.pkts += cpu_stats->pkts; - total.bytes += cpu_stats->bytes; + do { + seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + pkts = cpu_stats->pkts; + bytes = cpu_stats->bytes; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); + total.pkts += pkts; + total.bytes += bytes; } nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS); if (nest == NULL) @@ -875,7 +882,7 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS]) return ERR_PTR(-EINVAL); - newstats = alloc_percpu(struct nft_stats); + newstats = netdev_alloc_pcpu_stats(struct nft_stats); if (newstats == NULL) return ERR_PTR(-ENOMEM); @@ -1091,7 +1098,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, } basechain->stats = stats; } else { - stats = alloc_percpu(struct nft_stats); + stats = netdev_alloc_pcpu_stats(struct nft_stats); if 
(IS_ERR(stats)) { module_put(type->owner); kfree(basechain); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 345acfb1720b..3b90eb2b2c55 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -109,7 +109,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; - struct nft_stats __percpu *stats; + struct nft_stats *stats; int rulenum; /* * Cache cursor to avoid problems in case that the cursor is updated @@ -205,9 +205,11 @@ next_rule: nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); rcu_read_lock_bh(); - stats = rcu_dereference(nft_base_chain(basechain)->stats); - __this_cpu_inc(stats->pkts); - __this_cpu_add(stats->bytes, pkt->skb->len); + stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); + u64_stats_update_begin(&stats->syncp); + stats->pkts++; + stats->bytes += pkt->skb->len; + u64_stats_update_end(&stats->syncp); rcu_read_unlock_bh(); return nft_base_chain(basechain)->policy; -- cgit v1.2.3-59-g8ed1b From c3caf1192f904de2f1381211f564537235d50de3 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Mon, 14 Jul 2014 15:54:46 -0700 Subject: net-gre-gro: Fix a bug that breaks the forwarding path Fixed a bug that was introduced by my GRE-GRO patch (bf5a755f5e9186406bbf50f4087100af5bd68e40 net-gre-gro: Add GRE support to the GRO stack) that breaks the forwarding path because various GSO related fields were not set. The bug will cause on the egress path either the GSO code to fail, or a GRE-TSO capable (NETIF_F_GSO_GRE) NICs to choke. The following fix has been tested for both cases. Signed-off-by: H.K. Jerry Chu Signed-off-by: David S. 
Miller --- net/core/dev.c | 2 ++ net/ipv4/af_inet.c | 3 +++ net/ipv4/gre_offload.c | 3 +++ net/ipv4/tcp_offload.c | 2 +- net/ipv6/tcpv6_offload.c | 2 +- 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 7990984ca364..367a586d0c8a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4096,6 +4096,8 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; + skb->encapsulation = 0; + skb_shinfo(skb)->gso_type = 0; skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); napi->skb = skb; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d5e6836cf772..d156b3c5f363 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1429,6 +1429,9 @@ static int inet_gro_complete(struct sk_buff *skb, int nhoff) int proto = iph->protocol; int err = -ENOSYS; + if (skb->encapsulation) + skb_set_inner_network_header(skb, nhoff); + csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index eb92deb12666..f0bdd47bbbcb 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -263,6 +263,9 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff) int err = -ENOENT; __be16 type; + skb->encapsulation = 1; + skb_shinfo(skb)->gso_type = SKB_GSO_GRE; + type = greh->protocol; if (greh->flags & GRE_KEY) grehlen += GRE_HEADER_SECTION; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 4e86c59ec7f7..55046ecd083e 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -309,7 +309,7 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff) th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; return tcp_gro_complete(skb); } diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 8517d3cd1aed..01b0ff9a0c2c 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -73,7 +73,7 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff) th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr, &iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; return tcp_gro_complete(skb); } -- cgit v1.2.3-59-g8ed1b From 7a9810e7bd99c922d9cedf64dbaa5ef6be412295 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Thu, 17 Jul 2014 12:55:40 +0900 Subject: r8169: Enable RX_MULTI_EN for RTL_GIGA_MAC_VER_40 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ethernet port on my ASUS A88X Pro mainboard stopped working several times a day, with messages like these in dmesg: AMD-Vi: Event logged [IO_PAGE_FAULT device=05:00.0 domain=0x001e address=0x0000000000003000 flags=0x0050] Searching the web for these messages led me to similar reports about different hardware supported by r8169, and eventually to commits 3ced8c955e74d319f3e3997f7169c79d524dfd06 ('r8169: enforce RX_MULTI_EN for the 8168f.') and eb2dc35d99028b698cdedba4f5522bc43e576bd2 ('r8169: RxConfig hack for the 8168evl'). So I tried this change, and it fixes the problem for me. Signed-off-by: Michel Dänzer Signed-off-by: David S. 
Miller --- drivers/net/ethernet/realtek/r8169.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 06bdc31a828d..61623e9af574 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4240,6 +4240,8 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST); break; case RTL_GIGA_MAC_VER_40: + RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF); + break; case RTL_GIGA_MAC_VER_41: case RTL_GIGA_MAC_VER_42: case RTL_GIGA_MAC_VER_43: -- cgit v1.2.3-59-g8ed1b From a4b70a07ed12a71131cab7adce2ce91c71b37060 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Wed, 16 Jul 2014 10:02:26 -0400 Subject: sunvnet: clean up objects created in vnet_new() on vnet_exit() Nothing cleans up the objects created by vnet_new(), they are completely leaked. vnet_exit(), after doing the vio_unregister_driver() to clean up ports, should call a helper function that iterates over vnet_list and cleans up those objects. This includes unregister_netdevice() as well as free_netdev(). Signed-off-by: Sowmini Varadhan Acked-by: Dave Kleikamp Reviewed-by: Karl Volz Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunvnet.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 1c24a8f368bd..fd411d6e19a2 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -1083,6 +1083,24 @@ static struct vnet *vnet_find_or_create(const u64 *local_mac) return vp; } +static void vnet_cleanup(void) +{ + struct vnet *vp; + struct net_device *dev; + + mutex_lock(&vnet_list_mutex); + while (!list_empty(&vnet_list)) { + vp = list_first_entry(&vnet_list, struct vnet, list); + list_del(&vp->list); + dev = vp->dev; + /* vio_unregister_driver() should have cleaned up port_list */ + BUG_ON(!list_empty(&vp->port_list)); + unregister_netdev(dev); + free_netdev(dev); + } + mutex_unlock(&vnet_list_mutex); +} + static const char *local_mac_prop = "local-mac-address"; static struct vnet *vnet_find_parent(struct mdesc_handle *hp, @@ -1240,7 +1258,6 @@ static int vnet_port_remove(struct vio_dev *vdev) kfree(port); - unregister_netdev(vp->dev); } return 0; } @@ -1268,6 +1285,7 @@ static int __init vnet_init(void) static void __exit vnet_exit(void) { vio_unregister_driver(&vnet_port_driver); + vnet_cleanup(); } module_init(vnet_init); -- cgit v1.2.3-59-g8ed1b From 858e6c321065344339906672bccd0eafe9622258 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Wed, 16 Jul 2014 13:33:50 +0300 Subject: net/mlx4_en: cq->irq_desc wasn't set in legacy EQ's Fix a regression introduced by commit 35f6f45 ("net/mlx4_en: Don't use irq_affinity_notifier to track changes in IRQ affinity map"). When core is started in legacy EQ's (number of IRQ's < rx rings), cq->irq_desc was NULL. This caused a kernel crash under heavy traffic - when having more than rx NAPI budget completions. Fixed to have it set for both EQ modes. Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 14c00048bbec..82322b1c8411 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -129,14 +129,15 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, name); } - cq->irq_desc = - irq_to_desc(mlx4_eq_get_irq(mdev->dev, - cq->vector)); } } else { cq->vector = (cq->ring + 1 + priv->port) % mdev->dev->caps.num_comp_vectors; } + + cq->irq_desc = + irq_to_desc(mlx4_eq_get_irq(mdev->dev, + cq->vector)); } else { /* For TX we use the same irq per ring we assigned for the RX */ -- cgit v1.2.3-59-g8ed1b From cc25eaae238ddd693aa5eaa73e565d8ff4915f6e Mon Sep 17 00:00:00 2001 From: Christoph Schulz Date: Wed, 16 Jul 2014 22:10:29 +0200 Subject: net: ppp: fix creating PPP pass and active filters Commit 568f194e8bd16c353ad50f9ab95d98b20578a39d ("net: ppp: use sk_unattached_filter api") inadvertently changed the logic when setting PPP pass and active filters. This applies to both the generic PPP subsystem implemented by drivers/net/ppp/ppp_generic.c and the ISDN PPP subsystem implemented by drivers/isdn/i4l/isdn_ppp.c. The original code in ppp_ioctl() (or isdn_ppp_ioctl(), resp.) handling PPPIOCSPASS and PPPIOCSACTIVE allowed to remove a pass/active filter previously set by using a filter of length zero. However, with the new code this is not possible anymore as this case is not explicitly checked for, which leads to passing NULL as a filter to sk_unattached_filter_create(). This results in returning EINVAL to the caller. Additionally, the variables ppp->pass_filter and ppp->active_filter (or is->pass_filter and is->active_filter, resp.) are not reset to NULL, although the filters they point to may have been destroyed by sk_unattached_filter_destroy(), so in this EINVAL case dangling pointers are left behind (provided the pointers were previously non-NULL). This patch corrects both problems by checking whether the filter passed is empty or non-empty, and prevents sk_unattached_filter_create() from being called in the first case. Moreover, the pointers are always reset to NULL as soon as sk_unattached_filter_destroy() returns. Signed-off-by: Christoph Schulz Signed-off-by: David S. 
Miller --- drivers/isdn/i4l/isdn_ppp.c | 20 ++++++++++++++++---- drivers/net/ppp/ppp_generic.c | 22 ++++++++++++++++------ 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index a333b7f798d1..62f0688d45a5 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -638,9 +638,15 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) fprog.len = len; fprog.filter = code; - if (is->pass_filter) + if (is->pass_filter) { sk_unattached_filter_destroy(is->pass_filter); - err = sk_unattached_filter_create(&is->pass_filter, &fprog); + is->pass_filter = NULL; + } + if (fprog.filter != NULL) + err = sk_unattached_filter_create(&is->pass_filter, + &fprog); + else + err = 0; kfree(code); return err; @@ -657,9 +663,15 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) fprog.len = len; fprog.filter = code; - if (is->active_filter) + if (is->active_filter) { sk_unattached_filter_destroy(is->active_filter); - err = sk_unattached_filter_create(&is->active_filter, &fprog); + is->active_filter = NULL; + } + if (fprog.filter != NULL) + err = sk_unattached_filter_create(&is->active_filter, + &fprog); + else + err = 0; kfree(code); return err; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index e2f20f807de8..d5b77ef3a210 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -757,10 +757,15 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) }; ppp_lock(ppp); - if (ppp->pass_filter) + if (ppp->pass_filter) { sk_unattached_filter_destroy(ppp->pass_filter); - err = sk_unattached_filter_create(&ppp->pass_filter, - &fprog); + ppp->pass_filter = NULL; + } + if (fprog.filter != NULL) + err = sk_unattached_filter_create(&ppp->pass_filter, + &fprog); + else + err = 0; kfree(code); ppp_unlock(ppp); } @@ -778,10 +783,15 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) }; ppp_lock(ppp); - if (ppp->active_filter) + if (ppp->active_filter) { sk_unattached_filter_destroy(ppp->active_filter); - err = sk_unattached_filter_create(&ppp->active_filter, - &fprog); + ppp->active_filter = NULL; + } + if (fprog.filter != NULL) + err = sk_unattached_filter_create(&ppp->active_filter, + &fprog); + else + err = 0; kfree(code); ppp_unlock(ppp); } -- cgit v1.2.3-59-g8ed1b From a28d0e873d2899bd750ae495f84fe9c1a2f53809 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 17 Jul 2014 13:50:45 +0300 Subject: wan/x25_asy: integer overflow in x25_asy_change_mtu() If "newmtu * 2 + 4" is too large then it can cause an integer overflow leading to memory corruption. Eric Dumazet suggests that 65534 is a reasonable upper limit. Btw, "newmtu" is not allowed to be a negative number because of the check in dev_set_mtu(), so that's ok. Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller --- drivers/net/wan/x25_asy.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 5895f1978691..fa9fdfa128c1 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -122,8 +122,12 @@ static int x25_asy_change_mtu(struct net_device *dev, int newmtu) { struct x25_asy *sl = netdev_priv(dev); unsigned char *xbuff, *rbuff; - int len = 2 * newmtu; + int len; + if (newmtu > 65534) + return -EINVAL; + + len = 2 * newmtu; xbuff = kmalloc(len + 4, GFP_ATOMIC); rbuff = kmalloc(len + 4, GFP_ATOMIC); -- cgit v1.2.3-59-g8ed1b From 5343330010a892b76a97fd93ad3c455a4a32a7fb Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Thu, 17 Jul 2014 13:33:51 +0200 Subject: net: qmi_wwan: add two Sierra Wireless/Netgear devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two device IDs found in an out-of-tree driver downloadable from Netgear. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index c4638c67f6b9..22756db53dca 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -667,6 +667,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, + {QMI_FIXED_INTF(0x0846, 0x68a2, 8)}, {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ {QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */ @@ -757,6 +758,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9054, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1199, 0x9055, 8)}, /* Netgear AirCard 341U */ {QMI_FIXED_INTF(0x1199, 0x9056, 8)}, /* Sierra Wireless Modem */ + {QMI_FIXED_INTF(0x1199, 0x9057, 8)}, {QMI_FIXED_INTF(0x1199, 0x9061, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ -- cgit v1.2.3-59-g8ed1b From c2a6c7813f1ffae636e369b5d7011c9f518d3cd9 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Thu, 17 Jul 2014 13:34:09 +0200 Subject: net: huawei_cdc_ncm: add "subclass 3" devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Huawei's usage of the subclass and protocol fields is not 100% clear to us, but there appears to be a very strict system. A device with the "shared" device ID 12d1:1506 and this NCM function was recently reported (showing only default altsetting): Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 3 bInterfaceProtocol 22 iInterface 8 CDC Network Control Model (NCM) ** UNRECOGNIZED: 05 24 00 10 01 ** UNRECOGNIZED: 06 24 1a 00 01 1f ** UNRECOGNIZED: 0c 24 1b 00 01 00 04 10 14 dc 05 20 ** UNRECOGNIZED: 0d 24 0f 0a 0f 00 00 00 ea 05 03 00 01 ** UNRECOGNIZED: 05 24 06 01 01 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x85 EP 5 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0010 1x 16 bytes bInterval 9 Cc: Enrico Mioso Signed-off-by: Bjørn Mork Signed-off-by: David S. 
Miller --- drivers/net/usb/huawei_cdc_ncm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c index 5d95a13dbe2a..735f7dadb9a0 100644 --- a/drivers/net/usb/huawei_cdc_ncm.c +++ b/drivers/net/usb/huawei_cdc_ncm.c @@ -194,6 +194,9 @@ static const struct usb_device_id huawei_cdc_ncm_devs[] = { { USB_VENDOR_AND_INTERFACE_INFO(0x12d1, 0xff, 0x02, 0x76), .driver_info = (unsigned long)&huawei_cdc_ncm_info, }, + { USB_VENDOR_AND_INTERFACE_INFO(0x12d1, 0xff, 0x03, 0x16), + .driver_info = (unsigned long)&huawei_cdc_ncm_info, + }, /* Terminating entry */ { -- cgit v1.2.3-59-g8ed1b From 7801db8aec957fa6610efe0ee26a6c8bc0f1d73b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 17 Jul 2014 17:34:53 -0700 Subject: net_sched: avoid generating same handle for u32 filters When kernel generates a handle for a u32 filter, it tries to start from the max in the bucket. So when we have a filter with the max (fff) handle, it will cause kernel always generates the same handle for new filters. This can be shown by the following command: tc qdisc add dev eth0 ingress tc filter add dev eth0 parent ffff: protocol ip pref 770 handle 800::fff u32 match ip protocol 1 0xff tc filter add dev eth0 parent ffff: protocol ip pref 770 u32 match ip protocol 1 0xff ... we will get some u32 filters with same handle: # tc filter show dev eth0 parent ffff: filter protocol ip pref 770 u32 filter protocol ip pref 770 u32 fh 800: ht divisor 1 filter protocol ip pref 770 u32 fh 800::fff order 4095 key ht 800 bkt 0 match 00010000/00ff0000 at 8 filter protocol ip pref 770 u32 fh 800::fff order 4095 key ht 800 bkt 0 match 00010000/00ff0000 at 8 filter protocol ip pref 770 u32 fh 800::fff order 4095 key ht 800 bkt 0 match 00010000/00ff0000 at 8 filter protocol ip pref 770 u32 fh 800::fff order 4095 key ht 800 bkt 0 match 00010000/00ff0000 at 8 handles should be unique. This patch fixes it by looking up a bitmap, so that can guarantee the handle is as unique as possible. For compatibility, we still start from 0x800. Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index c39b583ace32..70c0be8d0121 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -460,17 +461,25 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg) return 0; } +#define NR_U32_NODE (1<<12) static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) { struct tc_u_knode *n; - unsigned int i = 0x7FF; + unsigned long i; + unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long), + GFP_KERNEL); + if (!bitmap) + return handle | 0xFFF; for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next) - if (i < TC_U32_NODE(n->handle)) - i = TC_U32_NODE(n->handle); - i++; + set_bit(TC_U32_NODE(n->handle), bitmap); - return handle | (i > 0xFFF ? 0xFFF : i); + i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800); + if (i >= NR_U32_NODE) + i = find_next_zero_bit(bitmap, NR_U32_NODE, 1); + + kfree(bitmap); + return handle | (i >= NR_U32_NODE ? 
0xFFF : i); } static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { -- cgit v1.2.3-59-g8ed1b From 1a998d3e6bc1e44f4c0bc7509bdedef8ed3845ec Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Fri, 18 Jul 2014 19:08:02 +0100 Subject: xen-netback: Fix handling frag_list on grant op error path The error handling for skb's with frag_list was completely wrong, it caused double unmap attempts to happen if the error was on the first skb. Move it to the right place in the loop. Signed-off-by: Zoltan Kiss Reported-by: Armin Zentai Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xenproject.org Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 1844a47636b6..a773f2016bad 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1030,10 +1030,16 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, { struct gnttab_map_grant_ref *gop_map = *gopp_map; u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; + /* This always points to the shinfo of the skb being checked, which + * could be either the first or the one on the frag_list + */ struct skb_shared_info *shinfo = skb_shinfo(skb); + /* If this is non-NULL, we are currently checking the frag_list skb, and + * this points to the shinfo of the first one + */ + struct skb_shared_info *first_shinfo = NULL; int nr_frags = shinfo->nr_frags; int i, err; - struct sk_buff *first_skb = NULL; /* Check status of header. */ err = (*gopp_copy)->status; @@ -1086,31 +1092,28 @@ check_frags: xenvif_idx_unmap(queue, pending_idx); } + /* And if we found the error while checking the frag_list, unmap + * the first skb's frags + */ + if (first_shinfo) { + for (j = 0; j < first_shinfo->nr_frags; j++) { + pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]); + xenvif_idx_unmap(queue, pending_idx); + } + } + /* Remember the error: invalidate all subsequent fragments. */ err = newerr; } - if (skb_has_frag_list(skb)) { - first_skb = skb; - skb = shinfo->frag_list; - shinfo = skb_shinfo(skb); + if (skb_has_frag_list(skb) && !first_shinfo) { + first_shinfo = skb_shinfo(skb); + shinfo = skb_shinfo(skb_shinfo(skb)->frag_list); nr_frags = shinfo->nr_frags; goto check_frags; } - /* There was a mapping error in the frag_list skb. We have to unmap - * the first skb's frags - */ - if (first_skb && err) { - int j; - shinfo = skb_shinfo(first_skb); - for (j = 0; j < shinfo->nr_frags; j++) { - pending_idx = frag_get_pending_idx(&shinfo->frags[j]); - xenvif_idx_unmap(queue, pending_idx); - } - } - *gopp_map = gop_map; return err; } -- cgit v1.2.3-59-g8ed1b From b42cc6e421e7bf74e545483aa34b99d2a2ca6d3a Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Fri, 18 Jul 2014 19:08:03 +0100 Subject: xen-netback: Fix releasing frag_list skbs in error path When the grant operations failed, the skb is freed up eventually, and it tries to release the frags, if there is any. For the main skb nr_frags is set to 0 to avoid this, but on the frag_list it iterates through the frags array, and tries to call put_page on the page pointer which contains garbage at that time. Signed-off-by: Zoltan Kiss Reported-by: Armin Zentai Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xenproject.org Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/netback.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index a773f2016bad..8cbf60d4689e 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1521,7 +1521,16 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) /* Check the remap error code. */ if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) { + /* If there was an error, xenvif_tx_check_gop is + * expected to release all the frags which were mapped, + * so kfree_skb shouldn't do it again + */ skb_shinfo(skb)->nr_frags = 0; + if (skb_has_frag_list(skb)) { + struct sk_buff *nskb = + skb_shinfo(skb)->frag_list; + skb_shinfo(nskb)->nr_frags = 0; + } kfree_skb(skb); continue; } -- cgit v1.2.3-59-g8ed1b From 1b860da0404a76af8533099ffe0a965490939369 Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Fri, 18 Jul 2014 19:08:04 +0100 Subject: xen-netback: Fix releasing header slot on error path This patch makes this function aware that the first frag and the header might share the same ring slot. That could happen if the first slot is bigger than PKT_PROT_LEN. Due to this the error path might release that slot twice or never, depending on the error scenario. xenvif_idx_release is also removed from xenvif_idx_unmap, and called separately. Signed-off-by: Zoltan Kiss Reported-by: Armin Zentai Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xenproject.org Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 8cbf60d4689e..6fff911dc134 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1039,6 +1039,8 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, */ struct skb_shared_info *first_shinfo = NULL; int nr_frags = shinfo->nr_frags; + const bool sharedslot = nr_frags && + frag_get_pending_idx(&shinfo->frags[0]) == pending_idx; int i, err; /* Check status of header. */ @@ -1051,7 +1053,10 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, (*gopp_copy)->status, pending_idx, (*gopp_copy)->source.u.ref); - xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR); + /* The first frag might still have this slot mapped */ + if (!sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_ERROR); } check_frags: @@ -1068,8 +1073,19 @@ check_frags: pending_idx, gop_map->handle); /* Had a previous error? Invalidate this fragment. */ - if (unlikely(err)) + if (unlikely(err)) { xenvif_idx_unmap(queue, pending_idx); + /* If the mapping of the first frag was OK, but + * the header's copy failed, and they are + * sharing a slot, send an error + */ + if (i == 0 && sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_ERROR); + else + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_OKAY); + } continue; } @@ -1081,15 +1097,27 @@ check_frags: gop_map->status, pending_idx, gop_map->ref); + xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR); /* Not the first error? Preceding frags already invalidated. */ if (err) continue; - /* First error: invalidate preceding fragments. */ + + /* First error: if the header haven't shared a slot with the + * first frag, release it as well. 
+ */ + if (!sharedslot) + xenvif_idx_release(queue, + XENVIF_TX_CB(skb)->pending_idx, + XEN_NETIF_RSP_OKAY); + + /* Invalidate preceding fragments of this skb. */ for (j = 0; j < i; j++) { pending_idx = frag_get_pending_idx(&shinfo->frags[j]); xenvif_idx_unmap(queue, pending_idx); + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_OKAY); } /* And if we found the error while checking the frag_list, unmap @@ -1099,6 +1127,8 @@ check_frags: for (j = 0; j < first_shinfo->nr_frags; j++) { pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]); xenvif_idx_unmap(queue, pending_idx); + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_OKAY); } } @@ -1834,8 +1864,6 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) tx_unmap_op.status); BUG(); } - - xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_OKAY); } static inline int rx_work_todo(struct xenvif_queue *queue) -- cgit v1.2.3-59-g8ed1b From d8cfbfc4660054150ca1b7c501a8edc0771022f9 Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Fri, 18 Jul 2014 19:08:05 +0100 Subject: xen-netback: Fix pointer incrementation to avoid incorrect logging Due to this pointer is increased prematurely, the error log contains rubbish. Signed-off-by: Zoltan Kiss Reported-by: Armin Zentai Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xenproject.org Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 6fff911dc134..c65b636bcab9 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1045,7 +1045,6 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, /* Check status of header. */ err = (*gopp_copy)->status; - (*gopp_copy)++; if (unlikely(err)) { if (net_ratelimit()) netdev_dbg(queue->vif->dev, @@ -1058,6 +1057,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR); } + (*gopp_copy)++; check_frags: for (i = 0; i < nr_frags; i++, gop_map++) { -- cgit v1.2.3-59-g8ed1b From 640d7efe4c08f06c4ae5d31b79bd8740e7f6790a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 21 Jul 2014 00:06:48 +0100 Subject: dns_resolver: Null-terminate the right string *_result[len] is parsed as *(_result[len]) which is not at all what we want to touch here. Signed-off-by: Ben Hutchings Fixes: 84a7c0b1db1c ("dns_resolver: assure that dns_query() result is null-terminated") Signed-off-by: David S. Miller --- net/dns_resolver/dns_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 9acec61f5433..dd8696a3dbec 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -150,7 +150,7 @@ int dns_query(const char *type, const char *name, size_t namelen, goto put; memcpy(*_result, upayload->data, len); - *_result[len] = '\0'; + (*_result)[len] = '\0'; if (_expiry) *_expiry = rkey->expiry; -- cgit v1.2.3-59-g8ed1b From d46b6bfa7628030a93e05f7087b7c638a85b4a35 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 23 Jun 2014 15:55:36 +0200 Subject: batman-adv: drop QinQ claim frames in bridge loop avoidance Since bridge loop avoidance only supports untagged or simple 802.1q tagged VLAN claim frames, claim frames with stacked VLAN headers (QinQ) should be detected and dropped. 
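Detecting this case amounts to walking the frame's EtherType chain and counting how many 802.1Q headers sit in front of the encapsulated ARP payload, which is what the loop added around skb_header_pointer() in batadv_bla_process_claim() further down does. The function below is a standalone sketch of the same walk over a raw Ethernet frame held in a plain buffer; the struct and function names are illustrative only, not batman-adv or kernel API.

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>		/* ntohs() */
#include <linux/if_ether.h>	/* ETH_HLEN, ETH_P_8021Q, ETH_P_ARP */

struct vlan_tag {		/* one 802.1Q header: TCI + inner EtherType */
	uint16_t tci;
	uint16_t encapsulated_proto;
};

/* Return the number of stacked VLAN headers in front of the payload and
 * store the innermost EtherType (host order) in *inner_proto, or -1 if
 * the frame is too short -- the equivalent of skb_header_pointer() failing.
 */
static int vlan_depth(const uint8_t *frame, size_t len, uint16_t *inner_proto)
{
	size_t off = ETH_HLEN - 2;	/* offset of the outer EtherType */
	uint16_t proto;
	int depth = 0;

	if (len < ETH_HLEN)
		return -1;
	memcpy(&proto, frame + off, sizeof(proto));
	off += sizeof(proto);

	while (ntohs(proto) == ETH_P_8021Q) {
		struct vlan_tag tag;

		if (len < off + sizeof(tag))
			return -1;
		memcpy(&tag, frame + off, sizeof(tag));
		proto = tag.encapsulated_proto;
		off += sizeof(tag);
		depth++;
	}

	*inner_proto = ntohs(proto);
	return depth;
}

A claim frame is only handled when the depth found this way is at most one and the inner protocol is ARP; anything nested deeper is dropped rather than transported over the mesh, which is what the vlan_depth check in the hunk below implements.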
Transporting the over the mesh may cause problems on the receivers, or create bogus entries in the local tt tables. Reported-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.c | 44 ++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 6f0d9ec37950..a957c8140721 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -800,11 +800,6 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, bla_dst = (struct batadv_bla_claim_dst *)hw_dst; bla_dst_own = &bat_priv->bla.claim_dest; - /* check if it is a claim packet in general */ - if (memcmp(bla_dst->magic, bla_dst_own->magic, - sizeof(bla_dst->magic)) != 0) - return 0; - /* if announcement packet, use the source, * otherwise assume it is in the hw_src */ @@ -866,12 +861,13 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, struct sk_buff *skb) { - struct batadv_bla_claim_dst *bla_dst; + struct batadv_bla_claim_dst *bla_dst, *bla_dst_own; uint8_t *hw_src, *hw_dst; - struct vlan_ethhdr *vhdr; + struct vlan_hdr *vhdr, vhdr_buf; struct ethhdr *ethhdr; struct arphdr *arphdr; unsigned short vid; + int vlan_depth = 0; __be16 proto; int headlen; int ret; @@ -882,9 +878,24 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, proto = ethhdr->h_proto; headlen = ETH_HLEN; if (vid & BATADV_VLAN_HAS_TAG) { - vhdr = vlan_eth_hdr(skb); - proto = vhdr->h_vlan_encapsulated_proto; - headlen += VLAN_HLEN; + /* Traverse the VLAN/Ethertypes. + * + * At this point it is known that the first protocol is a VLAN + * header, so start checking at the encapsulated protocol. + * + * The depth of the VLAN headers is recorded to drop BLA claim + * frames encapsulated into multiple VLAN headers (QinQ). + */ + do { + vhdr = skb_header_pointer(skb, headlen, VLAN_HLEN, + &vhdr_buf); + if (!vhdr) + return 0; + + proto = vhdr->h_vlan_encapsulated_proto; + headlen += VLAN_HLEN; + vlan_depth++; + } while (proto == htons(ETH_P_8021Q)); } if (proto != htons(ETH_P_ARP)) @@ -914,6 +925,19 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, hw_src = (uint8_t *)arphdr + sizeof(struct arphdr); hw_dst = hw_src + ETH_ALEN + 4; bla_dst = (struct batadv_bla_claim_dst *)hw_dst; + bla_dst_own = &bat_priv->bla.claim_dest; + + /* check if it is a claim frame in general */ + if (memcmp(bla_dst->magic, bla_dst_own->magic, + sizeof(bla_dst->magic)) != 0) + return 0; + + /* check if there is a claim frame encapsulated deeper in (QinQ) and + * drop that, as this is not supported by BLA but should also not be + * sent via the mesh. + */ + if (vlan_depth > 1) + return 1; /* check if it is a claim frame. */ ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst, -- cgit v1.2.3-59-g8ed1b From 35df3b298fc8779f7edf4b0228c683f7e98edcd5 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 8 May 2014 17:13:15 +0200 Subject: batman-adv: fix TT VLAN inconsistency on VLAN re-add When a VLAN interface (on top of batX) is removed and re-added within a short timeframe TT does not have enough time to properly cleanup. This creates an internal TT state mismatch as the newly created softif_vlan will be initialized from scratch with a TT client count of zero (even if TT entries for this VLAN still exist). 
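The remedy described in the rest of this message is essentially a lifetime rule: every local TT entry holds a reference on its softif_vlan, destroying the VLAN only removes the user-visible parts (sysfs), the object itself is freed once the last reference is dropped, and a quick re-add revives the still-referenced object instead of allocating a zero-counted duplicate. The fragment below sketches that rule in plain userspace C with illustrative names; the real code uses atomic_t refcounts, RCU list handling and softif_vlan_list_lock, all omitted here.

#include <stdlib.h>

struct vlan_obj {
	unsigned short vid;
	int refcount;		/* one ref for creation + one per local TT entry */
};

/* Drop one reference; the object is only freed once the last TT entry
 * that pointed at this VLAN is gone.
 */
static void vlan_put(struct vlan_obj *v)
{
	if (--v->refcount == 0)
		free(v);
}

/* Re-adding a VLAN that still has live references revives the existing
 * object instead of creating a second copy with a zero TT client count.
 */
static struct vlan_obj *vlan_get_or_create(struct vlan_obj *existing,
					   unsigned short vid)
{
	struct vlan_obj *v;

	if (existing) {
		existing->refcount++;
		return existing;
	}
	v = calloc(1, sizeof(*v));
	if (v) {
		v->vid = vid;
		v->refcount = 1;
	}
	return v;
}

In the patch this is what the added batadv_softif_vlan_get()/batadv_softif_vlan_free_ref() calls around the TT entry lifecycle implement.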
The resulting TT messages are bogus due to the counter / tt client listing mismatch, thus creating inconsistencies on every node in the network To fix this issue destroy_vlan() has to not free the VLAN object immediately but it has to be kept alive until all the TT entries for this VLAN have been removed. destroy_vlan() still removes the sysfs folder so that the user has the feeling that everything went fine. If the same VLAN is re-added before the old object is free'd, then the latter is resurrected and re-used. Implement such behaviour by increasing the reference counter of a softif_vlan object every time a new local TT entry for such VLAN is created and remove the object from the list only when all the TT entries have been destroyed. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/soft-interface.c | 60 +++++++++++++++++++++++++++++--------- net/batman-adv/translation-table.c | 26 +++++++++++++++++ net/batman-adv/types.h | 2 ++ 3 files changed, 74 insertions(+), 14 deletions(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index e7ee65dc20bf..cbd677f48c00 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -448,10 +448,15 @@ out: * possibly free it * @softif_vlan: the vlan object to release */ -void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *softif_vlan) +void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan) { - if (atomic_dec_and_test(&softif_vlan->refcount)) - kfree_rcu(softif_vlan, rcu); + if (atomic_dec_and_test(&vlan->refcount)) { + spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock); + hlist_del_rcu(&vlan->list); + spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock); + + kfree_rcu(vlan, rcu); + } } /** @@ -505,6 +510,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) if (!vlan) return -ENOMEM; + vlan->bat_priv = bat_priv; vlan->vid = vid; atomic_set(&vlan->refcount, 1); @@ -516,6 +522,10 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) return err; } + spin_lock_bh(&bat_priv->softif_vlan_list_lock); + hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + /* add a new TT local entry. 
This one will be marked with the NOPURGE * flag */ @@ -523,10 +533,6 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) bat_priv->soft_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); - spin_lock_bh(&bat_priv->softif_vlan_list_lock); - hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); - return 0; } @@ -538,18 +544,13 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv, struct batadv_softif_vlan *vlan) { - spin_lock_bh(&bat_priv->softif_vlan_list_lock); - hlist_del_rcu(&vlan->list); - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); - - batadv_sysfs_del_vlan(bat_priv, vlan); - /* explicitly remove the associated TT local entry because it is marked * with the NOPURGE flag */ batadv_tt_local_remove(bat_priv, bat_priv->soft_iface->dev_addr, vlan->vid, "vlan interface destroyed", false); + batadv_sysfs_del_vlan(bat_priv, vlan); batadv_softif_vlan_free_ref(vlan); } @@ -567,6 +568,8 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, unsigned short vid) { struct batadv_priv *bat_priv = netdev_priv(dev); + struct batadv_softif_vlan *vlan; + int ret; /* only 802.1Q vlans are supported. * batman-adv does not know how to handle other types @@ -576,7 +579,36 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, vid |= BATADV_VLAN_HAS_TAG; - return batadv_softif_create_vlan(bat_priv, vid); + /* if a new vlan is getting created and it already exists, it means that + * it was not deleted yet. batadv_softif_vlan_get() increases the + * refcount in order to revive the object. + * + * if it does not exist then create it. + */ + vlan = batadv_softif_vlan_get(bat_priv, vid); + if (!vlan) + return batadv_softif_create_vlan(bat_priv, vid); + + /* recreate the sysfs object if it was already destroyed (and it should + * be since we received a kill_vid() for this vlan + */ + if (!vlan->kobj) { + ret = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); + if (ret) { + batadv_softif_vlan_free_ref(vlan); + return ret; + } + } + + /* add a new TT local entry. This one will be marked with the NOPURGE + * flag. 
This must be added again, even if the vlan object already + * exists, because the entry was deleted by kill_vid() + */ + batadv_tt_local_add(bat_priv->soft_iface, + bat_priv->soft_iface->dev_addr, vid, + BATADV_NULL_IFINDEX, BATADV_NO_MARK); + + return 0; } /** diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index d636bde72c9a..5f59e7f899a0 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -511,6 +511,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_tt_local_entry *tt_local; struct batadv_tt_global_entry *tt_global = NULL; + struct batadv_softif_vlan *vlan; struct net_device *in_dev = NULL; struct hlist_head *head; struct batadv_tt_orig_list_entry *orig_entry; @@ -572,6 +573,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, if (!tt_local) goto out; + /* increase the refcounter of the related vlan */ + vlan = batadv_softif_vlan_get(bat_priv, vid); + batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n", addr, BATADV_PRINT_VID(vid), @@ -604,6 +608,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, if (unlikely(hash_added != 0)) { /* remove the reference for the hash */ batadv_tt_local_entry_free_ref(tt_local); + batadv_softif_vlan_free_ref(vlan); goto out; } @@ -1009,6 +1014,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, { struct batadv_tt_local_entry *tt_local_entry; uint16_t flags, curr_flags = BATADV_NO_FLAGS; + struct batadv_softif_vlan *vlan; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) @@ -1039,6 +1045,11 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, hlist_del_rcu(&tt_local_entry->common.hash_entry); batadv_tt_local_entry_free_ref(tt_local_entry); + /* decrease the reference held for this vlan */ + vlan = batadv_softif_vlan_get(bat_priv, vid); + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + out: if (tt_local_entry) batadv_tt_local_entry_free_ref(tt_local_entry); @@ -1111,6 +1122,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; + struct batadv_softif_vlan *vlan; struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -1131,6 +1143,13 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) tt_local = container_of(tt_common_entry, struct batadv_tt_local_entry, common); + + /* decrease the reference held for this vlan */ + vlan = batadv_softif_vlan_get(bat_priv, + tt_common_entry->vid); + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + batadv_tt_local_entry_free_ref(tt_local); } spin_unlock_bh(list_lock); @@ -3139,6 +3158,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_local_entry *tt_local; + struct batadv_softif_vlan *vlan; struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -3167,6 +3187,12 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) tt_local = container_of(tt_common, struct batadv_tt_local_entry, common); + 
+ /* decrease the reference held for this vlan */ + vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid); + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + batadv_tt_local_entry_free_ref(tt_local); } spin_unlock_bh(list_lock); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 34891a56773f..8854c05622a9 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -687,6 +687,7 @@ struct batadv_priv_nc { /** * struct batadv_softif_vlan - per VLAN attributes set + * @bat_priv: pointer to the mesh object * @vid: VLAN identifier * @kobj: kobject for sysfs vlan subdirectory * @ap_isolation: AP isolation state @@ -696,6 +697,7 @@ struct batadv_priv_nc { * @rcu: struct used for freeing in a RCU-safe manner */ struct batadv_softif_vlan { + struct batadv_priv *bat_priv; unsigned short vid; struct kobject *kobj; atomic_t ap_isolation; /* boolean */ -- cgit v1.2.3-59-g8ed1b From 10ec9472f05b45c94db3c854d22581a20b97db41 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Jul 2014 07:17:42 +0200 Subject: ipv4: fix buffer overflow in ip_options_compile() There is a benign buffer overflow in ip_options_compile spotted by AddressSanitizer[1] : Its benign because we always can access one extra byte in skb->head (because header is followed by struct skb_shared_info), and in this case this byte is not even used. [28504.910798] ================================================================== [28504.912046] AddressSanitizer: heap-buffer-overflow in ip_options_compile [28504.913170] Read of size 1 by thread T15843: [28504.914026] [] ip_options_compile+0x121/0x9c0 [28504.915394] [] ip_options_get_from_user+0xad/0x120 [28504.916843] [] do_ip_setsockopt.isra.15+0x8df/0x1630 [28504.918175] [] ip_setsockopt+0x30/0xa0 [28504.919490] [] tcp_setsockopt+0x5b/0x90 [28504.920835] [] sock_common_setsockopt+0x5f/0x70 [28504.922208] [] SyS_setsockopt+0xa2/0x140 [28504.923459] [] system_call_fastpath+0x16/0x1b [28504.924722] [28504.925106] Allocated by thread T15843: [28504.925815] [] ip_options_get_from_user+0x35/0x120 [28504.926884] [] do_ip_setsockopt.isra.15+0x8df/0x1630 [28504.927975] [] ip_setsockopt+0x30/0xa0 [28504.929175] [] tcp_setsockopt+0x5b/0x90 [28504.930400] [] sock_common_setsockopt+0x5f/0x70 [28504.931677] [] SyS_setsockopt+0xa2/0x140 [28504.932851] [] system_call_fastpath+0x16/0x1b [28504.934018] [28504.934377] The buggy address ffff880026382828 is located 0 bytes to the right [28504.934377] of 40-byte region [ffff880026382800, ffff880026382828) [28504.937144] [28504.937474] Memory state around the buggy address: [28504.938430] ffff880026382300: ........ rrrrrrrr rrrrrrrr rrrrrrrr [28504.939884] ffff880026382400: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28504.941294] ffff880026382500: .....rrr rrrrrrrr rrrrrrrr rrrrrrrr [28504.942504] ffff880026382600: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28504.943483] ffff880026382700: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28504.944511] >ffff880026382800: .....rrr rrrrrrrr rrrrrrrr rrrrrrrr [28504.945573] ^ [28504.946277] ffff880026382900: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28505.094949] ffff880026382a00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28505.096114] ffff880026382b00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28505.097116] ffff880026382c00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28505.098472] ffff880026382d00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28505.099804] Legend: [28505.100269] f - 8 freed bytes [28505.100884] r - 8 redzone bytes [28505.101649] . 
- 8 allocated bytes [28505.102406] x=1..7 - x allocated bytes + (8-x) redzone bytes [28505.103637] ================================================================== [1] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 5e7aecea05cd..ad382499bace 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -288,6 +288,10 @@ int ip_options_compile(struct net *net, optptr++; continue; } + if (unlikely(l < 2)) { + pp_ptr = optptr; + goto error; + } optlen = optptr[1]; if (optlen < 2 || optlen > l) { pp_ptr = optptr; -- cgit v1.2.3-59-g8ed1b
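Editor's note: the ip_options_compile() hunk above adds a single guard: when fewer than two bytes of option space remain, the length byte at optptr[1] must not be read, since a multi-byte option needs at least a type and a length byte. The standalone C sketch below reproduces that parsing pattern in userspace so the effect of the check is easy to test; parse_ipv4_options(), its return convention and the sample buffer are invented for illustration and are not the kernel code path.

/*
 * Userspace sketch of the check ip_options_compile() gains: before reading
 * the option length byte at optptr[1], make sure at least two bytes are
 * left (type + length); otherwise the parser would read past the buffer.
 * parse_ipv4_options() is an invented helper, not the kernel function.
 */
#include <stdio.h>
#include <stddef.h>

#define IPOPT_END	0	/* end of option list */
#define IPOPT_NOOP	1	/* single-byte padding */

static int parse_ipv4_options(const unsigned char *optptr, size_t len)
{
	size_t l = len;

	while (l > 0) {
		unsigned char type = optptr[0];
		unsigned char optlen;

		if (type == IPOPT_END)
			return 0;
		if (type == IPOPT_NOOP) {
			l--;
			optptr++;
			continue;
		}
		/* the fix: a multi-byte option needs at least 2 bytes left */
		if (l < 2)
			return -1;
		optlen = optptr[1];
		if (optlen < 2 || optlen > l)
			return -1;
		/* a real parser would interpret the option body here */
		l -= optlen;
		optptr += optlen;
	}
	return 0;
}

int main(void)
{
	/* 39 NOOPs followed by a lone option type byte: without the l < 2
	 * check the parser would read the length byte one past the end,
	 * which is exactly the out-of-bounds access AddressSanitizer saw.
	 */
	unsigned char opts[40];

	for (size_t i = 0; i < sizeof(opts) - 1; i++)
		opts[i] = IPOPT_NOOP;
	opts[sizeof(opts) - 1] = 0x44;	/* option type with no length byte */

	printf("parse result: %d\n", parse_ipv4_options(opts, sizeof(opts)));
	return 0;
}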
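Editor's note: the earlier batman-adv patch ties the softif_vlan lifetime to its TT entries: each local TT entry takes a reference, and the object is unlinked from the list and freed only when the last reference is dropped, so a VLAN that is re-added before that point simply resurrects the still-live object. The userspace C sketch below shows the same reference-counting idea under simplified assumptions (a plain singly linked list instead of an RCU hlist, C11 atomics instead of kernel atomics); vlan_obj, vlan_get and vlan_put are made-up names, not the batman-adv API.

/*
 * Minimal sketch of the lifetime scheme: every dependent entry holds a
 * reference, and the object is unlinked and freed only on the last put.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct vlan_obj {
	unsigned short vid;
	atomic_int refcount;
	struct vlan_obj *next;	/* simple list stands in for the hlist */
};

static struct vlan_obj *vlan_list;

/* take an extra reference, e.g. when a new TT entry for this vid is added */
static void vlan_get(struct vlan_obj *v)
{
	atomic_fetch_add(&v->refcount, 1);
}

/* drop a reference; unlink and free only when the last one goes away */
static void vlan_put(struct vlan_obj *v)
{
	if (atomic_fetch_sub(&v->refcount, 1) != 1)
		return;

	/* last reference: only now is it removed from the list */
	for (struct vlan_obj **pp = &vlan_list; *pp; pp = &(*pp)->next) {
		if (*pp == v) {
			*pp = v->next;
			break;
		}
	}
	free(v);
}

static struct vlan_obj *vlan_create(unsigned short vid)
{
	struct vlan_obj *v = calloc(1, sizeof(*v));

	if (!v)
		return NULL;
	v->vid = vid;
	atomic_init(&v->refcount, 1);	/* creation reference */
	v->next = vlan_list;
	vlan_list = v;
	return v;
}

int main(void)
{
	struct vlan_obj *v = vlan_create(42);

	vlan_get(v);	/* a TT entry is added for vid 42 */
	vlan_put(v);	/* "destroy" drops the creation ref: object survives */
	printf("still on list: %d\n", vlan_list != NULL);
	vlan_put(v);	/* the TT entry goes away: now it is unlinked and freed */
	printf("still on list: %d\n", vlan_list != NULL);
	return 0;
}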