From bf37d2b3fdc9e451f9e376a3922ac8df5aa24128 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Sun, 4 Aug 2013 12:55:48 +0300 Subject: busy_poll: cleanup do-nothing placeholders When renaming ll_poll to busy poll, I introduced a typo in the name of the do-nothing placeholder for sk_busy_loop and called it sk_busy_poll. This broke compile when busy poll was not configured. Cong Wang submitted a patch to fixed that. This patch removes the now redundant, misspelled placeholder. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- include/net/busy_poll.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/net') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index f18b91966d3d..8e2dfc106aed 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -162,11 +162,6 @@ static inline bool sk_can_busy_loop(struct sock *sk) return false; } -static inline bool sk_busy_poll(struct sock *sk, int nonblock) -{ - return false; -} - static inline void skb_mark_napi_id(struct sk_buff *skb, struct napi_struct *napi) { -- cgit v1.2.3-59-g8ed1b From 288a9376371d425edeeea41a0310922c5fb2092d Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Wed, 7 Aug 2013 11:33:25 +0300 Subject: net: rename busy poll MIB counter Rename mib counter from "low latency" to "busy poll" v1 also moved the counter to the ip MIB (suggested by Shawn Bohrer) Eric Dumazet suggested that the current location is better. So v2 just renames the counter to fit the new naming convention. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- include/net/busy_poll.h | 2 +- include/uapi/linux/snmp.h | 2 +- net/ipv4/proc.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 8e2dfc106aed..8a358a2c97e6 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -122,7 +122,7 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) if (rc > 0) /* local bh are disabled so it is ok to use _BH */ NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_LOWLATENCYRXPACKETS, rc); + LINUX_MIB_BUSYPOLLRXPACKETS, rc); } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && !need_resched() && !busy_loop_timeout(end_time)); diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index af0a674cc677..a1356d3b54df 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -253,7 +253,7 @@ enum LINUX_MIB_TCPFASTOPENLISTENOVERFLOW, /* TCPFastOpenListenOverflow */ LINUX_MIB_TCPFASTOPENCOOKIEREQD, /* TCPFastOpenCookieReqd */ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */ - LINUX_MIB_LOWLATENCYRXPACKETS, /* LowLatencyRxPackets */ + LINUX_MIB_BUSYPOLLRXPACKETS, /* BusyPollRxPackets */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 6577a1149a47..463bd1273346 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -273,7 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), - SNMP_MIB_ITEM("LowLatencyRxPackets", LINUX_MIB_LOWLATENCYRXPACKETS), + SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS), SNMP_MIB_SENTINEL }; -- cgit v1.2.3-59-g8ed1b From 4221f40513233fa8edeef7fc82e44163fde03b9b Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 13 Aug 2013 01:41:06 -0700 Subject: ip_tunnel: Do not use inner ip-header-id for tunnel ip-header-id. Using inner-id for tunnel id is not safe in some rare cases. E.g. packets coming from multiple sources entering same tunnel can have same id. Therefore on tunnel packet receive we could have packets from two different stream but with same source and dst IP with same ip-id which could confuse ip packet reassembly. Following patch reverts optimization from commit 490ab08127 (IP_GRE: Fix IP-Identification.) CC: Jarno Rajahalme CC: Ansis Atteka Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 14 -------------- net/ipv4/ip_tunnel_core.c | 4 +--- 2 files changed, 1 insertion(+), 17 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 781b3cf86a2f..a354db5b7662 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -145,20 +145,6 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, return INET_ECN_encapsulate(tos, inner); } -static inline void tunnel_ip_select_ident(struct sk_buff *skb, - const struct iphdr *old_iph, - struct dst_entry *dst) -{ - struct iphdr *iph = ip_hdr(skb); - - /* Use inner packet iph-id if possible. */ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); -} - int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); int iptunnel_xmit(struct net *net, struct rtable *rt, struct sk_buff *skb, diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 7167b08977df..850525b34899 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -76,9 +76,7 @@ int iptunnel_xmit(struct net *net, struct rtable *rt, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - tunnel_ip_select_ident(skb, - (const struct iphdr *)skb_inner_network_header(skb), - &rt->dst); + __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); err = ip_local_out(skb); if (unlikely(net_xmit_eval(err))) -- cgit v1.2.3-59-g8ed1b From 8a8e3d84b1719a56f9151909e80ea6ebc5b8e318 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 14 Aug 2013 23:47:11 +0200 Subject: net_sched: restore "linklayer atm" handling commit 56b765b79 ("htb: improved accuracy at high rates") broke the "linklayer atm" handling. tc class add ... htb rate X ceil Y linklayer atm The linklayer setting is implemented by modifying the rate table which is send to the kernel. No direct parameter were transferred to the kernel indicating the linklayer setting. The commit 56b765b79 ("htb: improved accuracy at high rates") removed the use of the rate table system. To keep compatible with older iproute2 utils, this patch detects the linklayer by parsing the rate table. It also supports future versions of iproute2 to send this linklayer parameter to the kernel directly. This is done by using the __reserved field in struct tc_ratespec, to convey the choosen linklayer option, but only using the lower 4 bits of this field. Linklayer detection is limited to speeds below 100Mbit/s, because at high rates the rtab is gets too inaccurate, so bad that several fields contain the same values, this resembling the ATM detect. Fields even start to contain "0" time to send, e.g. at 1000Mbit/s sending a 96 bytes packet cost "0", thus the rtab have been more broken than we first realized. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/sch_generic.h | 9 ++++++++- include/uapi/linux/pkt_sched.h | 10 +++++++++- net/sched/sch_api.c | 41 +++++++++++++++++++++++++++++++++++++++++ net/sched/sch_generic.c | 1 + net/sched/sch_htb.c | 13 +++++++++++++ 5 files changed, 72 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6eab63363e59..e5ae0c50fa9c 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -683,13 +683,19 @@ struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; u16 overhead; + u8 linklayer; u8 shift; }; static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, unsigned int len) { - return ((u64)(len + r->overhead) * r->mult) >> r->shift; + len += r->overhead; + + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) + return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; + + return ((u64)len * r->mult) >> r->shift; } extern void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf); @@ -700,6 +706,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, memset(res, 0, sizeof(*res)); res->rate = r->rate_bytes_ps; res->overhead = r->overhead; + res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } #endif diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index dbd71b0c7d8c..09d62b9228ff 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -73,9 +73,17 @@ struct tc_estimator { #define TC_H_ROOT (0xFFFFFFFFU) #define TC_H_INGRESS (0xFFFFFFF1U) +/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */ +enum tc_link_layer { + TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */ + TC_LINKLAYER_ETHERNET, + TC_LINKLAYER_ATM, +}; +#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */ + struct tc_ratespec { unsigned char cell_log; - unsigned char __reserved; + __u8 linklayer; /* lower 4 bits */ unsigned short overhead; short cell_align; unsigned short mpu; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 281c1bded1f6..51b968d3febb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -285,6 +285,45 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) return q; } +/* The linklayer setting were not transferred from iproute2, in older + * versions, and the rate tables lookup systems have been dropped in + * the kernel. To keep backward compatible with older iproute2 tc + * utils, we detect the linklayer setting by detecting if the rate + * table were modified. + * + * For linklayer ATM table entries, the rate table will be aligned to + * 48 bytes, thus some table entries will contain the same value. The + * mpu (min packet unit) is also encoded into the old rate table, thus + * starting from the mpu, we find low and high table entries for + * mapping this cell. If these entries contain the same value, when + * the rate tables have been modified for linklayer ATM. + * + * This is done by rounding mpu to the nearest 48 bytes cell/entry, + * and then roundup to the next cell, calc the table entry one below, + * and compare. + */ +static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) +{ + int low = roundup(r->mpu, 48); + int high = roundup(low+1, 48); + int cell_low = low >> r->cell_log; + int cell_high = (high >> r->cell_log) - 1; + + /* rtab is too inaccurate at rates > 100Mbit/s */ + if ((r->rate > (100000000/8)) || (rtab[0] == 0)) { + pr_debug("TC linklayer: Giving up ATM detection\n"); + return TC_LINKLAYER_ETHERNET; + } + + if ((cell_high > cell_low) && (cell_high < 256) + && (rtab[cell_low] == rtab[cell_high])) { + pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n", + cell_low, cell_high, rtab[cell_high]); + return TC_LINKLAYER_ATM; + } + return TC_LINKLAYER_ETHERNET; +} + static struct qdisc_rate_table *qdisc_rtab_list; struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) @@ -308,6 +347,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *ta rtab->rate = *r; rtab->refcnt = 1; memcpy(rtab->data, nla_data(tab), 1024); + if (r->linklayer == TC_LINKLAYER_UNAWARE) + r->linklayer = __detect_linklayer(r, rtab->data); rtab->next = qdisc_rtab_list; qdisc_rtab_list = rtab; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index eeb8276d7a89..48be3d5c0d92 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -909,6 +909,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; r->rate_bytes_ps = conf->rate; + r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); r->mult = 1; /* * The deal here is to replace a divide by a reciprocal one diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 45e751527dfc..c2178b15ca6e 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1329,6 +1329,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = tca[TCA_OPTIONS]; + struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; @@ -1350,6 +1351,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; + /* Keeping backward compatible with rate_table based iproute2 tc */ + if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) { + rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]); + if (rtab) + qdisc_put_rtab(rtab); + } + if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) { + ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]); + if (ctab) + qdisc_put_rtab(ctab); + } + if (!cl) { /* new class */ struct Qdisc *new_q; int prio; -- cgit v1.2.3-59-g8ed1b