From 306f7aa1807be7588f115d7cafa475f65e72e3d1 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 11 Feb 2015 14:39:15 +0100 Subject: ieee802154: correct ieee802154_is_valid_psdu_len This patch corrects the ieee802154_is_valid_psdu_len function that this function also checks on reserved values 6-8 for validation the psdu length. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 6e82d888287c..40b0ab953937 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -28,7 +28,8 @@ #include #define IEEE802154_MTU 127 -#define IEEE802154_MIN_PSDU_LEN 5 +#define IEEE802154_ACK_PSDU_LEN 5 +#define IEEE802154_MIN_PSDU_LEN 9 #define IEEE802154_PAN_ID_BROADCAST 0xffff #define IEEE802154_ADDR_SHORT_BROADCAST 0xffff @@ -204,11 +205,18 @@ enum { /** * ieee802154_is_valid_psdu_len - check if psdu len is valid + * available lengths: + * 0-4 Reserved + * 5 MPDU (Acknowledgment) + * 6-8 Reserved + * 9-127 MPDU + * * @len: psdu len with (MHR + payload + MFR) */ static inline bool ieee802154_is_valid_psdu_len(const u8 len) { - return (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU); + return (len == IEEE802154_ACK_PSDU_LEN || + (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU)); } /** -- cgit v1.3-6-gb490 From 959d10f6bbf6ab5b8813c4e37540a2e43ca2ae96 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Feb 2015 03:19:24 -0800 Subject: igmp: add __ip_mc_{join|leave}_group() There is a need to perform igmp join/leave operations while RTNL is held. Make ip_mc_{join|leave}_group() wrappers around __ip_mc_{join|leave}_group() to avoid the proliferation of work queues. For example, vxlan_igmp_join() could possibly be removed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 ++ net/ipv4/igmp.c | 52 ++++++++++++++++++++++++++++++++++------------------ 2 files changed, 36 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 2c677afeea47..b5a6470e686c 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -111,7 +111,9 @@ struct ip_mc_list { extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto); extern int igmp_rcv(struct sk_buff *); +extern int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); +extern int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern void ip_mc_drop_socket(struct sock *sk); extern int ip_mc_source(int add, int omode, struct sock *sk, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 666cf364df86..4b1172d73e03 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1849,30 +1849,25 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) pmc->sfcount[MCAST_EXCLUDE] = 1; } - -/* - * Join a multicast group - */ -int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) +int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) { - int err; __be32 addr = imr->imr_multiaddr.s_addr; - struct ip_mc_socklist *iml = NULL, *i; + struct ip_mc_socklist *iml, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); int ifindex; int count = 0; + int err; + + ASSERT_RTNL(); if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_lock(); - in_dev = ip_mc_find_dev(net, imr); if (!in_dev) { - iml = NULL; err = -ENODEV; goto done; } @@ -1900,9 +1895,22 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) ip_mc_inc_group(in_dev, addr); err = 0; done: - rtnl_unlock(); return err; } +EXPORT_SYMBOL(__ip_mc_join_group); + +/* Join a multicast group + */ +int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) +{ + int ret; + + rtnl_lock(); + ret = __ip_mc_join_group(sk, imr); + rtnl_unlock(); + + return ret; +} EXPORT_SYMBOL(ip_mc_join_group); static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, @@ -1925,11 +1933,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, return err; } -/* - * Ask a socket to leave a group. - */ - -int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) +int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml; @@ -1940,7 +1944,8 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) u32 ifindex; int ret = -EADDRNOTAVAIL; - rtnl_lock(); + ASSERT_RTNL(); + in_dev = ip_mc_find_dev(net, imr); if (!in_dev) { ret = -ENODEV; @@ -1964,14 +1969,25 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) *imlp = iml->next_rcu; ip_mc_dec_group(in_dev, group); - rtnl_unlock(); + /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); kfree_rcu(iml, rcu); return 0; } out: + return ret; +} +EXPORT_SYMBOL(__ip_mc_leave_group); + +int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) +{ + int ret; + + rtnl_lock(); + ret = __ip_mc_leave_group(sk, imr); rtnl_unlock(); + return ret; } EXPORT_SYMBOL(ip_mc_leave_group); -- cgit v1.3-6-gb490 From 059a2440fd3cf4ec57735db2c0a90401cde84fca Mon Sep 17 00:00:00 2001 From: Bojan Prtvar Date: Sun, 22 Feb 2015 11:46:35 +0100 Subject: net: Remove state argument from skb_find_text() Although it is clear that textsearch state is intentionally passed to skb_find_text() as uninitialized argument, it was never used by the callers. Therefore, we can simplify skb_find_text() by making it local variable. Signed-off-by: Bojan Prtvar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +-- net/core/skbuff.c | 9 ++++----- net/netfilter/nf_conntrack_amanda.c | 10 +++------- net/netfilter/xt_string.c | 3 +-- net/sched/em_text.c | 3 +-- 5 files changed, 10 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 30007afe70b3..d898b32dedcc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -870,8 +870,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, void skb_abort_seq_read(struct skb_seq_state *st); unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, - unsigned int to, struct ts_config *config, - struct ts_state *state); + unsigned int to, struct ts_config *config); /* * Packet hash types specify the type of hash in skb_set_hash. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 88c613eab142..374e43bc6b80 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2865,7 +2865,6 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) * @from: search offset * @to: search limit * @config: textsearch configuration - * @state: uninitialized textsearch state variable * * Finds a pattern in the skb data according to the specified * textsearch configuration. Use textsearch_next() to retrieve @@ -2873,17 +2872,17 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) * to the first occurrence or UINT_MAX if no match was found. */ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, - unsigned int to, struct ts_config *config, - struct ts_state *state) + unsigned int to, struct ts_config *config) { + struct ts_state state; unsigned int ret; config->get_next_block = skb_ts_get_next_block; config->finish = skb_ts_finish; - skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); + skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state)); - ret = textsearch_find(config, state); + ret = textsearch_find(config, &state); return (ret <= to - from ? ret : UINT_MAX); } EXPORT_SYMBOL(skb_find_text); diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index b8b95f4027ca..57a26cc90c9f 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -88,7 +88,6 @@ static int amanda_help(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - struct ts_state ts; struct nf_conntrack_expect *exp; struct nf_conntrack_tuple *tuple; unsigned int dataoff, start, stop, off, i; @@ -113,23 +112,20 @@ static int amanda_help(struct sk_buff *skb, return NF_ACCEPT; } - memset(&ts, 0, sizeof(ts)); start = skb_find_text(skb, dataoff, skb->len, - search[SEARCH_CONNECT].ts, &ts); + search[SEARCH_CONNECT].ts); if (start == UINT_MAX) goto out; start += dataoff + search[SEARCH_CONNECT].len; - memset(&ts, 0, sizeof(ts)); stop = skb_find_text(skb, start, skb->len, - search[SEARCH_NEWLINE].ts, &ts); + search[SEARCH_NEWLINE].ts); if (stop == UINT_MAX) goto out; stop += start; for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) { - memset(&ts, 0, sizeof(ts)); - off = skb_find_text(skb, start, stop, search[i].ts, &ts); + off = skb_find_text(skb, start, stop, search[i].ts); if (off == UINT_MAX) continue; off += start + search[i].len; diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 5699adb97652..0bc3460319c8 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -26,13 +26,12 @@ static bool string_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_string_info *conf = par->matchinfo; - struct ts_state state; bool invert; invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT; return (skb_find_text((struct sk_buff *)skb, conf->from_offset, - conf->to_offset, conf->config, &state) + conf->to_offset, conf->config) != UINT_MAX) ^ invert; } diff --git a/net/sched/em_text.c b/net/sched/em_text.c index f03c3de16c27..73e2ed576ceb 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -34,7 +34,6 @@ static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m, { struct text_match *tm = EM_TEXT_PRIV(m); int from, to; - struct ts_state state; from = tcf_get_base_ptr(skb, tm->from_layer) - skb->data; from += tm->from_offset; @@ -42,7 +41,7 @@ static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m, to = tcf_get_base_ptr(skb, tm->to_layer) - skb->data; to += tm->to_offset; - return skb_find_text(skb, from, to, tm->config, &state) != UINT_MAX; + return skb_find_text(skb, from, to, tm->config) != UINT_MAX; } static int em_text_change(struct net *net, void *data, int len, -- cgit v1.3-6-gb490 From ccdaeb2b176f7db491a6f8e8b1c51f9393525f7d Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 27 Feb 2015 09:58:26 +0100 Subject: at86rf230: add support for external xtal trim This patch adds support for setting the xtal trim register. Some at86rf2xx transceiver boards needs fine tuning the xtal capacitor. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- .../bindings/net/ieee802154/at86rf230.txt | 3 ++ drivers/net/ieee802154/at86rf230.c | 54 ++++++++++++++++++++-- include/linux/spi/at86rf230.h | 1 + 3 files changed, 54 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt b/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt index d3bbdded4cbe..1ae5100fea14 100644 --- a/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt +++ b/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt @@ -11,6 +11,8 @@ Required properties: Optional properties: - reset-gpio: GPIO spec for the rstn pin - sleep-gpio: GPIO spec for the slp_tr pin + - xtal-trim: u8 value for fine tuning the internal capacitance + arrays of xtal pins: 0 = +0 pF, 0xf = +4.5 pF Example: @@ -20,4 +22,5 @@ Example: reg = <0>; interrupts = <19 1>; interrupt-parent = <&gpio3>; + xtal-trim = /bits/ 8 <0x06>; }; diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 9888b7ff24bc..c1323e5cdd0c 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1315,7 +1315,7 @@ static struct at86rf2xx_chip_data at86rf212_data = { .get_desense_steps = at86rf212_get_desens_steps }; -static int at86rf230_hw_init(struct at86rf230_local *lp) +static int at86rf230_hw_init(struct at86rf230_local *lp, u8 xtal_trim) { int rc, irq_type, irq_pol = IRQ_ACTIVE_HIGH; unsigned int dvdd; @@ -1362,6 +1362,45 @@ static int at86rf230_hw_init(struct at86rf230_local *lp) usleep_range(lp->data->t_sleep_cycle, lp->data->t_sleep_cycle + 100); + /* xtal_trim value is calculated by: + * CL = 0.5 * (CX + CTRIM + CPAR) + * + * whereas: + * CL = capacitor of used crystal + * CX = connected capacitors at xtal pins + * CPAR = in all at86rf2xx datasheets this is a constant value 3 pF, + * but this is different on each board setup. You need to fine + * tuning this value via CTRIM. + * CTRIM = variable capacitor setting. Resolution is 0.3 pF range is + * 0 pF upto 4.5 pF. + * + * Examples: + * atben transceiver: + * + * CL = 8 pF + * CX = 12 pF + * CPAR = 3 pF (We assume the magic constant from datasheet) + * CTRIM = 0.9 pF + * + * (12+0.9+3)/2 = 7.95 which is nearly at 8 pF + * + * xtal_trim = 0x3 + * + * openlabs transceiver: + * + * CL = 16 pF + * CX = 22 pF + * CPAR = 3 pF (We assume the magic constant from datasheet) + * CTRIM = 4.5 pF + * + * (22+4.5+3)/2 = 14.75 which is the nearest value to 16 pF + * + * xtal_trim = 0xf + */ + rc = at86rf230_write_subreg(lp, SR_XTAL_TRIM, xtal_trim); + if (rc) + return rc; + rc = at86rf230_read_subreg(lp, SR_DVDD_OK, &dvdd); if (rc) return rc; @@ -1378,9 +1417,11 @@ static int at86rf230_hw_init(struct at86rf230_local *lp) } static int -at86rf230_get_pdata(struct spi_device *spi, int *rstn, int *slp_tr) +at86rf230_get_pdata(struct spi_device *spi, int *rstn, int *slp_tr, + u8 *xtal_trim) { struct at86rf230_platform_data *pdata = spi->dev.platform_data; + int ret; if (!IS_ENABLED(CONFIG_OF) || !spi->dev.of_node) { if (!pdata) @@ -1388,11 +1429,15 @@ at86rf230_get_pdata(struct spi_device *spi, int *rstn, int *slp_tr) *rstn = pdata->rstn; *slp_tr = pdata->slp_tr; + *xtal_trim = pdata->xtal_trim; return 0; } *rstn = of_get_named_gpio(spi->dev.of_node, "reset-gpio", 0); *slp_tr = of_get_named_gpio(spi->dev.of_node, "sleep-gpio", 0); + ret = of_property_read_u8(spi->dev.of_node, "xtal-trim", xtal_trim); + if (ret < 0 && ret != -EINVAL) + return ret; return 0; } @@ -1505,13 +1550,14 @@ static int at86rf230_probe(struct spi_device *spi) struct at86rf230_local *lp; unsigned int status; int rc, irq_type, rstn, slp_tr; + u8 xtal_trim; if (!spi->irq) { dev_err(&spi->dev, "no IRQ specified\n"); return -EINVAL; } - rc = at86rf230_get_pdata(spi, &rstn, &slp_tr); + rc = at86rf230_get_pdata(spi, &rstn, &slp_tr, &xtal_trim); if (rc < 0) { dev_err(&spi->dev, "failed to parse platform_data: %d\n", rc); return rc; @@ -1570,7 +1616,7 @@ static int at86rf230_probe(struct spi_device *spi) spi_set_drvdata(spi, lp); - rc = at86rf230_hw_init(lp); + rc = at86rf230_hw_init(lp, xtal_trim); if (rc) goto free_dev; diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h index cd519a11c2c6..b63fe6f5fdc8 100644 --- a/include/linux/spi/at86rf230.h +++ b/include/linux/spi/at86rf230.h @@ -22,6 +22,7 @@ struct at86rf230_platform_data { int rstn; int slp_tr; int dig2; + u8 xtal_trim; }; #endif -- cgit v1.3-6-gb490 From 6588af614e7b79294fbcd4a666a7422c0c854e80 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 26 Feb 2015 19:34:37 +0000 Subject: usbnet: Fix tx_packets stat for FLAG_MULTI_FRAME drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the usbnet core does not update the tx_packets statistic for drivers with FLAG_MULTI_PACKET and there is no hook in the TX completion path where they could do this. cdc_ncm and dependent drivers are bumping tx_packets stat on the transmit path while asix and sr9800 aren't updating it at all. Add a packet count in struct skb_data so these drivers can fill it in, initialise it to 1 for other drivers, and add the packet count to the tx_packets statistic on completion. Signed-off-by: Ben Hutchings Tested-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/asix_common.c | 2 ++ drivers/net/usb/cdc_ncm.c | 3 ++- drivers/net/usb/sr9800.c | 1 + drivers/net/usb/usbnet.c | 5 +++-- include/linux/usb/usbnet.h | 12 ++++++++++++ 5 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 5c55f11572ba..724a9b50df7a 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -188,6 +188,8 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes)); skb_put(skb, sizeof(padbytes)); } + + usbnet_set_skb_tx_stats(skb, 1); return skb; } diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 80a844e0ae03..70cbea551139 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1172,7 +1172,6 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* return skb */ ctx->tx_curr_skb = NULL; - dev->net->stats.tx_packets += ctx->tx_curr_frame_num; /* keep private stats: framing overhead and number of NTBs */ ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload; @@ -1184,6 +1183,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) */ dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload; + usbnet_set_skb_tx_stats(skb_out, n); + return skb_out; exit_no_skb: diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index b94a0fbb8b3b..7650cdc8fe6b 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -144,6 +144,7 @@ static struct sk_buff *sr_tx_fixup(struct usbnet *dev, struct sk_buff *skb, skb_put(skb, sizeof(padbytes)); } + usbnet_set_skb_tx_stats(skb, 1); return skb; } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 449835f4331e..0f3ff285f6a1 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1188,8 +1188,7 @@ static void tx_complete (struct urb *urb) struct usbnet *dev = entry->dev; if (urb->status == 0) { - if (!(dev->driver_info->flags & FLAG_MULTI_PACKET)) - dev->net->stats.tx_packets++; + dev->net->stats.tx_packets += entry->packets; dev->net->stats.tx_bytes += entry->length; } else { dev->net->stats.tx_errors++; @@ -1348,6 +1347,8 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, urb->transfer_flags |= URB_ZERO_PACKET; } entry->length = urb->transfer_buffer_length = length; + if (!(info->flags & FLAG_MULTI_PACKET)) + usbnet_set_skb_tx_stats(skb, 1); spin_lock_irqsave(&dev->txq.lock, flags); retval = usb_autopm_get_interface_async(dev->intf); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index d9a4905e01d0..ff3fb2bd0e90 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -228,8 +228,20 @@ struct skb_data { /* skb->cb is one of these */ struct usbnet *dev; enum skb_state state; size_t length; + unsigned long packets; }; +/* Drivers that set FLAG_MULTI_PACKET must call this in their + * tx_fixup method before returning an skb. + */ +static inline void +usbnet_set_skb_tx_stats(struct sk_buff *skb, unsigned long packets) +{ + struct skb_data *entry = (struct skb_data *) skb->cb; + + entry->packets = packets; +} + extern int usbnet_open(struct net_device *net); extern int usbnet_stop(struct net_device *net); extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb, -- cgit v1.3-6-gb490 From 5f852eb536ad651b8734559dcf4353514cb0bea3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Feb 2015 14:10:18 -0800 Subject: tcp: tso: remove tp->tso_deferred TSO relies on ability to defer sending a small amount of packets. Heuristic is to wait for future ACKS in hope to send more packets at once. Current algorithm uses a per socket tso_deferred field as a pseudo timer. This pseudo timer relies on future ACK, but there is no guarantee we receive them in time. Fix would be to use a real timer, but cost of such timer is probably too expensive for typical cases. This patch changes the logic to test the time of last transmit, because we should not add bursts of more than 1ms for any given flow. We've used this patch for about two years at Google, before FQ/pacing as it would reduce a fair amount of bursts. Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 - net/ipv4/tcp_output.c | 14 +++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1a7adb411647..97dbf16f7d9d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -236,7 +236,6 @@ struct tcp_sock { u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ u32 fackets_out; /* FACK'd packets */ - u32 tso_deferred; /* from STCP, retrans queue hinting */ struct sk_buff* lost_skb_hint; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a2a796c5536b..cb95c7a9d1e7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1763,9 +1763,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, if (icsk->icsk_ca_state != TCP_CA_Open) goto send_now; - /* Defer for less than two clock ticks. */ - if (tp->tso_deferred && - (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) + /* Avoid bursty behavior by allowing defer + * only if the last write was recent. + */ + if ((s32)(tcp_time_stamp - tp->lsndtime) > 0) goto send_now; in_flight = tcp_packets_in_flight(tp); @@ -1807,11 +1808,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, goto send_now; } - /* Ok, it looks like it is advisable to defer. - * Do not rearm the timer if already set to not break TCP ACK clocking. - */ - if (!tp->tso_deferred) - tp->tso_deferred = 1 | (jiffies << 1); + /* Ok, it looks like it is advisable to defer. */ if (cong_win < send_win && cong_win < skb->len) *is_cwnd_limited = true; @@ -1819,7 +1816,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, return true; send_now: - tp->tso_deferred = 0; return false; } -- cgit v1.3-6-gb490 From a2c83fff582ae133d9f5bb187404ea9ce4da1f96 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:42 +0100 Subject: ebpf: constify various function pointer structs We can move bpf_map_ops and bpf_verifier_ops and other structs into ro section, bpf_map_type_list and bpf_prog_type_list into read mostly. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 14 +++++++------- kernel/bpf/arraymap.c | 6 +++--- kernel/bpf/hashtab.c | 6 +++--- kernel/bpf/helpers.c | 6 +++--- net/core/filter.c | 6 +++--- 5 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bbfceb756452..78446860f796 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -32,13 +32,13 @@ struct bpf_map { u32 key_size; u32 value_size; u32 max_entries; - struct bpf_map_ops *ops; + const struct bpf_map_ops *ops; struct work_struct work; }; struct bpf_map_type_list { struct list_head list_node; - struct bpf_map_ops *ops; + const struct bpf_map_ops *ops; enum bpf_map_type type; }; @@ -109,7 +109,7 @@ struct bpf_verifier_ops { struct bpf_prog_type_list { struct list_head list_node; - struct bpf_verifier_ops *ops; + const struct bpf_verifier_ops *ops; enum bpf_prog_type type; }; @@ -121,7 +121,7 @@ struct bpf_prog_aux { atomic_t refcnt; bool is_gpl_compatible; enum bpf_prog_type prog_type; - struct bpf_verifier_ops *ops; + const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; u32 used_map_cnt; struct bpf_prog *prog; @@ -138,8 +138,8 @@ struct bpf_prog *bpf_prog_get(u32 ufd); int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); /* verifier prototypes for helper functions called from eBPF programs */ -extern struct bpf_func_proto bpf_map_lookup_elem_proto; -extern struct bpf_func_proto bpf_map_update_elem_proto; -extern struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_map_lookup_elem_proto; +extern const struct bpf_func_proto bpf_map_update_elem_proto; +extern const struct bpf_func_proto bpf_map_delete_elem_proto; #endif /* _LINUX_BPF_H */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 9eb4d8a7cd87..8a6616583f38 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -134,7 +134,7 @@ static void array_map_free(struct bpf_map *map) kvfree(array); } -static struct bpf_map_ops array_ops = { +static const struct bpf_map_ops array_ops = { .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, @@ -143,14 +143,14 @@ static struct bpf_map_ops array_ops = { .map_delete_elem = array_map_delete_elem, }; -static struct bpf_map_type_list tl = { +static struct bpf_map_type_list array_type __read_mostly = { .ops = &array_ops, .type = BPF_MAP_TYPE_ARRAY, }; static int __init register_array_map(void) { - bpf_register_map_type(&tl); + bpf_register_map_type(&array_type); return 0; } late_initcall(register_array_map); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index b3ba43674310..83c209d9b17a 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -345,7 +345,7 @@ static void htab_map_free(struct bpf_map *map) kfree(htab); } -static struct bpf_map_ops htab_ops = { +static const struct bpf_map_ops htab_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, @@ -354,14 +354,14 @@ static struct bpf_map_ops htab_ops = { .map_delete_elem = htab_map_delete_elem, }; -static struct bpf_map_type_list tl = { +static struct bpf_map_type_list htab_type __read_mostly = { .ops = &htab_ops, .type = BPF_MAP_TYPE_HASH, }; static int __init register_htab_map(void) { - bpf_register_map_type(&tl); + bpf_register_map_type(&htab_type); return 0; } late_initcall(register_htab_map); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 9e3414d85459..a3c7701a8b5e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -41,7 +41,7 @@ static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return (unsigned long) value; } -struct bpf_func_proto bpf_map_lookup_elem_proto = { +const struct bpf_func_proto bpf_map_lookup_elem_proto = { .func = bpf_map_lookup_elem, .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, @@ -60,7 +60,7 @@ static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return map->ops->map_update_elem(map, key, value, r4); } -struct bpf_func_proto bpf_map_update_elem_proto = { +const struct bpf_func_proto bpf_map_update_elem_proto = { .func = bpf_map_update_elem, .gpl_only = false, .ret_type = RET_INTEGER, @@ -80,7 +80,7 @@ static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return map->ops->map_delete_elem(map, key); } -struct bpf_func_proto bpf_map_delete_elem_proto = { +const struct bpf_func_proto bpf_map_delete_elem_proto = { .func = bpf_map_delete_elem, .gpl_only = false, .ret_type = RET_INTEGER, diff --git a/net/core/filter.c b/net/core/filter.c index f6bdc2b1ba01..6fe09e36dad9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1159,19 +1159,19 @@ static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type return false; } -static struct bpf_verifier_ops sock_filter_ops = { +static const struct bpf_verifier_ops sock_filter_ops = { .get_func_proto = sock_filter_func_proto, .is_valid_access = sock_filter_is_valid_access, }; -static struct bpf_prog_type_list tl = { +static struct bpf_prog_type_list sock_filter_type __read_mostly = { .ops = &sock_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, }; static int __init register_sock_filter_ops(void) { - bpf_register_prog_type(&tl); + bpf_register_prog_type(&sock_filter_type); return 0; } late_initcall(register_sock_filter_ops); -- cgit v1.3-6-gb490 From f1a66f85b74c5ef7b503f746ea97742dacd56419 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:43 +0100 Subject: ebpf: export BPF_PSEUDO_MAP_FD to uapi We need to export BPF_PSEUDO_MAP_FD to user space, as it's used in the ELF BPF loader where instructions are being loaded that need map fixups. An initial stage loads all maps into the kernel, and later on replaces related instructions in the eBPF blob with BPF_PSEUDO_MAP_FD as source register and the actual fd as immediate value. The kernel verifier recognizes this keyword and replaces the map fd with a real pointer internally. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 -- include/uapi/linux/bpf.h | 2 ++ samples/bpf/libbpf.h | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index caac2087a4d5..5e3863d5f666 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -145,8 +145,6 @@ struct bpf_prog_aux; .off = 0, \ .imm = ((__u64) (IMM)) >> 32 }) -#define BPF_PSEUDO_MAP_FD 1 - /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ #define BPF_LD_MAP_FD(DST, MAP_FD) \ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 45da7ec7d274..0248180bf2e2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -120,6 +120,8 @@ enum bpf_prog_type { BPF_PROG_TYPE_SOCKET_FILTER, }; +#define BPF_PSEUDO_MAP_FD 1 + /* flags for BPF_MAP_UPDATE_ELEM command */ #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 58c5fe1bdba1..a6bb7e9c22c3 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -92,7 +92,9 @@ extern char bpf_log_buf[LOG_BUF_SIZE]; .off = 0, \ .imm = ((__u64) (IMM)) >> 32 }) -#define BPF_PSEUDO_MAP_FD 1 +#ifndef BPF_PSEUDO_MAP_FD +# define BPF_PSEUDO_MAP_FD 1 +#endif /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ #define BPF_LD_MAP_FD(DST, MAP_FD) \ -- cgit v1.3-6-gb490 From 0fc174dea54546e2b1146e1197da1b6d4bc48107 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:44 +0100 Subject: ebpf: make internal bpf API independent of CONFIG_BPF_SYSCALL ifdefs Socket filter code and other subsystems with upcoming eBPF support should not need to deal with the fact that we have CONFIG_BPF_SYSCALL defined or not. Having the bpf syscall as a config option is a nice thing and I'd expect it to stay that way for expert users (I presume one day the default setting of it might change, though), but code making use of it should not care if it's actually enabled or not. Instead, hide this via header files and let the rest deal with it. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 78446860f796..9c458144cdb4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -113,8 +113,6 @@ struct bpf_prog_type_list { enum bpf_prog_type type; }; -void bpf_register_prog_type(struct bpf_prog_type_list *tl); - struct bpf_prog; struct bpf_prog_aux { @@ -129,11 +127,25 @@ struct bpf_prog_aux { }; #ifdef CONFIG_BPF_SYSCALL +void bpf_register_prog_type(struct bpf_prog_type_list *tl); + void bpf_prog_put(struct bpf_prog *prog); +struct bpf_prog *bpf_prog_get(u32 ufd); #else -static inline void bpf_prog_put(struct bpf_prog *prog) {} +static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) +{ +} + +static inline struct bpf_prog *bpf_prog_get(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void bpf_prog_put(struct bpf_prog *prog) +{ +} #endif -struct bpf_prog *bpf_prog_get(u32 ufd); + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); -- cgit v1.3-6-gb490 From 24701ecea76b0b93bd9667486934ec310825f558 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:47 +0100 Subject: ebpf: move read-only fields to bpf_prog and shrink bpf_prog_aux is_gpl_compatible and prog_type should be moved directly into bpf_prog as they stay immutable during bpf_prog's lifetime, are core attributes and they can be locked as read-only later on via bpf_prog_select_runtime(). With a bit of rearranging, this also allows us to shrink bpf_prog_aux to exactly 1 cacheline. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 +--- include/linux/filter.h | 4 +++- kernel/bpf/syscall.c | 7 +++---- kernel/bpf/verifier.c | 4 ++-- net/core/filter.c | 4 ++-- 5 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9c458144cdb4..a1a7ff2df328 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -117,11 +117,9 @@ struct bpf_prog; struct bpf_prog_aux { atomic_t refcnt; - bool is_gpl_compatible; - enum bpf_prog_type prog_type; + u32 used_map_cnt; const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; - u32 used_map_cnt; struct bpf_prog *prog; struct work_struct work; }; diff --git a/include/linux/filter.h b/include/linux/filter.h index 5e3863d5f666..9ee8c67ea249 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -308,9 +308,11 @@ struct bpf_binary_header { struct bpf_prog { u16 pages; /* Number of allocated pages */ bool jited; /* Is our filter JIT'ed? */ + bool gpl_compatible; /* Is our filter GPL compatible? */ u32 len; /* Number of filter blocks */ - struct sock_fprog_kern *orig_prog; /* Original BPF program */ + enum bpf_prog_type type; /* Type of BPF program */ struct bpf_prog_aux *aux; /* Auxiliary fields */ + struct sock_fprog_kern *orig_prog; /* Original BPF program */ unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); /* Instructions for interpreter */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 536edc2be307..0d69449acbd0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -354,10 +354,11 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) list_for_each_entry(tl, &bpf_prog_types, list_node) { if (tl->type == type) { prog->aux->ops = tl->ops; - prog->aux->prog_type = type; + prog->type = type; return 0; } } + return -EINVAL; } @@ -508,7 +509,7 @@ static int bpf_prog_load(union bpf_attr *attr) prog->jited = false; atomic_set(&prog->aux->refcnt, 1); - prog->aux->is_gpl_compatible = is_gpl; + prog->gpl_compatible = is_gpl; /* find program type: socket_filter vs tracing_filter */ err = find_prog_type(type, prog); @@ -517,7 +518,6 @@ static int bpf_prog_load(union bpf_attr *attr) /* run eBPF verifier */ err = bpf_check(prog, attr); - if (err < 0) goto free_used_maps; @@ -528,7 +528,6 @@ static int bpf_prog_load(union bpf_attr *attr) bpf_prog_select_runtime(prog); err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); - if (err < 0) /* failed to allocate fd */ goto free_used_maps; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 594d341f04db..bdf4192a889b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -852,7 +852,7 @@ static int check_call(struct verifier_env *env, int func_id) } /* eBPF programs must be GPL compatible to use GPL-ed functions */ - if (!env->prog->aux->is_gpl_compatible && fn->gpl_only) { + if (!env->prog->gpl_compatible && fn->gpl_only) { verbose("cannot call GPL only function from proprietary program\n"); return -EINVAL; } @@ -1205,7 +1205,7 @@ static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) struct reg_state *reg; int i, err; - if (!may_access_skb(env->prog->aux->prog_type)) { + if (!may_access_skb(env->prog->type)) { verbose("BPF_LD_ABS|IND instructions not allowed for this program type\n"); return -EINVAL; } diff --git a/net/core/filter.c b/net/core/filter.c index 514d4082f326..ff000cb25e0a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -814,7 +814,7 @@ static void bpf_release_orig_filter(struct bpf_prog *fp) static void __bpf_prog_release(struct bpf_prog *prog) { - if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { + if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) { bpf_prog_put(prog); } else { bpf_release_orig_filter(prog); @@ -1105,7 +1105,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) { + if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { bpf_prog_put(prog); return -EINVAL; } -- cgit v1.3-6-gb490 From 287f3a943fef58c5c73e42545169443be379222f Mon Sep 17 00:00:00 2001 From: Simon Farnsworth Date: Sun, 1 Mar 2015 10:54:39 +0000 Subject: pppoe: Use workqueue to die properly when a PADT is received When a PADT frame is received, the socket may not be in a good state to close down the PPP interface. The current implementation handles this by simply blocking all further PPP traffic, and hoping that the lack of traffic will trigger the user to investigate. Use schedule_work to get to a process context from which we clear down the PPP interface, in a fashion analogous to hangup on a TTY-based PPP interface. This causes pppd to disconnect immediately, and allows tools to take immediate corrective action. Note that pppd's rp_pppoe.so plugin has code in it to disable the session when it disconnects; however, as a consequence of this patch, the session is already disabled before rp_pppoe.so is asked to disable the session. The result is a harmless error message: Failed to disconnect PPPoE socket: 114 Operation already in progress This message is safe to ignore, as long as the error is 114 Operation already in progress; in that specific case, it means that the PPPoE session has already been disabled before pppd tried to disable it. Signed-off-by: Simon Farnsworth Tested-by: Dan Williams Tested-by: Christoph Schulz Signed-off-by: David S. Miller --- drivers/net/ppp/pppoe.c | 17 ++++++++++++++++- include/linux/if_pppox.h | 2 ++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index d2408a5e43a6..9c97e9bcf5f5 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -455,6 +455,18 @@ out: return NET_RX_DROP; } +static void pppoe_unbind_sock_work(struct work_struct *work) +{ + struct pppox_sock *po = container_of(work, struct pppox_sock, + proto.pppoe.padt_work); + struct sock *sk = sk_pppox(po); + + lock_sock(sk); + pppox_unbind_sock(sk); + release_sock(sk); + sock_put(sk); +} + /************************************************************************ * * Receive a PPPoE Discovery frame. @@ -500,7 +512,8 @@ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev, } bh_unlock_sock(sk); - sock_put(sk); + if (!schedule_work(&po->proto.pppoe.padt_work)) + sock_put(sk); } abort: @@ -613,6 +626,8 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, lock_sock(sk); + INIT_WORK(&po->proto.pppoe.padt_work, pppoe_unbind_sock_work); + error = -EINVAL; if (sp->sa_protocol != PX_PROTO_OE) goto end; diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index aff7ad8a4ea3..66a7d7600f43 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -19,6 +19,7 @@ #include #include #include +#include #include static inline struct pppoe_hdr *pppoe_hdr(const struct sk_buff *skb) @@ -32,6 +33,7 @@ struct pppoe_opt { struct pppoe_addr pa; /* what this socket is bound to*/ struct sockaddr_pppox relay; /* what socket data will be relayed to (PPPoE relaying) */ + struct work_struct padt_work;/* Work item for handling PADT */ }; struct pptp_opt { -- cgit v1.3-6-gb490 From 744d5a3e9fe2690dd85d9991dbb078301694658b Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sun, 1 Mar 2015 14:58:31 +0200 Subject: net: move skb->dropcount to skb->cb[] Commit 977750076d98 ("af_packet: add interframe drop cmsg (v6)") unionized skb->mark and skb->dropcount in order to allow recording of the socket drop count while maintaining struct sk_buff size. skb->dropcount was introduced since there was no available room in skb->cb[] in packet sockets. However, its introduction led to the inability to export skb->mark, or any other aliased field to userspace if so desired. Moving the dropcount metric to skb->cb[] eliminates this problem at the expense of 4 bytes less in skb->cb[] for protocol families using it. Signed-off-by: Eyal Birger Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 -- include/net/sock.h | 18 ++++++++++++++++-- net/socket.c | 4 ++-- 3 files changed, 18 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d898b32dedcc..bba1330757c0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -492,7 +492,6 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking * @mark: Generic packet mark - * @dropcount: total number of sk_receive_queue overflows * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_protocol: Protocol (encapsulation) @@ -641,7 +640,6 @@ struct sk_buff { #endif union { __u32 mark; - __u32 dropcount; __u32 reserved_tailroom; }; diff --git a/include/net/sock.h b/include/net/sock.h index 0996fe451e5f..38369d3580a1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2078,13 +2078,27 @@ static inline int sock_intr_errno(long timeo) return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR; } +struct sock_skb_cb { + u32 dropcount; +}; + +/* Store sock_skb_cb at the end of skb->cb[] so protocol families + * using skb->cb[] would keep using it directly and utilize its + * alignement guarantee. + */ +#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \ + sizeof(struct sock_skb_cb))) + +#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \ + SOCK_SKB_CB_OFFSET)) + #define sock_skb_cb_check_size(size) \ - BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb)) + BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET) static inline void sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb) { - skb->dropcount = atomic_read(&sk->sk_drops); + SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops); } void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, diff --git a/net/socket.c b/net/socket.c index bbedbfcb42c2..b78cf601a021 100644 --- a/net/socket.c +++ b/net/socket.c @@ -731,9 +731,9 @@ EXPORT_SYMBOL_GPL(__sock_recv_wifi_status); static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) + if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount) put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, - sizeof(__u32), &skb->dropcount); + sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount); } void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, -- cgit v1.3-6-gb490 From 4186721d02b71ae943e60bbf50d3488fd5fd6adb Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sun, 8 Feb 2015 17:11:47 +0100 Subject: bcma: add helpers bringing PCIe hosted bus up / down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bringing PCIe hosted bus up requires operating on host-related core. Since we plan to support PCIe Gen 2 devices we should provide a helper picking the correct one (PCIE or PCIE2). Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/bcma_private.h | 2 ++ drivers/bcma/driver_pci.c | 20 ++---------------- drivers/bcma/host_pci.c | 28 ++++++++++++++++++++++++++ drivers/net/wireless/b43/main.c | 4 ++-- drivers/net/wireless/brcm80211/brcmsmac/main.c | 8 ++++---- include/linux/bcma/bcma.h | 3 +++ include/linux/bcma/bcma_driver_pci.h | 2 -- 7 files changed, 41 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index ac6c5fca906d..351f4afdad25 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -101,6 +101,8 @@ static inline void __exit bcma_host_soc_unregister_driver(void) /* driver_pci.c */ u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address); +void bcma_core_pci_up(struct bcma_drv_pci *pc); +void bcma_core_pci_down(struct bcma_drv_pci *pc); extern int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc); diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c index 786666488a2d..cf92bfa7eae0 100644 --- a/drivers/bcma/driver_pci.c +++ b/drivers/bcma/driver_pci.c @@ -328,28 +328,12 @@ static void bcma_core_pci_extend_L1timer(struct bcma_drv_pci *pc, bool extend) bcma_pcie_read(pc, BCMA_CORE_PCI_DLLP_PMTHRESHREG); } -void bcma_core_pci_up(struct bcma_bus *bus) +void bcma_core_pci_up(struct bcma_drv_pci *pc) { - struct bcma_drv_pci *pc; - - if (bus->hosttype != BCMA_HOSTTYPE_PCI) - return; - - pc = &bus->drv_pci[0]; - bcma_core_pci_extend_L1timer(pc, true); } -EXPORT_SYMBOL_GPL(bcma_core_pci_up); -void bcma_core_pci_down(struct bcma_bus *bus) +void bcma_core_pci_down(struct bcma_drv_pci *pc) { - struct bcma_drv_pci *pc; - - if (bus->hosttype != BCMA_HOSTTYPE_PCI) - return; - - pc = &bus->drv_pci[0]; - bcma_core_pci_extend_L1timer(pc, false); } -EXPORT_SYMBOL_GPL(bcma_core_pci_down); diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index 53c6a8a58859..8dd37dc94cae 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -310,3 +310,31 @@ void __exit bcma_host_pci_exit(void) { pci_unregister_driver(&bcma_pci_bridge_driver); } + +/************************************************** + * Runtime ops for drivers. + **************************************************/ + +/* See also pcicore_up */ +void bcma_host_pci_up(struct bcma_bus *bus) +{ + if (bus->hosttype != BCMA_HOSTTYPE_PCI) + return; + + if (bus->host_is_pcie2) + pr_warn("Bringing up bus with PCIe Gen 2 host is unsupported yet\n"); + else + bcma_core_pci_up(&bus->drv_pci[0]); +} +EXPORT_SYMBOL_GPL(bcma_host_pci_up); + +/* See also pcicore_down */ +void bcma_host_pci_down(struct bcma_bus *bus) +{ + if (bus->hosttype != BCMA_HOSTTYPE_PCI) + return; + + if (!bus->host_is_pcie2) + bcma_core_pci_down(&bus->drv_pci[0]); +} +EXPORT_SYMBOL_GPL(bcma_host_pci_down); diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index ccbdb05b28cd..34065a6f7725 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -4819,7 +4819,7 @@ static void b43_wireless_core_exit(struct b43_wldev *dev) switch (dev->dev->bus_type) { #ifdef CONFIG_B43_BCMA case B43_BUS_BCMA: - bcma_core_pci_down(dev->dev->bdev->bus); + bcma_host_pci_down(dev->dev->bdev->bus); break; #endif #ifdef CONFIG_B43_SSB @@ -4868,7 +4868,7 @@ static int b43_wireless_core_init(struct b43_wldev *dev) case B43_BUS_BCMA: bcma_core_pci_irq_ctl(&dev->dev->bdev->bus->drv_pci[0], dev->dev->bdev, true); - bcma_core_pci_up(dev->dev->bdev->bus); + bcma_host_pci_up(dev->dev->bdev->bus); break; #endif #ifdef CONFIG_B43_SSB diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c index eb8584a9c49a..bcbfc6e467e4 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/main.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c @@ -4668,7 +4668,7 @@ static int brcms_b_attach(struct brcms_c_info *wlc, struct bcma_device *core, brcms_c_coredisable(wlc_hw); /* Match driver "down" state */ - bcma_core_pci_down(wlc_hw->d11core->bus); + bcma_host_pci_down(wlc_hw->d11core->bus); /* turn off pll and xtal to match driver "down" state */ brcms_b_xtal(wlc_hw, OFF); @@ -4969,12 +4969,12 @@ static int brcms_b_up_prep(struct brcms_hardware *wlc_hw) */ if (brcms_b_radio_read_hwdisabled(wlc_hw)) { /* put SB PCI in down state again */ - bcma_core_pci_down(wlc_hw->d11core->bus); + bcma_host_pci_down(wlc_hw->d11core->bus); brcms_b_xtal(wlc_hw, OFF); return -ENOMEDIUM; } - bcma_core_pci_up(wlc_hw->d11core->bus); + bcma_host_pci_up(wlc_hw->d11core->bus); /* reset the d11 core */ brcms_b_corereset(wlc_hw, BRCMS_USE_COREFLAGS); @@ -5171,7 +5171,7 @@ static int brcms_b_down_finish(struct brcms_hardware *wlc_hw) /* turn off primary xtal and pll */ if (!wlc_hw->noreset) { - bcma_core_pci_down(wlc_hw->d11core->bus); + bcma_host_pci_down(wlc_hw->d11core->bus); brcms_b_xtal(wlc_hw, OFF); } } diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 994739da827f..037620b3f113 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -434,6 +434,9 @@ static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus, return bcma_find_core_unit(bus, coreid, 0); } +extern void bcma_host_pci_up(struct bcma_bus *bus); +extern void bcma_host_pci_down(struct bcma_bus *bus); + extern bool bcma_core_is_enabled(struct bcma_device *core); extern void bcma_core_disable(struct bcma_device *core, u32 flags); extern int bcma_core_enable(struct bcma_device *core, u32 flags); diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 3f809ae372c4..23af893e9b86 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -242,8 +242,6 @@ extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc); extern void bcma_core_pci_init(struct bcma_drv_pci *pc); extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable); -extern void bcma_core_pci_up(struct bcma_bus *bus); -extern void bcma_core_pci_down(struct bcma_bus *bus); extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); -- cgit v1.3-6-gb490 From 5b6ff664c8959d715e785b9465b042407a5d87a0 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sun, 8 Feb 2015 17:11:48 +0100 Subject: bcma: change IRQ control function to accept bus as an argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It doesn't operate on PCI core, but PCI host device, so there is no point of passing core related struct. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/driver_pci.c | 6 +++--- drivers/net/wireless/b43/main.c | 2 +- drivers/net/wireless/brcm80211/brcmsmac/main.c | 2 +- include/linux/bcma/bcma_driver_pci.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c index cf92bfa7eae0..cfd35bc1c5a3 100644 --- a/drivers/bcma/driver_pci.c +++ b/drivers/bcma/driver_pci.c @@ -282,21 +282,21 @@ void bcma_core_pci_power_save(struct bcma_bus *bus, bool up) } EXPORT_SYMBOL_GPL(bcma_core_pci_power_save); -int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, +int bcma_core_pci_irq_ctl(struct bcma_bus *bus, struct bcma_device *core, bool enable) { struct pci_dev *pdev; u32 coremask, tmp; int err = 0; - if (!pc || core->bus->hosttype != BCMA_HOSTTYPE_PCI) { + if (bus->hosttype != BCMA_HOSTTYPE_PCI) { /* This bcma device is not on a PCI host-bus. So the IRQs are * not routed through the PCI core. * So we must not enable routing through the PCI core. */ goto out; } - pdev = pc->core->bus->host_pci; + pdev = bus->host_pci; err = pci_read_config_dword(pdev, BCMA_PCI_IRQMASK, &tmp); if (err) diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 34065a6f7725..f34aa67f3179 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -4866,7 +4866,7 @@ static int b43_wireless_core_init(struct b43_wldev *dev) switch (dev->dev->bus_type) { #ifdef CONFIG_B43_BCMA case B43_BUS_BCMA: - bcma_core_pci_irq_ctl(&dev->dev->bdev->bus->drv_pci[0], + bcma_core_pci_irq_ctl(dev->dev->bdev->bus, dev->dev->bdev, true); bcma_host_pci_up(dev->dev->bdev->bus); break; diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c index bcbfc6e467e4..c84af1dfc88f 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/main.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c @@ -4959,7 +4959,7 @@ static int brcms_b_up_prep(struct brcms_hardware *wlc_hw) * Configure pci/pcmcia here instead of in brcms_c_attach() * to allow mfg hotswap: down, hotswap (chip power cycle), up. */ - bcma_core_pci_irq_ctl(&wlc_hw->d11core->bus->drv_pci[0], wlc_hw->d11core, + bcma_core_pci_irq_ctl(wlc_hw->d11core->bus, wlc_hw->d11core, true); /* diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 23af893e9b86..6b8bca67851f 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -240,7 +240,7 @@ struct bcma_drv_pci { extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc); extern void bcma_core_pci_init(struct bcma_drv_pci *pc); -extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, +extern int bcma_core_pci_irq_ctl(struct bcma_bus *bus, struct bcma_device *core, bool enable); extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); -- cgit v1.3-6-gb490 From 804e27dee49e20c0addd1b7276654220cc3768ae Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sun, 8 Feb 2015 17:11:49 +0100 Subject: bcma: support bringing up bus hosted on PCIe Gen 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/bcma_private.h | 3 +++ drivers/bcma/driver_pcie2.c | 28 ++++++++++++++++++++++++++-- drivers/bcma/host_pci.c | 2 +- include/linux/bcma/bcma_driver_pcie2.h | 2 ++ 4 files changed, 32 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index 351f4afdad25..36929126a206 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -104,6 +104,9 @@ u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address); void bcma_core_pci_up(struct bcma_drv_pci *pc); void bcma_core_pci_down(struct bcma_drv_pci *pc); +/* driver_pcie2.c */ +void bcma_core_pcie2_up(struct bcma_drv_pcie2 *pcie2); + extern int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc); #ifdef CONFIG_BCMA_DRIVER_PCI_HOSTMODE diff --git a/drivers/bcma/driver_pcie2.c b/drivers/bcma/driver_pcie2.c index e4be537b0c66..4568bc7bd54a 100644 --- a/drivers/bcma/driver_pcie2.c +++ b/drivers/bcma/driver_pcie2.c @@ -156,14 +156,23 @@ static void pciedev_reg_pm_clk_period(struct bcma_drv_pcie2 *pcie2) void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2) { - struct bcma_chipinfo *ci = &pcie2->core->bus->chipinfo; + struct bcma_bus *bus = pcie2->core->bus; + struct bcma_chipinfo *ci = &bus->chipinfo; u32 tmp; tmp = pcie2_read32(pcie2, BCMA_CORE_PCIE2_SPROM(54)); if ((tmp & 0xe) >> 1 == 2) bcma_core_pcie2_cfg_write(pcie2, 0x4e0, 0x17); - /* TODO: Do we need pcie_reqsize? */ + switch (bus->chipinfo.id) { + case BCMA_CHIP_ID_BCM4360: + case BCMA_CHIP_ID_BCM4352: + pcie2->reqsize = 1024; + break; + default: + pcie2->reqsize = 128; + break; + } if (ci->id == BCMA_CHIP_ID_BCM4360 && ci->rev > 3) bcma_core_pcie2_war_delay_perst_enab(pcie2, true); @@ -173,3 +182,18 @@ void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2) pciedev_crwlpciegen2_180(pcie2); pciedev_crwlpciegen2_182(pcie2); } + +/************************************************** + * Runtime ops. + **************************************************/ + +void bcma_core_pcie2_up(struct bcma_drv_pcie2 *pcie2) +{ + struct bcma_bus *bus = pcie2->core->bus; + struct pci_dev *dev = bus->host_pci; + int err; + + err = pcie_set_readrq(dev, pcie2->reqsize); + if (err) + bcma_err(bus, "Error setting PCI_EXP_DEVCTL_READRQ: %d\n", err); +} diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index 8dd37dc94cae..5fb87a899a24 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -322,7 +322,7 @@ void bcma_host_pci_up(struct bcma_bus *bus) return; if (bus->host_is_pcie2) - pr_warn("Bringing up bus with PCIe Gen 2 host is unsupported yet\n"); + bcma_core_pcie2_up(&bus->drv_pcie2); else bcma_core_pci_up(&bus->drv_pci[0]); } diff --git a/include/linux/bcma/bcma_driver_pcie2.h b/include/linux/bcma/bcma_driver_pcie2.h index 5988b05781c3..d8c43294c527 100644 --- a/include/linux/bcma/bcma_driver_pcie2.h +++ b/include/linux/bcma/bcma_driver_pcie2.h @@ -143,6 +143,8 @@ struct bcma_drv_pcie2 { struct bcma_device *core; + + u16 reqsize; }; #define pcie2_read16(pcie2, offset) bcma_read16((pcie2)->core, offset) -- cgit v1.3-6-gb490 From 1b784140474e4fc94281a49e96c67d29df0efbde Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 2 Mar 2015 15:37:48 +0800 Subject: net: Remove iocb argument from sendmsg and recvmsg After TIPC doesn't depend on iocb argument in its internal implementations of sendmsg() and recvmsg() hooks defined in proto structure, no any user is using iocb argument in them at all now. Then we can drop the redundant iocb argument completely from kinds of implementations of both sendmsg() and recvmsg() in the entire networking stack. Cc: Christoph Hellwig Suggested-by: Al Viro Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- crypto/algif_hash.c | 8 ++-- crypto/algif_rng.c | 4 +- crypto/algif_skcipher.c | 8 ++-- drivers/isdn/mISDN/socket.c | 7 ++-- drivers/net/macvtap.c | 9 ++--- drivers/net/ppp/pppoe.c | 8 ++-- drivers/net/tun.c | 6 +-- drivers/vhost/net.c | 6 +-- include/linux/net.h | 10 ++--- include/net/af_vsock.h | 4 +- include/net/bluetooth/bluetooth.h | 8 ++-- include/net/inet_common.h | 7 ++-- include/net/ping.h | 7 ++-- include/net/sock.h | 16 ++++---- include/net/tcp.h | 7 ++-- include/net/udp.h | 3 +- net/appletalk/ddp.c | 7 ++-- net/atm/common.c | 7 ++-- net/atm/common.h | 7 ++-- net/ax25/af_ax25.c | 7 ++-- net/bluetooth/af_bluetooth.c | 8 ++-- net/bluetooth/hci_sock.c | 8 ++-- net/bluetooth/l2cap_sock.c | 12 +++--- net/bluetooth/rfcomm/sock.c | 10 ++--- net/bluetooth/sco.c | 10 ++--- net/caif/caif_socket.c | 17 ++++----- net/can/bcm.c | 7 ++-- net/can/raw.c | 7 ++-- net/core/sock.c | 13 +++---- net/dccp/dccp.h | 8 ++-- net/dccp/probe.c | 3 +- net/dccp/proto.c | 7 ++-- net/decnet/af_decnet.c | 7 ++-- net/ieee802154/socket.c | 21 +++++------ net/ipv4/af_inet.c | 11 +++--- net/ipv4/ping.c | 7 ++-- net/ipv4/raw.c | 7 ++-- net/ipv4/tcp.c | 7 ++-- net/ipv4/udp.c | 9 ++--- net/ipv4/udp_impl.h | 4 +- net/ipv6/ping.c | 3 +- net/ipv6/raw.c | 8 ++-- net/ipv6/udp.c | 10 ++--- net/ipv6/udp_impl.h | 7 ++-- net/ipx/af_ipx.c | 7 ++-- net/irda/af_irda.c | 29 +++++++-------- net/iucv/af_iucv.c | 8 ++-- net/key/af_key.c | 6 +-- net/l2tp/l2tp_ip.c | 4 +- net/l2tp/l2tp_ip6.c | 8 ++-- net/l2tp/l2tp_ppp.c | 7 ++-- net/llc/af_llc.c | 7 ++-- net/netlink/af_netlink.c | 6 +-- net/netrom/af_netrom.c | 7 ++-- net/nfc/llcp_sock.c | 8 ++-- net/nfc/rawsock.c | 7 ++-- net/packet/af_packet.c | 11 +++--- net/phonet/datagram.c | 8 ++-- net/phonet/pep.c | 8 ++-- net/phonet/socket.c | 6 +-- net/rds/rds.h | 7 ++-- net/rds/recv.c | 4 +- net/rds/send.c | 3 +- net/rose/af_rose.c | 7 ++-- net/rxrpc/af_rxrpc.c | 7 ++-- net/rxrpc/ar-internal.h | 10 ++--- net/rxrpc/ar-output.c | 20 ++++------ net/rxrpc/ar-recvmsg.c | 4 +- net/sctp/socket.c | 8 ++-- net/socket.c | 78 +++++++-------------------------------- net/tipc/socket.c | 23 ++++-------- net/unix/af_unix.c | 50 +++++++++++-------------- net/vmw_vsock/af_vsock.c | 20 +++++----- net/vmw_vsock/vmci_transport.c | 3 +- net/x25/af_x25.c | 6 +-- 75 files changed, 302 insertions(+), 442 deletions(-) (limited to 'include/linux') diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 01da360bdb55..0a465e0f3012 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -34,8 +34,8 @@ struct hash_ctx { struct ahash_request req; }; -static int hash_sendmsg(struct kiocb *unused, struct socket *sock, - struct msghdr *msg, size_t ignored) +static int hash_sendmsg(struct socket *sock, struct msghdr *msg, + size_t ignored) { int limit = ALG_MAX_PAGES * PAGE_SIZE; struct sock *sk = sock->sk; @@ -139,8 +139,8 @@ unlock: return err ?: size; } -static int hash_recvmsg(struct kiocb *unused, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c index 67f612cfed97..3acba0a7cd55 100644 --- a/crypto/algif_rng.c +++ b/crypto/algif_rng.c @@ -55,8 +55,8 @@ struct rng_ctx { struct crypto_rng *drng; }; -static int rng_recvmsg(struct kiocb *unused, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 0c8a1e5ccadf..b9743dc35801 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -239,8 +239,8 @@ static void skcipher_data_wakeup(struct sock *sk) rcu_read_unlock(); } -static int skcipher_sendmsg(struct kiocb *unused, struct socket *sock, - struct msghdr *msg, size_t size) +static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg, + size_t size) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); @@ -424,8 +424,8 @@ unlock: return err ?: size; } -static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, - struct msghdr *msg, size_t ignored, int flags) +static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg, + size_t ignored, int flags) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 84b35925ee4d..8dc7290089bb 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -112,8 +112,8 @@ mISDN_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) } static int -mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +mISDN_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sk_buff *skb; struct sock *sk = sock->sk; @@ -173,8 +173,7 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, } static int -mISDN_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +mISDN_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index e40fdfccc9c1..1e51c6bf3ae1 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -1127,16 +1127,15 @@ static const struct file_operations macvtap_fops = { #endif }; -static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int macvtap_sendmsg(struct socket *sock, struct msghdr *m, + size_t total_len) { struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); return macvtap_get_user(q, m, &m->msg_iter, m->msg_flags & MSG_DONTWAIT); } -static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len, - int flags) +static int macvtap_recvmsg(struct socket *sock, struct msghdr *m, + size_t total_len, int flags) { struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); int ret; diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 9c97e9bcf5f5..ff059e1d8ac6 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -835,8 +835,8 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd, return err; } -static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, + size_t total_len) { struct sk_buff *skb; struct sock *sk = sock->sk; @@ -977,8 +977,8 @@ static const struct ppp_channel_ops pppoe_chan_ops = { .start_xmit = pppoe_xmit, }; -static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len, int flags) +static int pppoe_recvmsg(struct socket *sock, struct msghdr *m, + size_t total_len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 857dca47bf80..b96b94cee760 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1448,8 +1448,7 @@ static void tun_sock_write_space(struct sock *sk) kill_fasync(&tfile->fasync, SIGIO, POLL_OUT); } -static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { int ret; struct tun_file *tfile = container_of(sock, struct tun_file, socket); @@ -1464,8 +1463,7 @@ static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, return ret; } -static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len, +static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct tun_file *tfile = container_of(sock, struct tun_file, socket); diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index afa06d28725d..633012cc9a57 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -390,7 +390,7 @@ static void handle_tx(struct vhost_net *net) ubufs = NULL; } /* TODO: Check specific error and bomb out unless ENOBUFS? */ - err = sock->ops->sendmsg(NULL, sock, &msg, len); + err = sock->ops->sendmsg(sock, &msg, len); if (unlikely(err < 0)) { if (zcopy_used) { vhost_net_ubuf_put(ubufs); @@ -566,7 +566,7 @@ static void handle_rx(struct vhost_net *net) /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); - err = sock->ops->recvmsg(NULL, sock, &msg, + err = sock->ops->recvmsg(sock, &msg, 1, MSG_DONTWAIT | MSG_TRUNC); pr_debug("Discarded rx packet: len %zd\n", sock_len); continue; @@ -597,7 +597,7 @@ static void handle_rx(struct vhost_net *net) */ iov_iter_advance(&fixup, sizeof(hdr)); } - err = sock->ops->recvmsg(NULL, sock, &msg, + err = sock->ops->recvmsg(sock, &msg, sock_len, MSG_DONTWAIT | MSG_TRUNC); /* Userspace might have consumed the packet meanwhile: * it's not supposed to do this usually, but might be hard diff --git a/include/linux/net.h b/include/linux/net.h index 17d83393afcc..e74114bcca68 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -120,7 +120,6 @@ struct socket { struct vm_area_struct; struct page; -struct kiocb; struct sockaddr; struct msghdr; struct module; @@ -162,8 +161,8 @@ struct proto_ops { int (*compat_getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); #endif - int (*sendmsg) (struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len); + int (*sendmsg) (struct socket *sock, struct msghdr *m, + size_t total_len); /* Notes for implementing recvmsg: * =============================== * msg->msg_namelen should get updated by the recvmsg handlers @@ -172,9 +171,8 @@ struct proto_ops { * handlers can assume that msg.msg_name is either NULL or has * a minimum size of sizeof(struct sockaddr_storage). */ - int (*recvmsg) (struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len, - int flags); + int (*recvmsg) (struct socket *sock, struct msghdr *m, + size_t total_len, int flags); int (*mmap) (struct file *file, struct socket *sock, struct vm_area_struct * vma); ssize_t (*sendpage) (struct socket *sock, struct page *page, diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 0d87674fb775..172632dd9930 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -100,8 +100,8 @@ struct vsock_transport { /* DGRAM. */ int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *); - int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk, - struct msghdr *msg, size_t len, int flags); + int (*dgram_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, + size_t len, int flags); int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, struct msghdr *, size_t len); bool (*dgram_allow)(u32 cid, u32 port); diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 4500bf88ff55..6bb97df16d2d 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -245,10 +245,10 @@ int bt_sock_register(int proto, const struct net_proto_family *ops); void bt_sock_unregister(int proto); void bt_sock_link(struct bt_sock_list *l, struct sock *s); void bt_sock_unlink(struct bt_sock_list *l, struct sock *s); -int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags); -int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags); +int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags); +int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags); uint bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); diff --git a/include/net/inet_common.h b/include/net/inet_common.h index b2828a06a5a6..4a92423eefa5 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -21,12 +21,11 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); int inet_accept(struct socket *sock, struct socket *newsock, int flags); -int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size); +int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); -int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags); +int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags); int inet_shutdown(struct socket *sock, int how); int inet_listen(struct socket *sock, int backlog); void inet_sock_destruct(struct sock *sk); diff --git a/include/net/ping.h b/include/net/ping.h index cc16d413f681..ac80cb45e630 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -75,12 +75,11 @@ void ping_err(struct sk_buff *skb, int offset, u32 info); int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *); -int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len); +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len); int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); -int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len); +int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); bool ping_rcv(struct sk_buff *skb); diff --git a/include/net/sock.h b/include/net/sock.h index 38369d3580a1..250822cc1e02 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -958,10 +958,9 @@ struct proto { int (*compat_ioctl)(struct sock *sk, unsigned int cmd, unsigned long arg); #endif - int (*sendmsg)(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len); - int (*recvmsg)(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, + int (*sendmsg)(struct sock *sk, struct msghdr *msg, + size_t len); + int (*recvmsg)(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int (*sendpage)(struct sock *sk, struct page *page, @@ -1562,9 +1561,8 @@ int sock_no_listen(struct socket *, int); int sock_no_shutdown(struct socket *, int); int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *); int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int); -int sock_no_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); -int sock_no_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, - int); +int sock_no_sendmsg(struct socket *, struct msghdr *, size_t); +int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int); int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, @@ -1576,8 +1574,8 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, */ int sock_common_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); -int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags); +int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags); int sock_common_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); int compat_sock_common_getsockopt(struct socket *sock, int level, diff --git a/include/net/tcp.h b/include/net/tcp.h index 8d6b983d5099..f87599d5af82 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -349,8 +349,7 @@ void tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); -int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size); +int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tcp_release_cb(struct sock *sk); @@ -430,8 +429,8 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req); -int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, int *addr_len); +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len); void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); diff --git a/include/net/udp.h b/include/net/udp.h index 32d8d9f07f76..6d4ed18e1427 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -238,8 +238,7 @@ int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); void udp_err(struct sk_buff *, u32); -int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len); +int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 0d0766ea5ab1..3b7ad43c7dad 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1559,8 +1559,7 @@ freeit: return 0; } -static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t len) +static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct atalk_sock *at = at_sk(sk); @@ -1728,8 +1727,8 @@ out: return err ? : len; } -static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags) +static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct ddpehdr *ddp; diff --git a/net/atm/common.c b/net/atm/common.c index b84057e41bd6..ed0466637e13 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -523,8 +523,8 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci) return 0; } -int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags) +int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct atm_vcc *vcc; @@ -569,8 +569,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, return copied; } -int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t size) +int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) { struct sock *sk = sock->sk; DEFINE_WAIT(wait); diff --git a/net/atm/common.h b/net/atm/common.h index cc3c2dae4d79..4d6f5b2068ac 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -13,10 +13,9 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family); int vcc_release(struct socket *sock); int vcc_connect(struct socket *sock, int itf, short vpi, int vci); -int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags); -int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t total_len); +int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags); +int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len); unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait); int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index ca049a7c9287..330c1f4a5a0b 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1432,8 +1432,7 @@ out: return err; } -static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_ax25 *, usax, msg->msg_name); struct sock *sk = sock->sk; @@ -1599,8 +1598,8 @@ out: return err; } -static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 4b904c97a068..20a4698e2255 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -210,8 +210,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) } EXPORT_SYMBOL(bt_accept_dequeue); -int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; @@ -283,8 +283,8 @@ static long bt_sock_data_wait(struct sock *sk, long timeo) return timeo; } -int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; int err = 0; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 37198fb99ffe..aa8be4cb19a1 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -826,8 +826,8 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, } } -static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; @@ -871,8 +871,8 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err ? : copied; } -static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct hci_dev *hdev; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 60694f0f4c73..9070720eedc8 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -944,8 +944,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, return err; } -static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct l2cap_chan *chan = l2cap_pi(sk)->chan; @@ -976,8 +976,8 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, return err; } -static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { struct sock *sk = sock->sk; struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -1004,9 +1004,9 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, release_sock(sk); if (sock->type == SOCK_STREAM) - err = bt_sock_stream_recvmsg(iocb, sock, msg, len, flags); + err = bt_sock_stream_recvmsg(sock, msg, len, flags); else - err = bt_sock_recvmsg(iocb, sock, msg, len, flags); + err = bt_sock_recvmsg(sock, msg, len, flags); if (pi->chan->mode != L2CAP_MODE_ERTM) return err; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 3c6d2c8ac1a4..825e8fb5114b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -549,8 +549,8 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * return 0; } -static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; @@ -615,8 +615,8 @@ done: return sent; } -static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int rfcomm_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; @@ -627,7 +627,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return 0; } - len = bt_sock_stream_recvmsg(iocb, sock, msg, size, flags); + len = bt_sock_stream_recvmsg(sock, msg, size, flags); lock_sock(sk); if (!(flags & MSG_PEEK) && len > 0) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 76321b546e84..2bb7ef46bb99 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -688,8 +688,8 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len return 0; } -static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; int err; @@ -758,8 +758,8 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting) } } -static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { struct sock *sk = sock->sk; struct sco_pinfo *pi = sco_pi(sk); @@ -777,7 +777,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, release_sock(sk); - return bt_sock_recvmsg(iocb, sock, msg, len, flags); + return bt_sock_recvmsg(sock, msg, len, flags); } static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 769b185fefbd..b6bf51bb187d 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -271,8 +271,8 @@ static void caif_check_flow_release(struct sock *sk) * Copied from unix_dgram_recvmsg, but removed credit checks, * changed locking, address handling and added MSG_TRUNC. */ -static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t len, int flags) +static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m, + size_t len, int flags) { struct sock *sk = sock->sk; @@ -343,9 +343,8 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) * Copied from unix_stream_recvmsg, but removed credit checks, * changed locking calls, changed address handling. */ -static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int caif_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; int copied = 0; @@ -511,8 +510,8 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk, } /* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ -static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -586,8 +585,8 @@ err: * Changed removed permission handling and added waiting for flow on * and other minor adaptations. */ -static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int caif_stream_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); diff --git a/net/can/bcm.c b/net/can/bcm.c index d559f922326d..b523453585be 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1231,8 +1231,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) /* * bcm_sendmsg - process BCM commands (opcodes) from the userspace */ -static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); @@ -1535,8 +1534,8 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, return 0; } -static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/can/raw.c b/net/can/raw.c index 94601b7ff0a3..63ffdb0f3a23 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -658,8 +658,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, return 0; } -static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); @@ -728,8 +727,8 @@ send_failed: return err; } -static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/core/sock.c b/net/core/sock.c index 9c74fc8f0e32..726e1f99aa8d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2163,15 +2163,14 @@ int sock_no_getsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(sock_no_getsockopt); -int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t len) +int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_sendmsg); -int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t len, int flags) +int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len, + int flags) { return -EOPNOTSUPP; } @@ -2543,14 +2542,14 @@ int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, EXPORT_SYMBOL(compat_sock_common_getsockopt); #endif -int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; int addr_len = 0; int err; - err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, + err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index e4c144fa706f..3b1d64d6e093 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -310,11 +310,9 @@ int compat_dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); #endif int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); -int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size); -int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int nonblock, int flags, - int *addr_len); +int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len); void dccp_shutdown(struct sock *sk, int how); int inet_dccp_listen(struct socket *sock, int backlog); unsigned int dccp_poll(struct file *file, struct socket *sock, diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 595ddf0459db..d8346d0eadeb 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -72,8 +72,7 @@ static void printl(const char *fmt, ...) wake_up(&dccpw.wait); } -static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) +static int jdccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { const struct inet_sock *inet = inet_sk(sk); struct ccid3_hc_tx_sock *hc = NULL; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index e171b780b499..52a94016526d 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -741,8 +741,7 @@ static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) return 0; } -int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { const struct dccp_sock *dp = dccp_sk(sk); const int flags = msg->msg_flags; @@ -806,8 +805,8 @@ out_discard: EXPORT_SYMBOL_GPL(dccp_sendmsg); -int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, int *addr_len) +int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len) { const struct dccp_hdr *dh; long timeo; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 810228646de3..754484b3cd0e 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1669,8 +1669,8 @@ static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int } -static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); @@ -1905,8 +1905,7 @@ static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk, return skb; } -static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 2878d8ca6d3b..b60c65f70346 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -98,12 +98,12 @@ static int ieee802154_sock_release(struct socket *sock) return 0; } -static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int ieee802154_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; - return sk->sk_prot->sendmsg(iocb, sk, msg, len); + return sk->sk_prot->sendmsg(sk, msg, len); } static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, @@ -255,8 +255,7 @@ static int raw_disconnect(struct sock *sk, int flags) return 0; } -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) +static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; @@ -327,8 +326,8 @@ out: return err; } -static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; @@ -615,8 +614,7 @@ static int dgram_disconnect(struct sock *sk, int flags) return 0; } -static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) +static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; @@ -715,9 +713,8 @@ out: return err; } -static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4ce954cc94a4..64a9c0fdc4aa 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -716,8 +716,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, } EXPORT_SYMBOL(inet_getname); -int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size) +int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; @@ -728,7 +727,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, inet_autobind(sk)) return -EAGAIN; - return sk->sk_prot->sendmsg(iocb, sk, msg, size); + return sk->sk_prot->sendmsg(sk, msg, size); } EXPORT_SYMBOL(inet_sendmsg); @@ -750,8 +749,8 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, } EXPORT_SYMBOL(inet_sendpage); -int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags) +int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; int addr_len = 0; @@ -759,7 +758,7 @@ int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, sock_rps_record_flow(sk); - err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, + err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index e9f66e1cda50..3648e7f32f3d 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -684,8 +684,7 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, } EXPORT_SYMBOL_GPL(ping_common_sendmsg); -static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct net *net = sock_net(sk); struct flowi4 fl4; @@ -841,8 +840,8 @@ do_confirm: goto out; } -int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); int family = sk->sk_family; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index f027a708b7e0..923cf538fce1 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -481,8 +481,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd, return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; @@ -709,8 +708,8 @@ out: return ret; * we return it, otherwise we block. */ -static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4b57ea8dabc7..d939c35001f9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1064,8 +1064,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, return err; } -int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size) +int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -1543,8 +1542,8 @@ EXPORT_SYMBOL(tcp_read_sock); * Probably, code can be easily improved even more. */ -int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, int *addr_len) +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len) { struct tcp_sock *tp = tcp_sk(sk); int copied = 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0224f930c613..f27556e2158b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -873,8 +873,7 @@ out: } EXPORT_SYMBOL(udp_push_pending_frames); -int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); @@ -1136,7 +1135,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, * sendpage interface can't pass. * This will succeed only when the socket is connected. */ - ret = udp_sendmsg(NULL, sk, &msg, 0); + ret = udp_sendmsg(sk, &msg, 0); if (ret < 0) return ret; } @@ -1254,8 +1253,8 @@ EXPORT_SYMBOL(udp_ioctl); * return it, otherwise we block. */ -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index f3c27899f62b..7e0fe4bdd967 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -21,8 +21,8 @@ int compat_udp_setsockopt(struct sock *sk, int level, int optname, int compat_udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #endif -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len); +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index bd46f736f61d..fee25c0ed1f5 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -77,8 +77,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } -int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0d84b2c7f24e..a5287b3582a4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -456,9 +456,8 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) * we return it, otherwise we block. */ -static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) +static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -730,8 +729,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } -static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d048d46779fc..70568a4548e4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -391,8 +391,7 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup); * return it, otherwise we block. */ -int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -1101,8 +1100,7 @@ out: return err; } -int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; struct udp_sock *up = udp_sk(sk); @@ -1164,12 +1162,12 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, do_udp_sendmsg: if (__ipv6_only_sock(sk)) return -ENETUNREACH; - return udp_sendmsg(iocb, sk, msg, len); + return udp_sendmsg(sk, msg, len); } } if (up->pending == AF_INET) - return udp_sendmsg(iocb, sk, msg, len); + return udp_sendmsg(sk, msg, len); /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index c779c3c90b9d..0682c031ccdc 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -23,10 +23,9 @@ int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #endif -int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len); -int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len); +int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len); int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f11ad1d95e0e..4ea5d7497b5f 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1688,8 +1688,7 @@ out: return rc; } -static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int ipx_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct ipx_sock *ipxs = ipx_sk(sk); @@ -1754,8 +1753,8 @@ out: } -static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct ipx_sock *ipxs = ipx_sk(sk); diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 568edc72d737..ee0ea25c8e7a 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1256,14 +1256,13 @@ static int irda_release(struct socket *sock) } /* - * Function irda_sendmsg (iocb, sock, msg, len) + * Function irda_sendmsg (sock, msg, len) * * Send message down to TinyTP. This function is used for both STREAM and * SEQPACK services. This is possible since it forces the client to * fragment the message if necessary */ -static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int irda_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct irda_sock *self; @@ -1348,13 +1347,13 @@ out: } /* - * Function irda_recvmsg_dgram (iocb, sock, msg, size, flags) + * Function irda_recvmsg_dgram (sock, msg, size, flags) * * Try to receive message and copy it to user. The frame is discarded * after being read, regardless of how much the user actually read */ -static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int irda_recvmsg_dgram(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); @@ -1398,10 +1397,10 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, } /* - * Function irda_recvmsg_stream (iocb, sock, msg, size, flags) + * Function irda_recvmsg_stream (sock, msg, size, flags) */ -static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int irda_recvmsg_stream(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); @@ -1515,14 +1514,14 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, } /* - * Function irda_sendmsg_dgram (iocb, sock, msg, len) + * Function irda_sendmsg_dgram (sock, msg, len) * * Send message down to TinyTP for the unreliable sequenced * packet service... * */ -static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int irda_sendmsg_dgram(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct irda_sock *self; @@ -1594,14 +1593,14 @@ out: } /* - * Function irda_sendmsg_ultra (iocb, sock, msg, len) + * Function irda_sendmsg_ultra (sock, msg, len) * * Send message down to IrLMP for the unreliable Ultra * packet service... */ #ifdef CONFIG_IRDA_ULTRA -static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int irda_sendmsg_ultra(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct irda_sock *self; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 2e9953b2db84..94b4c898a116 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1026,8 +1026,8 @@ static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg, (void *) prmdata, 8); } -static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); @@ -1317,8 +1317,8 @@ static void iucv_process_message_q(struct sock *sk) } } -static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; diff --git a/net/key/af_key.c b/net/key/af_key.c index f8ac939d52b4..9255fd9d94bc 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3588,8 +3588,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } #endif -static int pfkey_sendmsg(struct kiocb *kiocb, - struct socket *sock, struct msghdr *msg, size_t len) +static int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct sk_buff *skb = NULL; @@ -3630,8 +3629,7 @@ out: return err ? : len; } -static int pfkey_recvmsg(struct kiocb *kiocb, - struct socket *sock, struct msghdr *msg, size_t len, +static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 05dfc8aa36af..79649937ec71 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -385,7 +385,7 @@ drop: /* Userspace will call sendmsg() on the tunnel socket to send L2TP * control frames. */ -static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) +static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct sk_buff *skb; int rc; @@ -506,7 +506,7 @@ no_route: goto out; } -static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 8611f1b63141..d1ded3777815 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -480,8 +480,7 @@ out: /* Userspace will call sendmsg() on the tunnel socket to send L2TP * control frames. */ -static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); @@ -643,9 +642,8 @@ do_confirm: goto done; } -static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index cc7a828fc914..e9b0dec56b8e 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -185,9 +185,8 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb) /* Receive message. This is the recvmsg for the PPPoL2TP socket. */ -static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, - int flags) +static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int err; struct sk_buff *skb; @@ -295,7 +294,7 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session) * when a user application does a sendmsg() on the session socket. L2TP and * PPP headers must be inserted into the user's data. */ -static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, +static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { static const unsigned char ppph[2] = { 0xff, 0x03 }; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2c0b83ce43bd..17a8dff06090 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -704,8 +704,8 @@ out: * Copy received data to the socket user. * Returns non-negative upon success, negative otherwise. */ -static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { DECLARE_SOCKADDR(struct sockaddr_llc *, uaddr, msg->msg_name); const int nonblock = flags & MSG_DONTWAIT; @@ -878,8 +878,7 @@ copy_uaddr: * Transmit data provided by the socket user. * Returns non-negative upon success, negative otherwise. */ -static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2702673f0f23..a96025c0583f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2256,8 +2256,7 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); } -static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -2346,8 +2345,7 @@ out: return err; } -static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len, +static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct scm_cookie scm; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 69f1d5e9959f..b987fd56c3c5 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1023,8 +1023,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) return 1; } -static int nr_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int nr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); @@ -1133,8 +1132,8 @@ out: return err; } -static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int nr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_ax25 *, sax, msg->msg_name); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index e181e290427c..9578bd6a4f3e 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -750,8 +750,8 @@ error: return ret; } -static int llcp_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -793,8 +793,8 @@ static int llcp_sock_sendmsg(struct kiocb *iocb, struct socket *sock, return nfc_llcp_send_i_frame(llcp_sock, msg, len); } -static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 373e138c0ab6..82b4e8024778 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -211,8 +211,7 @@ static void rawsock_tx_work(struct work_struct *work) } } -static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int rawsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct nfc_dev *dev = nfc_rawsock(sk)->dev; @@ -248,8 +247,8 @@ static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock, return len; } -static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9db83693d736..404c9735aee9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1606,8 +1606,8 @@ oom: * protocol layers and you must therefore supply it with a complete frame */ -static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); @@ -2601,8 +2601,7 @@ out: return err; } -static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); @@ -2882,8 +2881,8 @@ out: * If necessary we block. */ -static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 26054b4b467c..5e710435ffa9 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -83,8 +83,7 @@ static int pn_init(struct sock *sk) return 0; } -static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_pn *, target, msg->msg_name); struct sk_buff *skb; @@ -125,9 +124,8 @@ static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, return (err >= 0) ? len : err; } -static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct sk_buff *skb = NULL; struct sockaddr_pn sa; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 5d3f2b7507d4..6de2aeb98a1f 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1118,8 +1118,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) } -static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int pep_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct pep_sock *pn = pep_sk(sk); struct sk_buff *skb; @@ -1246,9 +1245,8 @@ struct sk_buff *pep_read(struct sock *sk) return skb; } -static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct sk_buff *skb; int err; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 008214a3d5eb..d575ef4e9aa6 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -425,15 +425,15 @@ out: return err; } -static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int pn_socket_sendmsg(struct socket *sock, struct msghdr *m, + size_t total_len) { struct sock *sk = sock->sk; if (pn_socket_autobind(sock)) return -EAGAIN; - return sk->sk_prot->sendmsg(iocb, sk, m, total_len); + return sk->sk_prot->sendmsg(sk, m, total_len); } const struct proto_ops phonet_dgram_ops = { diff --git a/net/rds/rds.h b/net/rds/rds.h index c2a5eef41343..c3f2855c3d84 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -702,8 +702,8 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, void rds_inc_put(struct rds_incoming *inc); void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, struct rds_incoming *inc, gfp_t gfp); -int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int msg_flags); +int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int msg_flags); void rds_clear_recv_queue(struct rds_sock *rs); int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg); void rds_inc_info_copy(struct rds_incoming *inc, @@ -711,8 +711,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, __be32 saddr, __be32 daddr, int flip); /* send.c */ -int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t payload_len); +int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len); void rds_send_reset(struct rds_connection *conn); int rds_send_xmit(struct rds_connection *conn); struct sockaddr_in; diff --git a/net/rds/recv.c b/net/rds/recv.c index f9ec1acd801c..a00462b0d01d 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -395,8 +395,8 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg) return 0; } -int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int msg_flags) +int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int msg_flags) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); diff --git a/net/rds/send.c b/net/rds/send.c index 42f65d4305c8..44672befc0ee 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -920,8 +920,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, return ret; } -int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t payload_len) +int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 43bac7c4dd9e..8ae603069a1a 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1046,8 +1046,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros return 1; } -static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int rose_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); @@ -1211,8 +1210,8 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, } -static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 7b1670489638..0095b9a0b779 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -441,8 +441,7 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr, * - sends a call data packet * - may send an abort (abort code in control data) */ -static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t len) +static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) { struct rxrpc_transport *trans; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); @@ -482,7 +481,7 @@ static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock, switch (rx->sk.sk_state) { case RXRPC_SERVER_LISTENING: if (!m->msg_name) { - ret = rxrpc_server_sendmsg(iocb, rx, m, len); + ret = rxrpc_server_sendmsg(rx, m, len); break; } case RXRPC_SERVER_BOUND: @@ -492,7 +491,7 @@ static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock, break; } case RXRPC_CLIENT_CONNECTED: - ret = rxrpc_client_sendmsg(iocb, rx, trans, m, len); + ret = rxrpc_client_sendmsg(rx, trans, m, len); break; default: ret = -ENOTCONN; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ba9fd36d3f15..2fc1e659e5c9 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -548,10 +548,9 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t, extern unsigned rxrpc_resend_timeout; int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *); -int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *, - struct rxrpc_transport *, struct msghdr *, size_t); -int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *, struct msghdr *, - size_t); +int rxrpc_client_sendmsg(struct rxrpc_sock *, struct rxrpc_transport *, + struct msghdr *, size_t); +int rxrpc_server_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); /* * ar-peer.c @@ -572,8 +571,7 @@ extern const struct file_operations rxrpc_connection_seq_fops; * ar-recvmsg.c */ void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *); -int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, - int); +int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* * ar-security.c diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 8331c95e1522..09f584566e23 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -23,8 +23,7 @@ */ unsigned rxrpc_resend_timeout = 4 * HZ; -static int rxrpc_send_data(struct kiocb *iocb, - struct rxrpc_sock *rx, +static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, struct msghdr *msg, size_t len); @@ -129,9 +128,8 @@ static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) * - caller holds the socket locked * - the socket may be either a client socket or a server socket */ -int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, - struct rxrpc_transport *trans, struct msghdr *msg, - size_t len) +int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans, + struct msghdr *msg, size_t len) { struct rxrpc_conn_bundle *bundle; enum rxrpc_command cmd; @@ -191,7 +189,7 @@ int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, /* request phase complete for this client call */ ret = -EPROTO; } else { - ret = rxrpc_send_data(iocb, rx, call, msg, len); + ret = rxrpc_send_data(rx, call, msg, len); } rxrpc_put_call(call); @@ -232,7 +230,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, call->state != RXRPC_CALL_SERVER_SEND_REPLY) { ret = -EPROTO; /* request phase complete for this client call */ } else { - ret = rxrpc_send_data(NULL, call->socket, call, msg, len); + ret = rxrpc_send_data(call->socket, call, msg, len); } release_sock(&call->socket->sk); @@ -271,8 +269,7 @@ EXPORT_SYMBOL(rxrpc_kernel_abort_call); * send a message through a server socket * - caller holds the socket locked */ -int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, - struct msghdr *msg, size_t len) +int rxrpc_server_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) { enum rxrpc_command cmd; struct rxrpc_call *call; @@ -313,7 +310,7 @@ int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, break; } - ret = rxrpc_send_data(iocb, rx, call, msg, len); + ret = rxrpc_send_data(rx, call, msg, len); break; case RXRPC_CMD_SEND_ABORT: @@ -520,8 +517,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, * - must be called in process context * - caller holds the socket locked */ -static int rxrpc_send_data(struct kiocb *iocb, - struct rxrpc_sock *rx, +static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, struct msghdr *msg, size_t len) { diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index d58ba702bd2c..a4f883e2d66f 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -43,8 +43,8 @@ void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call) * - we need to be careful about two or more threads calling recvmsg * simultaneously */ -int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct rxrpc_skb_priv *sp; struct rxrpc_call *call = NULL, *continue_call = NULL; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index aafe94bf292e..f1a65398f311 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1586,8 +1586,7 @@ static int sctp_error(struct sock *sk, int flags, int err) static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); -static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t msg_len) +static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) { struct net *net = sock_net(sk); struct sctp_sock *sp; @@ -2066,9 +2065,8 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) * flags - flags sent or received with the user message, see Section * 5 for complete description of the flags. */ -static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); diff --git a/net/socket.c b/net/socket.c index b78cf601a021..95d3085cb477 100644 --- a/net/socket.c +++ b/net/socket.c @@ -610,45 +610,20 @@ void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) } EXPORT_SYMBOL(__sock_tx_timestamp); -static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, + size_t size) { - return sock->ops->sendmsg(iocb, sock, msg, size); + return sock->ops->sendmsg(sock, msg, size); } -static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { int err = security_socket_sendmsg(sock, msg, size); - return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size); -} - -static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, - size_t size, bool nosec) -{ - struct kiocb iocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) : - __sock_sendmsg(&iocb, sock, msg, size); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; -} - -int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) -{ - return do_sock_sendmsg(sock, msg, size, false); + return err ?: sock_sendmsg_nosec(sock, msg, size); } EXPORT_SYMBOL(sock_sendmsg); -static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) -{ - return do_sock_sendmsg(sock, msg, size, true); -} - int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -744,47 +719,21 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); -static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { - return sock->ops->recvmsg(iocb, sock, msg, size, flags); + return sock->ops->recvmsg(sock, msg, size, flags); } -static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { int err = security_socket_recvmsg(sock, msg, size, flags); - return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); -} - -int sock_recvmsg(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct kiocb iocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - ret = __sock_recvmsg(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; + return err ?: sock_recvmsg_nosec(sock, msg, size, flags); } EXPORT_SYMBOL(sock_recvmsg); -static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct kiocb iocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; -} - /** * kernel_recvmsg - Receive a message from a socket (kernel space) * @sock: The socket to receive the message from @@ -861,8 +810,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; - res = __sock_recvmsg(iocb, sock, &msg, - iocb->ki_nbytes, msg.msg_flags); + res = sock_recvmsg(sock, &msg, iocb->ki_nbytes, msg.msg_flags); *to = msg.msg_iter; return res; } @@ -883,7 +831,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (sock->type == SOCK_SEQPACKET) msg.msg_flags |= MSG_EOR; - res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes); + res = sock_sendmsg(sock, &msg, iocb->ki_nbytes); *from = msg.msg_iter; return res; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c245ec31fa4c..dcb797c60806 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -895,7 +895,6 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) /** * tipc_sendmsg - send message in connectionless manner - * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send * @dsz: amount of user data to be sent @@ -907,7 +906,7 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) * * Returns the number of bytes sent on success, or errno otherwise */ -static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, +static int tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; @@ -1052,7 +1051,6 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) /** * tipc_send_stream - send stream-oriented data - * @iocb: (unused) * @sock: socket structure * @m: data to send * @dsz: total length of data to be transmitted @@ -1062,8 +1060,7 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) * Returns the number of bytes sent on success (or partial success), * or errno if no data sent */ -static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t dsz) +static int tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; int ret; @@ -1147,7 +1144,6 @@ next: /** * tipc_send_packet - send a connection-oriented message - * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send * @dsz: length of data to be transmitted @@ -1156,13 +1152,12 @@ next: * * Returns the number of bytes sent on success, or errno otherwise */ -static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t dsz) +static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz) { if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; - return tipc_send_stream(iocb, sock, m, dsz); + return tipc_send_stream(sock, m, dsz); } /* tipc_sk_finish_conn - complete the setup of a connection @@ -1337,7 +1332,6 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) /** * tipc_recvmsg - receive packet-oriented message - * @iocb: (unused) * @m: descriptor for message info * @buf_len: total size of user buffer area * @flags: receive flags @@ -1347,8 +1341,8 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) * * Returns size of returned message data, errno otherwise */ -static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) +static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len, + int flags) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); @@ -1432,7 +1426,6 @@ exit: /** * tipc_recv_stream - receive stream-oriented data - * @iocb: (unused) * @m: descriptor for message info * @buf_len: total size of user buffer area * @flags: receive flags @@ -1442,8 +1435,8 @@ exit: * * Returns size of returned message data, errno otherwise */ -static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) +static int tipc_recv_stream(struct socket *sock, struct msghdr *m, + size_t buf_len, int flags) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 526b6edab018..433f287ee548 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -516,20 +516,15 @@ static unsigned int unix_dgram_poll(struct file *, struct socket *, poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); -static int unix_stream_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -static int unix_stream_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); -static int unix_dgram_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -static int unix_dgram_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); +static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); +static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); +static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); +static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int); -static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); +static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); +static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, + int); static int unix_set_peek_off(struct sock *sk, int val) { @@ -1442,8 +1437,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, * Send AF_UNIX data. */ -static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); @@ -1622,8 +1617,8 @@ out: */ #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) -static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct sock *other = NULL; @@ -1725,8 +1720,8 @@ out_err: return sent ? : err; } -static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { int err; struct sock *sk = sock->sk; @@ -1741,19 +1736,18 @@ static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_namelen) msg->msg_namelen = 0; - return unix_dgram_sendmsg(kiocb, sock, msg, len); + return unix_dgram_sendmsg(sock, msg, len); } -static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; - return unix_dgram_recvmsg(iocb, sock, msg, size, flags); + return unix_dgram_recvmsg(sock, msg, size, flags); } static void unix_copy_addr(struct msghdr *msg, struct sock *sk) @@ -1766,9 +1760,8 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) } } -static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct scm_cookie scm; struct sock *sk = sock->sk; @@ -1900,9 +1893,8 @@ static unsigned int unix_skb_len(const struct sk_buff *skb) return skb->len - UNIXCB(skb).consumed; } -static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct scm_cookie scm; struct sock *sk = sock->sk; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 1d0e39c9a3e2..2ec86e652a19 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -949,8 +949,8 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, return mask; } -static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { int err; struct sock *sk; @@ -1062,11 +1062,10 @@ out: return err; } -static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { - return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len, - flags); + return transport->dgram_dequeue(vsock_sk(sock->sk), msg, len, flags); } static const struct proto_ops vsock_dgram_ops = { @@ -1505,8 +1504,8 @@ static int vsock_stream_getsockopt(struct socket *sock, return 0; } -static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk; struct vsock_sock *vsk; @@ -1644,9 +1643,8 @@ out: static int -vsock_stream_recvmsg(struct kiocb *kiocb, - struct socket *sock, - struct msghdr *msg, size_t len, int flags) +vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk; struct vsock_sock *vsk; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 7f3255084a6c..c294da095461 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1730,8 +1730,7 @@ static int vmci_transport_dgram_enqueue( return err - sizeof(*dg); } -static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, - struct vsock_sock *vsk, +static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg, size_t len, int flags) { diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index d9149b68b9bc..c3ab230e4493 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1077,8 +1077,7 @@ out_clear_request: goto out; } -static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct x25_sock *x25 = x25_sk(sk); @@ -1252,8 +1251,7 @@ out_kfree_skb: } -static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, +static int x25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; -- cgit v1.3-6-gb490 From 61e021f3b86cbbcc04cbe8ac7b7da2b8c94b5e8e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 2 Mar 2015 15:21:55 +0100 Subject: ebpf: move CONFIG_BPF_SYSCALL-only function declarations Masami noted that it would be better to hide the remaining CONFIG_BPF_SYSCALL-only function declarations within the BPF header ifdef, w/o else path dummy alternatives since these functions are not supposed to have a user outside of CONFIG_BPF_SYSCALL. Suggested-by: Masami Hiramatsu Reference: http://article.gmane.org/gmane.linux.kernel.api/8658 Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a1a7ff2df328..a884f5a2c503 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -42,10 +42,6 @@ struct bpf_map_type_list { enum bpf_map_type type; }; -void bpf_register_map_type(struct bpf_map_type_list *tl); -void bpf_map_put(struct bpf_map *map); -struct bpf_map *bpf_map_get(struct fd f); - /* function argument constraints */ enum bpf_arg_type { ARG_ANYTHING = 0, /* any argument is ok */ @@ -126,9 +122,16 @@ struct bpf_prog_aux { #ifdef CONFIG_BPF_SYSCALL void bpf_register_prog_type(struct bpf_prog_type_list *tl); +void bpf_register_map_type(struct bpf_map_type_list *tl); -void bpf_prog_put(struct bpf_prog *prog); struct bpf_prog *bpf_prog_get(u32 ufd); +void bpf_prog_put(struct bpf_prog *prog); + +struct bpf_map *bpf_map_get(struct fd f); +void bpf_map_put(struct bpf_map *map); + +/* verify correctness of eBPF program */ +int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); #else static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) { @@ -142,10 +145,7 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) static inline void bpf_prog_put(struct bpf_prog *prog) { } -#endif - -/* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); +#endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; -- cgit v1.3-6-gb490 From d476059e77d1af48453a58f9de1e36f2eaff6450 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 2 Mar 2015 00:11:09 -0600 Subject: net: Kill dev_rebuild_header Now that there are no more users kill dev_rebuild_header and all of it's implementations. This is long overdue. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- drivers/firewire/net.c | 13 -------- drivers/isdn/i4l/isdn_net.c | 33 ------------------- drivers/media/dvb-core/dvb_net.c | 1 - drivers/net/arcnet/arcnet.c | 55 ------------------------------- drivers/net/ipvlan/ipvlan_main.c | 1 - drivers/net/macvlan.c | 1 - drivers/net/wireless/hostap/hostap_main.c | 1 - include/linux/etherdevice.h | 1 - include/linux/netdevice.h | 12 +------ net/802/fc.c | 21 ------------ net/802/fddi.c | 26 --------------- net/802/hippi.c | 28 ---------------- net/8021q/vlan_dev.c | 35 -------------------- net/ethernet/eth.c | 34 ------------------- net/netrom/nr_dev.c | 31 ----------------- net/rose/rose_dev.c | 14 -------- 16 files changed, 1 insertion(+), 306 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 2c68da1ceeee..f4ea80d602f7 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -237,18 +237,6 @@ static int fwnet_header_create(struct sk_buff *skb, struct net_device *net, return -net->hard_header_len; } -static int fwnet_header_rebuild(struct sk_buff *skb) -{ - struct fwnet_header *h = (struct fwnet_header *)skb->data; - - if (get_unaligned_be16(&h->h_proto) == ETH_P_IP) - return arp_find((unsigned char *)&h->h_dest, skb); - - dev_notice(&skb->dev->dev, "unable to resolve type %04x addresses\n", - be16_to_cpu(h->h_proto)); - return 0; -} - static int fwnet_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type) { @@ -282,7 +270,6 @@ static int fwnet_header_parse(const struct sk_buff *skb, unsigned char *haddr) static const struct header_ops fwnet_header_ops = { .create = fwnet_header_create, - .rebuild = fwnet_header_rebuild, .cache = fwnet_header_cache, .cache_update = fwnet_header_cache_update, .parse = fwnet_header_parse, diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index 94affa5e6f28..546b7e81161d 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -1951,38 +1951,6 @@ static int isdn_net_header(struct sk_buff *skb, struct net_device *dev, return len; } -/* We don't need to send arp, because we have point-to-point connections. */ -static int -isdn_net_rebuild_header(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - isdn_net_local *lp = netdev_priv(dev); - int ret = 0; - - if (lp->p_encap == ISDN_NET_ENCAP_ETHER) { - struct ethhdr *eth = (struct ethhdr *) skb->data; - - /* - * Only ARP/IP is currently supported - */ - - if (eth->h_proto != htons(ETH_P_IP)) { - printk(KERN_WARNING - "isdn_net: %s don't know how to resolve type %d addresses?\n", - dev->name, (int) eth->h_proto); - memcpy(eth->h_source, dev->dev_addr, dev->addr_len); - return 0; - } - /* - * Try to get ARP to resolve the header. - */ -#ifdef CONFIG_INET - ret = arp_find(eth->h_dest, skb); -#endif - } - return ret; -} - static int isdn_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type) { @@ -2005,7 +1973,6 @@ static void isdn_header_cache_update(struct hh_cache *hh, static const struct header_ops isdn_header_ops = { .create = isdn_net_header, - .rebuild = isdn_net_rebuild_header, .cache = isdn_header_cache, .cache_update = isdn_header_cache_update, }; diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index 686d3277dad1..4a77cb02dffc 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -1190,7 +1190,6 @@ static int dvb_net_stop(struct net_device *dev) static const struct header_ops dvb_header_ops = { .create = eth_header, .parse = eth_header_parse, - .rebuild = eth_rebuild_header, }; diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 09de683c167e..10f71c732b59 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -104,7 +104,6 @@ EXPORT_SYMBOL(arcnet_timeout); static int arcnet_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len); -static int arcnet_rebuild_header(struct sk_buff *skb); static int go_tx(struct net_device *dev); static int debug = ARCNET_DEBUG; @@ -312,7 +311,6 @@ static int choose_mtu(void) static const struct header_ops arcnet_header_ops = { .create = arcnet_header, - .rebuild = arcnet_rebuild_header, }; static const struct net_device_ops arcnet_netdev_ops = { @@ -538,59 +536,6 @@ static int arcnet_header(struct sk_buff *skb, struct net_device *dev, return proto->build_header(skb, dev, type, _daddr); } - -/* - * Rebuild the ARCnet hard header. This is called after an ARP (or in the - * future other address resolution) has completed on this sk_buff. We now - * let ARP fill in the destination field. - */ -static int arcnet_rebuild_header(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - struct arcnet_local *lp = netdev_priv(dev); - int status = 0; /* default is failure */ - unsigned short type; - uint8_t daddr=0; - struct ArcProto *proto; - /* - * XXX: Why not use skb->mac_len? - */ - if (skb->network_header - skb->mac_header != 2) { - BUGMSG(D_NORMAL, - "rebuild_header: shouldn't be here! (hdrsize=%d)\n", - (int)(skb->network_header - skb->mac_header)); - return 0; - } - type = *(uint16_t *) skb_pull(skb, 2); - BUGMSG(D_DURING, "rebuild header for protocol %Xh\n", type); - - if (type == ETH_P_IP) { -#ifdef CONFIG_INET - BUGMSG(D_DURING, "rebuild header for ethernet protocol %Xh\n", type); - status = arp_find(&daddr, skb) ? 1 : 0; - BUGMSG(D_DURING, " rebuilt: dest is %d; protocol %Xh\n", - daddr, type); -#endif - } else { - BUGMSG(D_NORMAL, - "I don't understand ethernet protocol %Xh addresses!\n", type); - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } - - /* if we couldn't resolve the address... give up. */ - if (!status) - return 0; - - /* add the _real_ header this time! */ - proto = arc_proto_map[lp->default_proto[daddr]]; - proto->build_header(skb, dev, type, daddr); - - return 1; /* success */ -} - - - /* Called by the kernel in order to transmit a packet. */ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, struct net_device *dev) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 4f4099d5603d..2950c3780230 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -336,7 +336,6 @@ static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev, static const struct header_ops ipvlan_header_ops = { .create = ipvlan_hard_header, - .rebuild = eth_rebuild_header, .parse = eth_header_parse, .cache = eth_header_cache, .cache_update = eth_header_cache_update, diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 1df38bdae2ee..b5e3320ca506 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -550,7 +550,6 @@ static int macvlan_hard_header(struct sk_buff *skb, struct net_device *dev, static const struct header_ops macvlan_hard_header_ops = { .create = macvlan_hard_header, - .rebuild = eth_rebuild_header, .parse = eth_header_parse, .cache = eth_header_cache, .cache_update = eth_header_cache_update, diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c index 52919ad42726..8f9f3e9fbfce 100644 --- a/drivers/net/wireless/hostap/hostap_main.c +++ b/drivers/net/wireless/hostap/hostap_main.c @@ -798,7 +798,6 @@ static void prism2_tx_timeout(struct net_device *dev) const struct header_ops hostap_80211_ops = { .create = eth_header, - .rebuild = eth_rebuild_header, .cache = eth_header_cache, .cache_update = eth_header_cache_update, .parse = hostap_80211_header_parse, diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 1d869d185a0d..606563ef8a72 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -35,7 +35,6 @@ extern const struct header_ops eth_header_ops; int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len); -int eth_rebuild_header(struct sk_buff *skb); int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5897b4ea5a3f..2007f3b44d05 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -261,7 +261,6 @@ struct header_ops { unsigned short type, const void *daddr, const void *saddr, unsigned int len); int (*parse)(const struct sk_buff *skb, unsigned char *haddr); - int (*rebuild)(struct sk_buff *skb); int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); void (*cache_update)(struct hh_cache *hh, const struct net_device *dev, @@ -1346,7 +1345,7 @@ enum netdev_priv_flags { * if one wants to override the ndo_*() functions * @ethtool_ops: Management operations * @fwd_ops: Management operations - * @header_ops: Includes callbacks for creating,parsing,rebuilding,etc + * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * * @flags: Interface flags (a la BSD) @@ -2399,15 +2398,6 @@ static inline int dev_parse_header(const struct sk_buff *skb, return dev->header_ops->parse(skb, haddr); } -static inline int dev_rebuild_header(struct sk_buff *skb) -{ - const struct net_device *dev = skb->dev; - - if (!dev->header_ops || !dev->header_ops->rebuild) - return 0; - return dev->header_ops->rebuild(skb); -} - typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); static inline int unregister_gifconf(unsigned int family) diff --git a/net/802/fc.c b/net/802/fc.c index 7c174b6750cd..7b9219022418 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -75,29 +75,8 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev, return -hdr_len; } -/* - * A neighbour discovery of some species (eg arp) has completed. We - * can now send the packet. - */ - -static int fc_rebuild_header(struct sk_buff *skb) -{ -#ifdef CONFIG_INET - struct fch_hdr *fch=(struct fch_hdr *)skb->data; - struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr)); - if(fcllc->ethertype != htons(ETH_P_IP)) { - printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype)); - return 0; - } - return arp_find(fch->daddr, skb); -#else - return 0; -#endif -} - static const struct header_ops fc_header_ops = { .create = fc_header, - .rebuild = fc_rebuild_header, }; static void fc_setup(struct net_device *dev) diff --git a/net/802/fddi.c b/net/802/fddi.c index 59e7346f1193..7d3a0af954e8 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -87,31 +87,6 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev, return -hl; } - -/* - * Rebuild the FDDI MAC header. This is called after an ARP - * (or in future other address resolution) has completed on - * this sk_buff. We now let ARP fill in the other fields. - */ - -static int fddi_rebuild_header(struct sk_buff *skb) -{ - struct fddihdr *fddi = (struct fddihdr *)skb->data; - -#ifdef CONFIG_INET - if (fddi->hdr.llc_snap.ethertype == htons(ETH_P_IP)) - /* Try to get ARP to resolve the header and fill destination address */ - return arp_find(fddi->daddr, skb); - else -#endif - { - printk("%s: Don't know how to resolve type %04X addresses.\n", - skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype)); - return 0; - } -} - - /* * Determine the packet's protocol ID and fill in skb fields. * This routine is called before an incoming packet is passed @@ -177,7 +152,6 @@ EXPORT_SYMBOL(fddi_change_mtu); static const struct header_ops fddi_header_ops = { .create = fddi_header, - .rebuild = fddi_rebuild_header, }; diff --git a/net/802/hippi.c b/net/802/hippi.c index 2e03f8259dd5..ade1a52cdcff 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -90,33 +90,6 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev, } -/* - * Rebuild the HIPPI MAC header. This is called after an ARP has - * completed on this sk_buff. We now let ARP fill in the other fields. - */ - -static int hippi_rebuild_header(struct sk_buff *skb) -{ - struct hippi_hdr *hip = (struct hippi_hdr *)skb->data; - - /* - * Only IP is currently supported - */ - - if(hip->snap.ethertype != htons(ETH_P_IP)) - { - printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",skb->dev->name,ntohs(hip->snap.ethertype)); - return 0; - } - - /* - * We don't support dynamic ARP on HIPPI, but we use the ARP - * static ARP tables to hold the I-FIELDs. - */ - return arp_find(hip->le.daddr, skb); -} - - /* * Determine the packet's protocol ID. */ @@ -186,7 +159,6 @@ EXPORT_SYMBOL(hippi_neigh_setup_dev); static const struct header_ops hippi_header_ops = { .create = hippi_header, - .rebuild = hippi_rebuild_header, }; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 118956448cf6..1dcfec8b49f3 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -36,39 +36,6 @@ #include #include -/* - * Rebuild the Ethernet MAC header. This is called after an ARP - * (or in future other address resolution) has completed on this - * sk_buff. We now let ARP fill in the other fields. - * - * This routine CANNOT use cached dst->neigh! - * Really, it is used only when dst->neigh is wrong. - * - * TODO: This needs a checkup, I'm ignorant here. --BLG - */ -static int vlan_dev_rebuild_header(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); - - switch (veth->h_vlan_encapsulated_proto) { -#ifdef CONFIG_INET - case htons(ETH_P_IP): - - /* TODO: Confirm this will work with VLAN headers... */ - return arp_find(veth->h_dest, skb); -#endif - default: - pr_debug("%s: unable to resolve type %X addresses\n", - dev->name, ntohs(veth->h_vlan_encapsulated_proto)); - - ether_addr_copy(veth->h_source, dev->dev_addr); - break; - } - - return 0; -} - /* * Create the VLAN header for an arbitrary protocol layer * @@ -534,7 +501,6 @@ static int vlan_dev_get_lock_subclass(struct net_device *dev) static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, - .rebuild = vlan_dev_rebuild_header, .parse = eth_header_parse, }; @@ -554,7 +520,6 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev static const struct header_ops vlan_passthru_header_ops = { .create = vlan_passthru_hard_header, - .rebuild = dev_rebuild_header, .parse = eth_header_parse, }; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 238f38d21641..8dbdf6c910b7 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -112,39 +112,6 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, } EXPORT_SYMBOL(eth_header); -/** - * eth_rebuild_header- rebuild the Ethernet MAC header. - * @skb: socket buffer to update - * - * This is called after an ARP or IPV6 ndisc it's resolution on this - * sk_buff. We now let protocol (ARP) fill in the other fields. - * - * This routine CANNOT use cached dst->neigh! - * Really, it is used only when dst->neigh is wrong. - */ -int eth_rebuild_header(struct sk_buff *skb) -{ - struct ethhdr *eth = (struct ethhdr *)skb->data; - struct net_device *dev = skb->dev; - - switch (eth->h_proto) { -#ifdef CONFIG_INET - case htons(ETH_P_IP): - return arp_find(eth->h_dest, skb); -#endif - default: - netdev_dbg(dev, - "%s: unable to resolve type %X addresses.\n", - dev->name, ntohs(eth->h_proto)); - - memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); - break; - } - - return 0; -} -EXPORT_SYMBOL(eth_rebuild_header); - /** * eth_get_headlen - determine the the length of header for an ethernet frame * @data: pointer to start of frame @@ -369,7 +336,6 @@ EXPORT_SYMBOL(eth_validate_addr); const struct header_ops eth_header_ops ____cacheline_aligned = { .create = eth_header, .parse = eth_header_parse, - .rebuild = eth_rebuild_header, .cache = eth_header_cache, .cache_update = eth_header_cache_update, }; diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 6ae063cebf7d..988f542481a8 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -65,36 +65,6 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) return 1; } -#ifdef CONFIG_INET - -static int nr_rebuild_header(struct sk_buff *skb) -{ - unsigned char *bp = skb->data; - - if (arp_find(bp + 7, skb)) - return 1; - - bp[6] &= ~AX25_CBIT; - bp[6] &= ~AX25_EBIT; - bp[6] |= AX25_SSSID_SPARE; - bp += AX25_ADDR_LEN; - - bp[6] &= ~AX25_CBIT; - bp[6] |= AX25_EBIT; - bp[6] |= AX25_SSSID_SPARE; - - return 0; -} - -#else - -static int nr_rebuild_header(struct sk_buff *skb) -{ - return 1; -} - -#endif - static int nr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) @@ -188,7 +158,6 @@ static netdev_tx_t nr_xmit(struct sk_buff *skb, struct net_device *dev) static const struct header_ops nr_header_ops = { .create = nr_header, - .rebuild= nr_rebuild_header, }; static const struct net_device_ops nr_netdev_ops = { diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 90209c1fa49b..369ca81a8c5d 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -56,19 +56,6 @@ static int rose_header(struct sk_buff *skb, struct net_device *dev, return -37; } -static int rose_rebuild_header(struct sk_buff *skb) -{ -#ifdef CONFIG_INET - unsigned char *bp = (unsigned char *)skb->data; - - if (arp_find(bp + 7, skb)) { - return 1; - } - -#endif - return 0; -} - static int rose_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = addr; @@ -133,7 +120,6 @@ static netdev_tx_t rose_xmit(struct sk_buff *skb, struct net_device *dev) static const struct header_ops rose_header_ops = { .create = rose_header, - .rebuild = rose_rebuild_header, }; static const struct net_device_ops rose_netdev_ops = { -- cgit v1.3-6-gb490 From 0189197f441602acdca3f97750d392a895b778fd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 3 Mar 2015 19:10:47 -0600 Subject: mpls: Basic routing support This change adds a new Kconfig option MPLS_ROUTING. The core of this change is the code to look at an mpls packet received from another machine. Look that packet up in a routing table and forward the packet on. Support of MPLS over ATM is not considered or attempted here. This implemntation follows RFC3032 and implements the MPLS shim header that can pass over essentially any network. What RFC3021 refers to as the as the Incoming Label Map (ILM) I call net->mpls.platform_label[]. What RFC3031 refers to as the Next Label Hop Forwarding Entry (NHLFE) I call mpls_route. Though calling it the label fordwarding information base (lfib) might also be valid. Further the implemntation forwards packets as described in RFC3032. There is no need and given the original motivation for MPLS a strong discincentive to have a flexible label forwarding path. In essence the logic is the topmost label is read, looked up, removed, and replaced by 0 or more new lables and the sent out the specified interface to it's next hop. Quite a few optional features are not implemented here. Among them are generation of ICMP errors when the TTL is exceeded or the packet is larger than the next hop MTU (those conditions are detected and the packets are dropped instead of generating an icmp error). The traffic class field is always set to 0. The implementation focuses on IP over MPLS and does not handle egress of other kinds of protocols. Instead of implementing coordination with the neighbour table and sorting out how to input next hops in a different address family (for which there is value). I was lazy and implemented a next hop mac address instead. The code is simpler and there are flavor of MPLS such as MPLS-TP where neither an IPv4 nor an IPv6 next hop is appropriate so a next hop by mac address would need to be implemented at some point. Two new definitions AF_MPLS and PF_MPLS are exposed to userspace. Decoding the mpls header must be done by first byeswapping a 32bit bit endian word into the local cpu endian and then bit shifting to extract the pieces. There is no C bit-field that can represent a wire format mpls header on a little endian machine as the low bits of the 20bit label wind up in the wrong half of third byte. Therefore internally everything is deal with in cpu native byte order except when writing to and reading from a packet. For management simplicity if a label is configured to forward out an interface that is down the packet is dropped early. Similarly if an network interface is removed rt_dev is updated to NULL (so no reference is preserved) and any packets for that label are dropped. Keeping the label entries in the kernel allows the kernel label table to function as the definitive source of which labels are allocated and which are not. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/socket.h | 2 + include/net/net_namespace.h | 4 + include/net/netns/mpls.h | 15 ++ net/mpls/Kconfig | 5 + net/mpls/Makefile | 1 + net/mpls/af_mpls.c | 349 ++++++++++++++++++++++++++++++++++++++++++++ net/mpls/internal.h | 56 +++++++ 7 files changed, 432 insertions(+) create mode 100644 include/net/netns/mpls.h create mode 100644 net/mpls/af_mpls.c create mode 100644 net/mpls/internal.h (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 5c19cba34dce..fab4d0ddf4ed 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -181,6 +181,7 @@ struct ucred { #define AF_WANPIPE 25 /* Wanpipe API Sockets */ #define AF_LLC 26 /* Linux LLC */ #define AF_IB 27 /* Native InfiniBand address */ +#define AF_MPLS 28 /* MPLS */ #define AF_CAN 29 /* Controller Area Network */ #define AF_TIPC 30 /* TIPC sockets */ #define AF_BLUETOOTH 31 /* Bluetooth sockets */ @@ -226,6 +227,7 @@ struct ucred { #define PF_WANPIPE AF_WANPIPE #define PF_LLC AF_LLC #define PF_IB AF_IB +#define PF_MPLS AF_MPLS #define PF_CAN AF_CAN #define PF_TIPC AF_TIPC #define PF_BLUETOOTH AF_BLUETOOTH diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 36faf4990c4b..2cb9acb618e9 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -26,6 +26,7 @@ #endif #include #include +#include #include struct user_namespace; @@ -129,6 +130,9 @@ struct net { #endif #if IS_ENABLED(CONFIG_IP_VS) struct netns_ipvs *ipvs; +#endif +#if IS_ENABLED(CONFIG_MPLS) + struct netns_mpls mpls; #endif struct sock *diag_nlsk; atomic_t fnhe_genid; diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h new file mode 100644 index 000000000000..f90aaf8d4f89 --- /dev/null +++ b/include/net/netns/mpls.h @@ -0,0 +1,15 @@ +/* + * mpls in net namespaces + */ + +#ifndef __NETNS_MPLS_H__ +#define __NETNS_MPLS_H__ + +struct mpls_route; + +struct netns_mpls { + size_t platform_labels; + struct mpls_route __rcu * __rcu *platform_label; +}; + +#endif /* __NETNS_MPLS_H__ */ diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig index a77fbcdd04ee..f4286ee7e2b0 100644 --- a/net/mpls/Kconfig +++ b/net/mpls/Kconfig @@ -22,4 +22,9 @@ config NET_MPLS_GSO that have had MPLS stack entries pushed onto them and thus become MPLS GSO packets. +config MPLS_ROUTING + bool "MPLS: routing support" + help + Add support for forwarding of mpls packets. + endif # MPLS diff --git a/net/mpls/Makefile b/net/mpls/Makefile index 6dec088c2d0f..60af15f1960e 100644 --- a/net/mpls/Makefile +++ b/net/mpls/Makefile @@ -2,3 +2,4 @@ # Makefile for MPLS. # obj-$(CONFIG_NET_MPLS_GSO) += mpls_gso.o +obj-$(CONFIG_MPLS_ROUTING) += af_mpls.o diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c new file mode 100644 index 000000000000..924377736b2a --- /dev/null +++ b/net/mpls/af_mpls.c @@ -0,0 +1,349 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +#define MAX_NEW_LABELS 2 + +/* This maximum ha length copied from the definition of struct neighbour */ +#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))) + +struct mpls_route { /* next hop label forwarding entry */ + struct net_device *rt_dev; + struct rcu_head rt_rcu; + u32 rt_label[MAX_NEW_LABELS]; + u8 rt_protocol; /* routing protocol that set this entry */ + u8 rt_labels:2, + rt_via_alen:6; + unsigned short rt_via_family; + u8 rt_via[0]; +}; + +static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) +{ + struct mpls_route *rt = NULL; + + if (index < net->mpls.platform_labels) { + struct mpls_route __rcu **platform_label = + rcu_dereference(net->mpls.platform_label); + rt = rcu_dereference(platform_label[index]); + } + return rt; +} + +static bool mpls_output_possible(const struct net_device *dev) +{ + return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); +} + +static unsigned int mpls_rt_header_size(const struct mpls_route *rt) +{ + /* The size of the layer 2.5 labels to be added for this route */ + return rt->rt_labels * sizeof(struct mpls_shim_hdr); +} + +static unsigned int mpls_dev_mtu(const struct net_device *dev) +{ + /* The amount of data the layer 2 frame can hold */ + return dev->mtu; +} + +static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu) + return false; + + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) + return false; + + return true; +} + +static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, + struct mpls_entry_decoded dec) +{ + /* RFC4385 and RFC5586 encode other packets in mpls such that + * they don't conflict with the ip version number, making + * decoding by examining the ip version correct in everything + * except for the strangest cases. + * + * The strange cases if we choose to support them will require + * manual configuration. + */ + struct iphdr *hdr4 = ip_hdr(skb); + bool success = true; + + if (hdr4->version == 4) { + skb->protocol = htons(ETH_P_IP); + csum_replace2(&hdr4->check, + htons(hdr4->ttl << 8), + htons(dec.ttl << 8)); + hdr4->ttl = dec.ttl; + } + else if (hdr4->version == 6) { + struct ipv6hdr *hdr6 = ipv6_hdr(skb); + skb->protocol = htons(ETH_P_IPV6); + hdr6->hop_limit = dec.ttl; + } + else + /* version 0 and version 1 are used by pseudo wires */ + success = false; + return success; +} + +static int mpls_forward(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct net *net = dev_net(dev); + struct mpls_shim_hdr *hdr; + struct mpls_route *rt; + struct mpls_entry_decoded dec; + struct net_device *out_dev; + unsigned int hh_len; + unsigned int new_header_size; + unsigned int mtu; + int err; + + /* Careful this entire function runs inside of an rcu critical section */ + + if (skb->pkt_type != PACKET_HOST) + goto drop; + + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + goto drop; + + if (!pskb_may_pull(skb, sizeof(*hdr))) + goto drop; + + /* Read and decode the label */ + hdr = mpls_hdr(skb); + dec = mpls_entry_decode(hdr); + + /* Pop the label */ + skb_pull(skb, sizeof(*hdr)); + skb_reset_network_header(skb); + + skb_orphan(skb); + + rt = mpls_route_input_rcu(net, dec.label); + if (!rt) + goto drop; + + /* Find the output device */ + out_dev = rt->rt_dev; + if (!mpls_output_possible(out_dev)) + goto drop; + + if (skb_warn_if_lro(skb)) + goto drop; + + skb_forward_csum(skb); + + /* Verify ttl is valid */ + if (dec.ttl <= 2) + goto drop; + dec.ttl -= 1; + + /* Verify the destination can hold the packet */ + new_header_size = mpls_rt_header_size(rt); + mtu = mpls_dev_mtu(out_dev); + if (mpls_pkt_too_big(skb, mtu - new_header_size)) + goto drop; + + hh_len = LL_RESERVED_SPACE(out_dev); + if (!out_dev->header_ops) + hh_len = 0; + + /* Ensure there is enough space for the headers in the skb */ + if (skb_cow(skb, hh_len + new_header_size)) + goto drop; + + skb->dev = out_dev; + skb->protocol = htons(ETH_P_MPLS_UC); + + if (unlikely(!new_header_size && dec.bos)) { + /* Penultimate hop popping */ + if (!mpls_egress(rt, skb, dec)) + goto drop; + } else { + bool bos; + int i; + skb_push(skb, new_header_size); + skb_reset_network_header(skb); + /* Push the new labels */ + hdr = mpls_hdr(skb); + bos = dec.bos; + for (i = rt->rt_labels - 1; i >= 0; i--) { + hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos); + bos = false; + } + } + + err = neigh_xmit(rt->rt_via_family, out_dev, rt->rt_via, skb); + if (err) + net_dbg_ratelimited("%s: packet transmission failed: %d\n", + __func__, err); + return 0; + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static struct packet_type mpls_packet_type __read_mostly = { + .type = cpu_to_be16(ETH_P_MPLS_UC), + .func = mpls_forward, +}; + +static struct mpls_route *mpls_rt_alloc(size_t alen) +{ + struct mpls_route *rt; + + rt = kzalloc(GFP_KERNEL, sizeof(*rt) + alen); + if (rt) + rt->rt_via_alen = alen; + return rt; +} + +static void mpls_rt_free(struct mpls_route *rt) +{ + if (rt) + kfree_rcu(rt, rt_rcu); +} + +static void mpls_route_update(struct net *net, unsigned index, + struct net_device *dev, struct mpls_route *new, + const struct nl_info *info) +{ + struct mpls_route *rt, *old = NULL; + + ASSERT_RTNL(); + + rt = net->mpls.platform_label[index]; + if (!dev || (rt && (rt->rt_dev == dev))) { + rcu_assign_pointer(net->mpls.platform_label[index], new); + old = rt; + } + + /* If we removed a route free it now */ + mpls_rt_free(old); +} + +static void mpls_ifdown(struct net_device *dev) +{ + struct net *net = dev_net(dev); + unsigned index; + + for (index = 0; index < net->mpls.platform_labels; index++) { + struct mpls_route *rt = net->mpls.platform_label[index]; + if (!rt) + continue; + if (rt->rt_dev != dev) + continue; + rt->rt_dev = NULL; + } +} + +static int mpls_dev_notify(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + switch(event) { + case NETDEV_UNREGISTER: + mpls_ifdown(dev); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block mpls_dev_notifier = { + .notifier_call = mpls_dev_notify, +}; + +static int mpls_net_init(struct net *net) +{ + net->mpls.platform_labels = 0; + net->mpls.platform_label = NULL; + + return 0; +} + +static void mpls_net_exit(struct net *net) +{ + unsigned int index; + + /* An rcu grace period haselapsed since there was a device in + * the network namespace (and thus the last in fqlight packet) + * left this network namespace. This is because + * unregister_netdevice_many and netdev_run_todo has completed + * for each network device that was in this network namespace. + * + * As such no additional rcu synchronization is necessary when + * freeing the platform_label table. + */ + rtnl_lock(); + for (index = 0; index < net->mpls.platform_labels; index++) { + struct mpls_route *rt = net->mpls.platform_label[index]; + rcu_assign_pointer(net->mpls.platform_label[index], NULL); + mpls_rt_free(rt); + } + rtnl_unlock(); + + kvfree(net->mpls.platform_label); +} + +static struct pernet_operations mpls_net_ops = { + .init = mpls_net_init, + .exit = mpls_net_exit, +}; + +static int __init mpls_init(void) +{ + int err; + + BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4); + + err = register_pernet_subsys(&mpls_net_ops); + if (err) + goto out; + + err = register_netdevice_notifier(&mpls_dev_notifier); + if (err) + goto out_unregister_pernet; + + dev_add_pack(&mpls_packet_type); + + err = 0; +out: + return err; + +out_unregister_pernet: + unregister_pernet_subsys(&mpls_net_ops); + goto out; +} +module_init(mpls_init); + +static void __exit mpls_exit(void) +{ + dev_remove_pack(&mpls_packet_type); + unregister_netdevice_notifier(&mpls_dev_notifier); + unregister_pernet_subsys(&mpls_net_ops); +} +module_exit(mpls_exit); + +MODULE_DESCRIPTION("MultiProtocol Label Switching"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_NETPROTO(PF_MPLS); diff --git a/net/mpls/internal.h b/net/mpls/internal.h new file mode 100644 index 000000000000..c2944cb84d48 --- /dev/null +++ b/net/mpls/internal.h @@ -0,0 +1,56 @@ +#ifndef MPLS_INTERNAL_H +#define MPLS_INTERNAL_H + +#define LABEL_IPV4_EXPLICIT_NULL 0 /* RFC3032 */ +#define LABEL_ROUTER_ALERT_LABEL 1 /* RFC3032 */ +#define LABEL_IPV6_EXPLICIT_NULL 2 /* RFC3032 */ +#define LABEL_IMPLICIT_NULL 3 /* RFC3032 */ +#define LABEL_ENTROPY_INDICATOR 7 /* RFC6790 */ +#define LABEL_GAL 13 /* RFC5586 */ +#define LABEL_OAM_ALERT 14 /* RFC3429 */ +#define LABEL_EXTENSION 15 /* RFC7274 */ + + +struct mpls_shim_hdr { + __be32 label_stack_entry; +}; + +struct mpls_entry_decoded { + u32 label; + u8 ttl; + u8 tc; + u8 bos; +}; + +struct sk_buff; + +static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) +{ + return (struct mpls_shim_hdr *)skb_network_header(skb); +} + +static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos) +{ + struct mpls_shim_hdr result; + result.label_stack_entry = + cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) | + (tc << MPLS_LS_TC_SHIFT) | + (bos ? (1 << MPLS_LS_S_SHIFT) : 0) | + (ttl << MPLS_LS_TTL_SHIFT)); + return result; +} + +static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr) +{ + struct mpls_entry_decoded result; + unsigned entry = be32_to_cpu(hdr->label_stack_entry); + + result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; + result.tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT; + result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT; + + return result; +} + +#endif /* MPLS_INTERNAL_H */ -- cgit v1.3-6-gb490 From c32ec2a11321978c34296d9a6bd5b0c31a2eb182 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 4 Mar 2015 12:14:41 +0100 Subject: bcma: make bcma_host_pci_(up|down) calls safe for every config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were providing declarations but actual code was compiled only with CONFIG_BCMA_HOST_PCI set. This could result in: ERROR: "bcma_host_pci_down" [drivers/net/wireless/brcm80211/brcmsmac/brcmsmac.ko] undefined! ERROR: "bcma_host_pci_up" [drivers/net/wireless/brcm80211/brcmsmac/brcmsmac.ko] undefined! ERROR: "bcma_host_pci_down" [drivers/net/wireless/b43/b43.ko] undefined! ERROR: "bcma_host_pci_up" [drivers/net/wireless/b43/b43.ko] undefined! Reported-by: Arnd Bergmann Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 037620b3f113..44057b45ed32 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -434,8 +434,17 @@ static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus, return bcma_find_core_unit(bus, coreid, 0); } +#ifdef CONFIG_BCMA_HOST_PCI extern void bcma_host_pci_up(struct bcma_bus *bus); extern void bcma_host_pci_down(struct bcma_bus *bus); +#else +static inline void bcma_host_pci_up(struct bcma_bus *bus) +{ +} +static inline void bcma_host_pci_down(struct bcma_bus *bus) +{ +} +#endif extern bool bcma_core_is_enabled(struct bcma_device *core); extern void bcma_core_disable(struct bcma_device *core, u32 flags); -- cgit v1.3-6-gb490 From 0a4e699a41f767dff76ca7dc1019b9ca6de3eb42 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 4 Mar 2015 14:24:52 +0100 Subject: bcma: move internal function declarations to private header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions are not exported nor used anywhere, so there is no reason to put them in public headers. Also drop unused bcma_chipco_(suspend|resume). Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/bcma_private.h | 41 +++++++++++++++++++++++++++++ include/linux/bcma/bcma_driver_chipcommon.h | 11 -------- include/linux/bcma/bcma_driver_gmac_cmn.h | 6 ----- include/linux/bcma/bcma_driver_mips.h | 15 ----------- include/linux/bcma/bcma_driver_pci.h | 2 -- include/linux/bcma/bcma_driver_pcie2.h | 2 -- 6 files changed, 41 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index 29565e30700a..5a1d22489afc 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -43,6 +43,9 @@ int bcma_bus_scan(struct bcma_bus *bus); int bcma_sprom_get(struct bcma_bus *bus); /* driver_chipcommon.c */ +void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc); +void bcma_core_chipcommon_init(struct bcma_drv_cc *cc); +void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable); #ifdef CONFIG_BCMA_DRIVER_MIPS void bcma_chipco_serial_init(struct bcma_drv_cc *cc); extern struct platform_device bcma_pflash_dev; @@ -53,6 +56,8 @@ int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb); void bcma_core_chipcommon_b_free(struct bcma_drv_cc_b *ccb); /* driver_chipcommon_pmu.c */ +void bcma_pmu_early_init(struct bcma_drv_cc *cc); +void bcma_pmu_init(struct bcma_drv_cc *cc); u32 bcma_pmu_get_alp_clock(struct bcma_drv_cc *cc); u32 bcma_pmu_get_cpu_clock(struct bcma_drv_cc *cc); @@ -102,10 +107,13 @@ static inline void __exit bcma_host_soc_unregister_driver(void) /* driver_pci.c */ u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address); +void bcma_core_pci_early_init(struct bcma_drv_pci *pc); +void bcma_core_pci_init(struct bcma_drv_pci *pc); void bcma_core_pci_up(struct bcma_drv_pci *pc); void bcma_core_pci_down(struct bcma_drv_pci *pc); /* driver_pcie2.c */ +void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2); void bcma_core_pcie2_up(struct bcma_drv_pcie2 *pcie2); extern int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc); @@ -123,6 +131,39 @@ static inline void bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc) } #endif /* CONFIG_BCMA_DRIVER_PCI_HOSTMODE */ +/************************************************** + * driver_mips.c + **************************************************/ + +#ifdef CONFIG_BCMA_DRIVER_MIPS +unsigned int bcma_core_mips_irq(struct bcma_device *dev); +void bcma_core_mips_early_init(struct bcma_drv_mips *mcore); +void bcma_core_mips_init(struct bcma_drv_mips *mcore); +#else +static inline unsigned int bcma_core_mips_irq(struct bcma_device *dev) +{ + return 0; +} +static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) +{ +} +static inline void bcma_core_mips_init(struct bcma_drv_mips *mcore) +{ +} +#endif + +/************************************************** + * driver_gmac_cmn.c + **************************************************/ + +#ifdef CONFIG_BCMA_DRIVER_GMAC_CMN +void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc); +#else +static inline void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) +{ +} +#endif + #ifdef CONFIG_BCMA_DRIVER_GPIO /* driver_gpio.c */ int bcma_gpio_init(struct bcma_drv_cc *cc); diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index db6fa217f98b..6cceedf65ca2 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -663,14 +663,6 @@ struct bcma_drv_cc_b { #define bcma_cc_maskset32(cc, offset, mask, set) \ bcma_cc_write32(cc, offset, (bcma_cc_read32(cc, offset) & (mask)) | (set)) -extern void bcma_core_chipcommon_init(struct bcma_drv_cc *cc); -extern void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc); - -extern void bcma_chipco_suspend(struct bcma_drv_cc *cc); -extern void bcma_chipco_resume(struct bcma_drv_cc *cc); - -void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable); - extern u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks); extern u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc); @@ -690,9 +682,6 @@ u32 bcma_chipco_gpio_pullup(struct bcma_drv_cc *cc, u32 mask, u32 value); u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value); /* PMU support */ -extern void bcma_pmu_init(struct bcma_drv_cc *cc); -extern void bcma_pmu_early_init(struct bcma_drv_cc *cc); - extern void bcma_chipco_pll_write(struct bcma_drv_cc *cc, u32 offset, u32 value); extern void bcma_chipco_pll_maskset(struct bcma_drv_cc *cc, u32 offset, diff --git a/include/linux/bcma/bcma_driver_gmac_cmn.h b/include/linux/bcma/bcma_driver_gmac_cmn.h index 4dd1f33e36a2..4354d4ea6713 100644 --- a/include/linux/bcma/bcma_driver_gmac_cmn.h +++ b/include/linux/bcma/bcma_driver_gmac_cmn.h @@ -91,10 +91,4 @@ struct bcma_drv_gmac_cmn { #define gmac_cmn_write16(gc, offset, val) bcma_write16((gc)->core, offset, val) #define gmac_cmn_write32(gc, offset, val) bcma_write32((gc)->core, offset, val) -#ifdef CONFIG_BCMA_DRIVER_GMAC_CMN -extern void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc); -#else -static inline void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) { } -#endif - #endif /* LINUX_BCMA_DRIVER_GMAC_CMN_H_ */ diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h index 0b3b32aeeb8a..8eea7f9e33b4 100644 --- a/include/linux/bcma/bcma_driver_mips.h +++ b/include/linux/bcma/bcma_driver_mips.h @@ -39,21 +39,6 @@ struct bcma_drv_mips { u8 early_setup_done:1; }; -#ifdef CONFIG_BCMA_DRIVER_MIPS -extern void bcma_core_mips_init(struct bcma_drv_mips *mcore); -extern void bcma_core_mips_early_init(struct bcma_drv_mips *mcore); - -extern unsigned int bcma_core_mips_irq(struct bcma_device *dev); -#else -static inline void bcma_core_mips_init(struct bcma_drv_mips *mcore) { } -static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { } - -static inline unsigned int bcma_core_mips_irq(struct bcma_device *dev) -{ - return 0; -} -#endif - extern u32 bcma_cpu_clock(struct bcma_drv_mips *mcore); #endif /* LINUX_BCMA_DRIVER_MIPS_H_ */ diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 6b8bca67851f..8e90004fdfd7 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -238,8 +238,6 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) -extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc); -extern void bcma_core_pci_init(struct bcma_drv_pci *pc); extern int bcma_core_pci_irq_ctl(struct bcma_bus *bus, struct bcma_device *core, bool enable); extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); diff --git a/include/linux/bcma/bcma_driver_pcie2.h b/include/linux/bcma/bcma_driver_pcie2.h index d8c43294c527..31e6d17ab798 100644 --- a/include/linux/bcma/bcma_driver_pcie2.h +++ b/include/linux/bcma/bcma_driver_pcie2.h @@ -155,6 +155,4 @@ struct bcma_drv_pcie2 { #define pcie2_set32(pcie2, offset, set) bcma_set32((pcie2)->core, offset, set) #define pcie2_mask32(pcie2, offset, mask) bcma_mask32((pcie2)->core, offset, mask) -void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2); - #endif /* LINUX_BCMA_DRIVER_PCIE2_H_ */ -- cgit v1.3-6-gb490 From 842a9ae08a25671db3d4f689eed68b4d64be15b5 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 4 Mar 2015 12:54:21 +0200 Subject: bridge: Extend Proxy ARP design to allow optional rules for Wi-Fi This extends the design in commit 958501163ddd ("bridge: Add support for IEEE 802.11 Proxy ARP") with optional set of rules that are needed to meet the IEEE 802.11 and Hotspot 2.0 requirements for ProxyARP. The previously added BR_PROXYARP behavior is left as-is and a new BR_PROXYARP_WIFI alternative is added so that this behavior can be configured from user space when required. In addition, this enables proxyarp functionality for unicast ARP requests for both BR_PROXYARP and BR_PROXYARP_WIFI since it is possible to use unicast as well as broadcast for these frames. The key differences in functionality: BR_PROXYARP: - uses the flag on the bridge port on which the request frame was received to determine whether to reply - block bridge port flooding completely on ports that enable proxy ARP BR_PROXYARP_WIFI: - uses the flag on the bridge port to which the target device of the request belongs - block bridge port flooding selectively based on whether the proxyarp functionality replied Signed-off-by: Jouni Malinen Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + include/uapi/linux/if_link.h | 1 + net/bridge/br_forward.c | 3 +++ net/bridge/br_input.c | 17 ++++++++++------- net/bridge/br_netlink.c | 5 ++++- net/bridge/br_private.h | 1 + net/bridge/br_sysfs_if.c | 2 ++ 7 files changed, 22 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index a57bca2ea97e..dad8b00beed2 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -44,6 +44,7 @@ struct br_ip_list { #define BR_PROMISC BIT(7) #define BR_PROXYARP BIT(8) #define BR_LEARNING_SYNC BIT(9) +#define BR_PROXYARP_WIFI BIT(10) extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index dfd0bb22e554..756436e1ce89 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -247,6 +247,7 @@ enum { IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */ IFLA_BRPORT_PROXYARP, /* proxy ARP */ IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */ + IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index f96933a823e3..1238fabff874 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -188,6 +188,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, /* Do not flood to ports that enable proxy ARP */ if (p->flags & BR_PROXYARP) continue; + if ((p->flags & BR_PROXYARP_WIFI) && + BR_INPUT_SKB_CB(skb)->proxyarp_replied) + continue; prev = maybe_deliver(prev, p, skb, __packet_hook); if (IS_ERR(prev)) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index e2aa7be3a847..052c5ebbc947 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -60,7 +60,7 @@ static int br_pass_frame_up(struct sk_buff *skb) } static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, - u16 vid) + u16 vid, struct net_bridge_port *p) { struct net_device *dev = br->dev; struct neighbour *n; @@ -68,6 +68,8 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, u8 *arpptr, *sha; __be32 sip, tip; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; + if (dev->flags & IFF_NOARP) return; @@ -105,9 +107,12 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, } f = __br_fdb_get(br, n->ha, vid); - if (f) + if (f && ((p->flags & BR_PROXYARP) || + (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) { arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip, sha, n->ha, sha); + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + } neigh_release(n); } @@ -153,12 +158,10 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; - if (is_broadcast_ether_addr(dest)) { - if (IS_ENABLED(CONFIG_INET) && - p->flags & BR_PROXYARP && - skb->protocol == htons(ETH_P_ARP)) - br_do_proxy_arp(skb, br, vid); + if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP)) + br_do_proxy_arp(skb, br, vid, p); + if (is_broadcast_ether_addr(dest)) { skb2 = skb; unicast = false; } else if (is_multicast_ether_addr(dest)) { diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index c72083968768..8bc6b67457dc 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -143,7 +143,9 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) || - nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP))) + nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) || + nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI, + !!(p->flags & BR_PROXYARP_WIFI))) return -EMSGSIZE; return 0; @@ -553,6 +555,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); + br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index de0919975a25..c32e279c62f8 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -305,6 +305,7 @@ struct br_input_skb_cb { #endif u16 frag_max_size; + bool proxyarp_replied; #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool vlan_filtered; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 2de5d91199e8..4905845a94e9 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -171,6 +171,7 @@ BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); BRPORT_ATTR_FLAG(learning, BR_LEARNING); BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP); +BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -215,6 +216,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_multicast_fast_leave, #endif &brport_attr_proxyarp, + &brport_attr_proxyarp_wifi, NULL }; -- cgit v1.3-6-gb490 From 4586f1bb911ce219a4bc1c2a9d6eee2e058b2b51 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Thu, 5 Mar 2015 21:21:14 -0800 Subject: netdevice: add IPv4 fib add/del ops Add two new ndo ops for IPv4 fib offload support, add and del. Add uses modifiy semantics if fib entry already offloaded. Drivers implementing the new ndo ops will return err<0 if programming device fails, for example if device's tables are full. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 625c8d71511b..45413784a3b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -768,6 +768,8 @@ struct netdev_phys_item_id { typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); +struct fib_info; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1031,6 +1033,14 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state); * Called to notify switch device port of bridge port STP * state change. + * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst, + * int dst_len, struct fib_info *fi, + * u8 tos, u8 type, u32 tb_id); + * Called to add/modify IPv4 route to switch device. + * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst, + * int dst_len, struct fib_info *fi, + * u8 tos, u8 type, u32 tb_id); + * Called to delete IPv4 route from switch device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1192,6 +1202,18 @@ struct net_device_ops { struct netdev_phys_item_id *psid); int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state); + int (*ndo_switch_fib_ipv4_add)(struct net_device *dev, + __be32 dst, + int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 tb_id); + int (*ndo_switch_fib_ipv4_del)(struct net_device *dev, + __be32 dst, + int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 tb_id); #endif }; -- cgit v1.3-6-gb490 From d237baa1cbb3a2335357484c1d63a810a01947e2 Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Thu, 5 Mar 2015 20:16:12 +0200 Subject: net/mlx4_core: Add basic elements for QCN Add device capability, firmware command opcode and etc prior elements needed for QCN suppprt. Disable SRIOV VF view/access for QCN is disabled. While here, remove a redundant offset definition into the QUERY_DEV_CAP mailbox. Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 +++++++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 13 +++++++++++-- include/linux/mlx4/cmd.h | 13 +++++++++++++ include/linux/mlx4/device.h | 3 ++- 4 files changed, 35 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index a681d7c0bb9f..20b3c7b21e63 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1499,6 +1499,15 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_ACCESS_REG_wrapper, }, + { + .opcode = MLX4_CMD_CONGESTION_CTRL_OPCODE, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper, + }, /* Native multicast commands are not available for guests */ { .opcode = MLX4_CMD_QP_ATTACH, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 5a21e5dc94cb..242bcee5d774 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -143,7 +143,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [18] = "More than 80 VFs support", [19] = "Performance optimized for limited rule configuration flow steering support", [20] = "Recoverable error events support", - [21] = "Port Remap support" + [21] = "Port Remap support", + [22] = "QCN support" }; int i; @@ -675,7 +676,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 #define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE 0x7a -#define QUERY_DEV_CAP_ETH_PROT_CTRL_OFFSET 0x7a +#define QUERY_DEV_CAP_ECN_QCN_VER_OFFSET 0x7b #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82 #define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84 @@ -777,6 +778,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET); dev_cap->fs_max_num_qp_per_entry = field; + MLX4_GET(field, outbox, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET); + if (field & 0x1) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QCN; MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); @@ -1149,6 +1153,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, DEV_CAP_EXT_2_FLAG_FSM); MLX4_PUT(outbox->buf, field32, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); + /* turn off QCN for guests */ + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET); + field &= 0xfe; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET); + return 0; } diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 7b6d4e9ff603..7299e9548906 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -163,6 +163,9 @@ enum { MLX4_QP_FLOW_STEERING_ATTACH = 0x65, MLX4_QP_FLOW_STEERING_DETACH = 0x66, MLX4_FLOW_STEERING_IB_UC_QP_RANGE = 0x64, + + /* Update and read QCN parameters */ + MLX4_CMD_CONGESTION_CTRL_OPCODE = 0x68, }; enum { @@ -233,6 +236,16 @@ struct mlx4_config_dev_params { u8 rx_csum_flags_port_2; }; +enum mlx4_en_congestion_control_algorithm { + MLX4_CTRL_ALGO_802_1_QAU_REACTION_POINT = 0, +}; + +enum mlx4_en_congestion_control_opmod { + MLX4_CONGESTION_CONTROL_GET_PARAMS, + MLX4_CONGESTION_CONTROL_GET_STATISTICS, + MLX4_CONGESTION_CONTROL_SET_PARAMS = 4, +}; + struct mlx4_dev; struct mlx4_cmd_mailbox { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e4ebff7e9d02..1cc54822b931 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -203,7 +203,8 @@ enum { MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18, MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19, MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, - MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21 + MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21, + MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22, }; enum { -- cgit v1.3-6-gb490 From 8bd63cf1a426e69bf4f611b08978f721e46c194f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Mar 2015 00:52:33 +0100 Subject: bridge: move mac header copying into br_netfilter The mac header only has to be copied back into the skb for fragments generated by ip_fragment(), which only happens for bridge forwarded packets with nf-call-iptables=1 && active nf_defrag. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 31 ------------------------------- net/bridge/br_forward.c | 4 +--- net/bridge/br_netfilter.c | 29 ++++++++++++++++++++++++++++- 3 files changed, 29 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index c755e4971fa3..332ef8ab37e9 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -44,36 +44,6 @@ static inline void nf_bridge_update_protocol(struct sk_buff *skb) skb->protocol = htons(ETH_P_PPP_SES); } -/* Fill in the header for fragmented IP packets handled by - * the IPv4 connection tracking code. - * - * Only used in br_forward.c - */ -static inline int nf_bridge_copy_header(struct sk_buff *skb) -{ - int err; - unsigned int header_size; - - nf_bridge_update_protocol(skb); - header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - err = skb_cow_head(skb, header_size); - if (err) - return err; - - skb_copy_to_linear_data_offset(skb, -header_size, - skb->nf_bridge->data, header_size); - __skb_push(skb, nf_bridge_encap_header_len(skb)); - return 0; -} - -static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb) -{ - if (skb->nf_bridge && - skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT)) - return nf_bridge_copy_header(skb); - return 0; -} - static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) @@ -119,7 +89,6 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) } #else -#define nf_bridge_maybe_copy_header(skb) (0) #define nf_bridge_pad(skb) (0) #define br_drop_fake_rtable(skb) do { } while (0) #endif /* CONFIG_BRIDGE_NETFILTER */ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index f96933a823e3..32541d4f72e8 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -37,9 +37,7 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct sk_buff *skb) { - /* ip_fragment doesn't copy the MAC header */ - if (nf_bridge_maybe_copy_header(skb) || - !is_skb_forwardable(skb->dev, skb)) { + if (!is_skb_forwardable(skb->dev, skb)) { kfree_skb(skb); } else { skb_push(skb, ETH_HLEN); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 0ee453fad3de..e5479112c4a3 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -764,6 +764,33 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, } #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +static bool nf_bridge_copy_header(struct sk_buff *skb) +{ + int err; + unsigned int header_size; + + nf_bridge_update_protocol(skb); + header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); + err = skb_cow_head(skb, header_size); + if (err) + return false; + + skb_copy_to_linear_data_offset(skb, -header_size, + skb->nf_bridge->data, header_size); + __skb_push(skb, nf_bridge_encap_header_len(skb)); + return true; +} + +static int br_nf_push_frag_xmit(struct sk_buff *skb) +{ + if (!nf_bridge_copy_header(skb)) { + kfree_skb(skb); + return 0; + } + + return br_dev_queue_push_xmit(skb); +} + static int br_nf_dev_queue_xmit(struct sk_buff *skb) { int ret; @@ -780,7 +807,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) /* Drop invalid packet */ return NF_DROP; IPCB(skb)->frag_max_size = frag_max_size; - ret = ip_fragment(skb, br_dev_queue_push_xmit); + ret = ip_fragment(skb, br_nf_push_frag_xmit); } else ret = br_dev_queue_push_xmit(skb); -- cgit v1.3-6-gb490 From 4a9d2f200862683d6680d5565f30c126625afe65 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Mar 2015 00:52:34 +0100 Subject: netfilter: bridge: move nf_bridge_update_protocol to where its used no need to keep it in a header file. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 8 -------- net/bridge/br_netfilter.c | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 332ef8ab37e9..dd580a9a1add 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -36,14 +36,6 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) } } -static inline void nf_bridge_update_protocol(struct sk_buff *skb) -{ - if (skb->nf_bridge->mask & BRNF_8021Q) - skb->protocol = htons(ETH_P_8021Q); - else if (skb->nf_bridge->mask & BRNF_PPPoE) - skb->protocol = htons(ETH_P_PPP_SES); -} - static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index e5479112c4a3..5b3bceb3ee62 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -239,6 +239,14 @@ drop: return -1; } +static void nf_bridge_update_protocol(struct sk_buff *skb) +{ + if (skb->nf_bridge->mask & BRNF_8021Q) + skb->protocol = htons(ETH_P_8021Q); + else if (skb->nf_bridge->mask & BRNF_PPPoE) + skb->protocol = htons(ETH_P_PPP_SES); +} + /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. */ -- cgit v1.3-6-gb490 From e5de75bf88858f5b3ab11e2504b86ec059f03102 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 9 Mar 2015 12:30:12 +0100 Subject: netfilter: bridge: move DNAT helper to br_netfilter Only one caller, there is no need to keep this in a header. Move it to br_netfilter.c where this belongs to. Based on patch from Florian Westphal. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 12 ------------ net/bridge/br_device.c | 5 +---- net/bridge/br_netfilter.c | 32 ++++++++++++++++++++++++++++++++ net/bridge/br_private.h | 5 +++++ 4 files changed, 38 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index dd580a9a1add..bb39113ea596 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -44,18 +44,6 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) } int br_handle_frame_finish(struct sk_buff *skb); -/* Only used in br_device.c */ -static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) -{ - struct nf_bridge_info *nf_bridge = skb->nf_bridge; - - skb_pull(skb, ETH_HLEN); - nf_bridge->mask ^= BRNF_BRIDGED_DNAT; - skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), - skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); - skb->dev = nf_bridge->physindev; - return br_handle_frame_finish(skb); -} /* This is called by the IP fragmenting code and it ensures there is * enough room for the encapsulating header (if there is one). */ diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ffd379db5938..294cbcc49263 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -36,13 +36,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) u16 vid = 0; rcu_read_lock(); -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { - br_nf_pre_routing_finish_bridge_slow(skb); + if (br_nf_prerouting_finish_bridge(skb)) { rcu_read_unlock(); return NETDEV_TX_OK; } -#endif u64_stats_update_begin(&brstats->syncp); brstats->tx_packets++; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index ef1fe281ca11..a8361c7cdf81 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -892,6 +892,38 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, return NF_ACCEPT; } +/* This is called when br_netfilter has called into iptables/netfilter, + * and DNAT has taken place on a bridge-forwarded packet. + * + * neigh->output has created a new MAC header, with local br0 MAC + * as saddr. + * + * This restores the original MAC saddr of the bridged packet + * before invoking bridge forward logic to transmit the packet. + */ +static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + + skb_pull(skb, ETH_HLEN); + nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; + + skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), + skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); + skb->dev = nf_bridge->physindev; + br_handle_frame_finish(skb); +} + +int br_nf_prerouting_finish_bridge(struct sk_buff *skb) +{ + if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { + br_nf_pre_routing_finish_bridge_slow(skb); + return 1; + } + return 0; +} +EXPORT_SYMBOL_GPL(br_nf_prerouting_finish_bridge); + void br_netfilter_enable(void) { } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index de0919975a25..d63fc17fe4f4 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -764,10 +764,15 @@ static inline int br_vlan_enabled(struct net_bridge *br) /* br_netfilter.c */ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) +int br_nf_prerouting_finish_bridge(struct sk_buff *skb); int br_nf_core_init(void); void br_nf_core_fini(void); void br_netfilter_rtable_init(struct net_bridge *); #else +static inline int br_nf_prerouting_finish_bridge(struct sk_buff *skb) +{ + return 0; +} static inline int br_nf_core_init(void) { return 0; } static inline void br_nf_core_fini(void) {} #define br_netfilter_rtable_init(x) -- cgit v1.3-6-gb490 From aa836df958886e57ff0d43fb3d79d1af4aec0cc8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 9 Mar 2015 14:31:20 -0700 Subject: net: core: add of_find_net_device_by_node() Add a helper function which allows getting the struct net_device pointer associated with a given struct device_node pointer. This is useful for instance for DSA Ethernet devices not backed by a platform_device, but a PCI device. Since we need to access net_class which is not accessible outside of net/core/net-sysfs.c, this helper function is also added here and gated with CONFIG_OF_NET. Network devices initialized with SET_NETDEV_DEV() are also taken into account by checking for dev->parent first and then falling back to checking the device pointer within struct net_device. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/of_net.h | 8 ++++++++ net/core/net-sysfs.c | 25 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_net.h b/include/linux/of_net.h index 34597c8c1a4c..9cd72aab76fe 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -9,8 +9,11 @@ #ifdef CONFIG_OF_NET #include + +struct net_device; extern int of_get_phy_mode(struct device_node *np); extern const void *of_get_mac_address(struct device_node *np); +extern struct net_device *of_find_net_device_by_node(struct device_node *np); #else static inline int of_get_phy_mode(struct device_node *np) { @@ -21,6 +24,11 @@ static inline const void *of_get_mac_address(struct device_node *np) { return NULL; } + +static inline struct net_device *of_find_net_device_by_node(struct device_node *np) +{ + return NULL; +} #endif #endif /* __LINUX_OF_NET_H */ diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f2aa73bfb0e4..cf30620a88e1 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -1374,6 +1375,30 @@ static struct class net_class = { .namespace = net_namespace, }; +#ifdef CONFIG_OF_NET +static int of_dev_node_match(struct device *dev, const void *data) +{ + int ret = 0; + + if (dev->parent) + ret = dev->parent->of_node == data; + + return ret == 0 ? dev->of_node == data : ret; +} + +struct net_device *of_find_net_device_by_node(struct device_node *np) +{ + struct device *dev; + + dev = class_find_device(&net_class, NULL, np, of_dev_node_match); + if (!dev) + return NULL; + + return to_net_dev(dev); +} +EXPORT_SYMBOL(of_find_net_device_by_node); +#endif + /* Delete sysfs entries but hold kobject reference until after all * netdev references are gone. */ -- cgit v1.3-6-gb490 From f8f2147150de303e814c0452075d467734d3544b Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Mon, 9 Mar 2015 13:59:09 -0700 Subject: switchdev: add netlink flags to IPv4 FIB add op Pass in the netlink flags (NLM_F_*) into switchdev driver for IPv4 FIB add op to allow driver to 1) optimize hardware updates, 2) handle ip route prepend and append commands correctly. Suggested-by: Jamal Hadi Salim Suggested-by: Roopa Prabhu Signed-off-by: Scott Feldman Reviewed-by: Simon Horman Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 3 ++- include/linux/netdevice.h | 3 ++- include/net/switchdev.h | 6 ++++-- net/ipv4/fib_trie.c | 5 ++++- net/switchdev/switchdev.c | 7 +++++-- 5 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 65e140315a58..223348d8cc07 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4152,7 +4152,8 @@ static int rocker_port_switch_port_stp_update(struct net_device *dev, u8 state) static int rocker_port_switch_fib_ipv4_add(struct net_device *dev, __be32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) + u8 tos, u8 type, + u32 nlflags, u32 tb_id) { struct rocker_port *rocker_port = netdev_priv(dev); int flags = 0; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45413784a3b1..1354ae83efc8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1035,7 +1035,7 @@ struct fib_info; * state change. * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst, * int dst_len, struct fib_info *fi, - * u8 tos, u8 type, u32 tb_id); + * u8 tos, u8 type, u32 nlflags, u32 tb_id); * Called to add/modify IPv4 route to switch device. * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst, * int dst_len, struct fib_info *fi, @@ -1207,6 +1207,7 @@ struct net_device_ops { int dst_len, struct fib_info *fi, u8 tos, u8 type, + u32 nlflags, u32 tb_id); int (*ndo_switch_fib_ipv4_del)(struct net_device *dev, __be32 dst, diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 933fac410a7a..1a9382febcc3 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -1,6 +1,7 @@ /* * include/net/switchdev.h - Switch device API * Copyright (c) 2014 Jiri Pirko + * Copyright (c) 2014-2015 Scott Feldman * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -52,7 +53,7 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); + u8 tos, u8 type, u32 nlflags, u32 tb_id); int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id); void netdev_switch_fib_ipv4_abort(struct fib_info *fi); @@ -117,7 +118,8 @@ static inline int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device * static inline int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) + u8 tos, u8 type, + u32 nlflags, u32 tb_id) { return 0; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 90955455884e..fcfa9825a816 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1155,6 +1155,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) err = netdev_switch_fib_ipv4_add(key, plen, fi, new_fa->fa_tos, cfg->fc_type, + cfg->fc_nlflags, tb->tb_id); if (err) { netdev_switch_fib_ipv4_abort(fi); @@ -1201,7 +1202,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) /* (Optionally) offload fib entry to switch hardware. */ err = netdev_switch_fib_ipv4_add(key, plen, fi, tos, - cfg->fc_type, tb->tb_id); + cfg->fc_type, + cfg->fc_nlflags, + tb->tb_id); if (err) { netdev_switch_fib_ipv4_abort(fi); goto out_free_new_fa; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index aba6aa2656d8..8cf42a69baf4 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1,6 +1,7 @@ /* * net/switchdev/switchdev.c - Switch device API * Copyright (c) 2014 Jiri Pirko + * Copyright (c) 2014-2015 Scott Feldman * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -294,12 +295,13 @@ static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) * @fi: route FIB info structure * @tos: route TOS * @type: route type + * @nlflags: netlink flags passed in (NLM_F_*) * @tb_id: route table ID * * Add IPv4 route entry to switch device. */ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) + u8 tos, u8 type, u32 nlflags, u32 tb_id) { struct net_device *dev; const struct net_device_ops *ops; @@ -324,7 +326,8 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, if (ops->ndo_switch_fib_ipv4_add) { err = ops->ndo_switch_fib_ipv4_add(dev, htonl(dst), dst_len, - fi, tos, type, tb_id); + fi, tos, type, nlflags, + tb_id); if (!err) fi->fib_flags |= RTNH_F_EXTERNAL; } -- cgit v1.3-6-gb490 From 59e33c2b021322db92ca27c4d9958e57630443b6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 9 Mar 2015 15:44:13 -0700 Subject: net: phy: bcm7xxx: add alternate id for 7439 BCM7439 has an alternate PHY OUI: 0xae025080 which is to be found in some variants of this chip. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm7xxx.c | 1 + include/linux/brcmphy.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 974ec4515269..64c74c6a4828 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -396,6 +396,7 @@ static struct phy_driver bcm7xxx_driver[] = { BCM7XXX_28NM_GPHY(PHY_ID_BCM7364, "Broadcom BCM7364"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7366, "Broadcom BCM7366"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"), + BCM7XXX_28NM_GPHY(PHY_ID_BCM7439_2, "Broadcom BCM7439 (2)"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"), { .phy_id = PHY_ID_BCM7425, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 7ccd928cc1f2..cab606617522 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -19,6 +19,7 @@ #define PHY_ID_BCM7425 0x03625e60 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7439 0x600d8480 +#define PHY_ID_BCM7439_2 0xae025080 #define PHY_ID_BCM7445 0x600d8510 #define PHY_BCM_OUI_MASK 0xfffffc00 -- cgit v1.3-6-gb490 From 491da2a477077357c8206a601559e2ea58f224db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Mar 2015 07:15:52 -0700 Subject: net: constify sock_diag_check_cookie() sock_diag_check_cookie() second parameter is constant Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 2 +- net/core/sock_diag.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 46cca4c06848..b5ad7d35a636 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -19,7 +19,7 @@ void sock_diag_unregister(const struct sock_diag_handler *h); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); -int sock_diag_check_cookie(void *sk, __u32 *cookie); +int sock_diag_check_cookie(void *sk, const __u32 *cookie); void sock_diag_save_cookie(void *sk, __u32 *cookie); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index ad704c757bb4..96e70ee05a8d 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -13,7 +13,7 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); -int sock_diag_check_cookie(void *sk, __u32 *cookie) +int sock_diag_check_cookie(void *sk, const __u32 *cookie) { if ((cookie[0] != INET_DIAG_NOCOOKIE || cookie[1] != INET_DIAG_NOCOOKIE) && -- cgit v1.3-6-gb490 From 34160ea3f9c96b5ae71a11459f9b9f6c298b8930 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Mar 2015 07:15:54 -0700 Subject: inet_diag: add const to inet_diag_req_v2 diag dumpers should not modify the request. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 43 ++++++++++++++++++++++--------------------- net/dccp/diag.c | 7 ++++--- net/ipv4/inet_diag.c | 22 +++++++++++----------- net/ipv4/tcp_diag.c | 4 ++-- net/ipv4/udp_diag.c | 22 +++++++++++++--------- 5 files changed, 52 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 46da02410a09..ac48b10c9395 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -11,33 +11,34 @@ struct sk_buff; struct netlink_callback; struct inet_diag_handler { - void (*dump)(struct sk_buff *skb, - struct netlink_callback *cb, - struct inet_diag_req_v2 *r, - struct nlattr *bc); - - int (*dump_one)(struct sk_buff *in_skb, - const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req); - - void (*idiag_get_info)(struct sock *sk, - struct inet_diag_msg *r, - void *info); - __u16 idiag_type; + void (*dump)(struct sk_buff *skb, + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, + struct nlattr *bc); + + int (*dump_one)(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req); + + void (*idiag_get_info)(struct sock *sk, + struct inet_diag_msg *r, + void *info); + __u16 idiag_type; }; struct inet_connection_sock; int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh); + struct sk_buff *skb, const struct inet_diag_req_v2 *req, + struct user_namespace *user_ns, + u32 pid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req_v2 *r, - struct nlattr *bc); + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, + struct nlattr *bc); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req); + struct sk_buff *in_skb, const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req); int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 028fc43aacbd..5a45f8de5d99 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -49,13 +49,14 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, struct nlattr *bc) { inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc); } -static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) +static int dccp_diag_dump_one(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) { return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req); } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index cd261f6e3abb..ac3bfb458afd 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -71,7 +71,7 @@ static void inet_diag_unlock_handler(const struct inet_diag_handler *handler) } int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, struct inet_diag_req_v2 *req, + struct sk_buff *skb, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) @@ -212,7 +212,7 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_csk_diag_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_req_v2 *req, + const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) @@ -223,7 +223,7 @@ static int inet_csk_diag_fill(struct sock *sk, static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, struct sk_buff *skb, - struct inet_diag_req_v2 *req, + const struct inet_diag_req_v2 *req, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { @@ -277,7 +277,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_req_v2 *r, + const struct inet_diag_req_v2 *r, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) @@ -293,7 +293,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + const struct inet_diag_req_v2 *req) { struct net *net = sock_net(in_skb->sk); struct sk_buff *rep; @@ -358,7 +358,7 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + const struct inet_diag_req_v2 *req) { const struct inet_diag_handler *handler; int err; @@ -626,7 +626,7 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) static int inet_csk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, + const struct inet_diag_req_v2 *r, const struct nlattr *bc) { if (!inet_diag_bc_sk(bc, sk)) @@ -667,7 +667,7 @@ static void twsk_build_assert(void) static int inet_twsk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, + const struct inet_diag_req_v2 *r, const struct nlattr *bc) { twsk_build_assert(); @@ -770,7 +770,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, + const struct inet_diag_req_v2 *r, const struct nlattr *bc) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -842,7 +842,7 @@ out: void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, struct nlattr *bc) { struct net *net = sock_net(skb->sk); int i, num, s_i, s_num; @@ -978,7 +978,7 @@ out: EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, + const struct inet_diag_req_v2 *r, struct nlattr *bc) { const struct inet_diag_handler *handler; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 0d73f9ddb55b..86dc119a3815 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -34,13 +34,13 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, struct nlattr *bc) { inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); } static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + const struct inet_diag_req_v2 *req) { return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 4a000f1dd757..2dbfc1f1f7b3 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -18,8 +18,9 @@ #include static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req_v2 *req, - struct nlattr *bc) + struct netlink_callback *cb, + const struct inet_diag_req_v2 *req, + struct nlattr *bc) { if (!inet_diag_bc_sk(bc, sk)) return 0; @@ -31,7 +32,8 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, } static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) { int err = -EINVAL; struct sock *sk; @@ -90,8 +92,9 @@ out_nosk: return err; } -static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) +static void udp_dump(struct udp_table *table, struct sk_buff *skb, + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, struct nlattr *bc) { int num, s_num, slot, s_slot; struct net *net = sock_net(skb->sk); @@ -144,13 +147,13 @@ done: } static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, struct nlattr *bc) { udp_dump(&udp_table, skb, cb, r, bc); } static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + const struct inet_diag_req_v2 *req) { return udp_dump_one(&udp_table, in_skb, nlh, req); } @@ -170,13 +173,14 @@ static const struct inet_diag_handler udp_diag_handler = { }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r, + struct nlattr *bc) { udp_dump(&udplite_table, skb, cb, r, bc); } static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + const struct inet_diag_req_v2 *req) { return udp_dump_one(&udplite_table, in_skb, nlh, req); } -- cgit v1.3-6-gb490 From 988dfbd795cf08b00576c1ced4210281b2bccffc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Mar 2015 09:27:55 +1100 Subject: rhashtable: Move hash_rnd into bucket_table Currently hash_rnd is a parameter that users can set. However, no existing users set this parameter. It is also something that people are unlikely to want to set directly since it's just a random number. In preparation for allowing the reseeding/rehashing of rhashtable, this patch moves hash_rnd into bucket_table so that it's now an internal state rather than a parameter. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ++-- lib/rhashtable.c | 24 +++++++++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index d438eeb08bff..5ef8ea551556 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -49,12 +49,14 @@ struct rhash_head { /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets + * @hash_rnd: Random seed to fold into hash * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets * @buckets: size * hash buckets */ struct bucket_table { size_t size; + u32 hash_rnd; unsigned int locks_mask; spinlock_t *locks; @@ -72,7 +74,6 @@ struct rhashtable; * @key_len: Length of key * @key_offset: Offset of key in struct to be hashed * @head_offset: Offset of rhash_head in struct to be hashed - * @hash_rnd: Seed to use while hashing * @max_shift: Maximum number of shifts while expanding * @min_shift: Minimum number of shifts while shrinking * @nulls_base: Base value to generate nulls marker @@ -85,7 +86,6 @@ struct rhashtable_params { size_t key_len; size_t key_offset; size_t head_offset; - u32 hash_rnd; size_t max_shift; size_t min_shift; u32 nulls_base; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index b5344ef4c684..ba15dceee27f 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -66,25 +66,28 @@ static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash) return hash & (tbl->size - 1); } -static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr) +static u32 obj_raw_hashfn(struct rhashtable *ht, const void *ptr) { + struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); u32 hash; if (unlikely(!ht->p.key_len)) - hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); + hash = ht->p.obj_hashfn(ptr, tbl->hash_rnd); else hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len, - ht->p.hash_rnd); + tbl->hash_rnd); return hash >> HASH_RESERVED_SPACE; } static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len) { - return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE; + struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + + return ht->p.hashfn(key, len, tbl->hash_rnd) >> HASH_RESERVED_SPACE; } -static u32 head_hashfn(const struct rhashtable *ht, +static u32 head_hashfn(struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he) { @@ -92,7 +95,7 @@ static u32 head_hashfn(const struct rhashtable *ht, } #ifdef CONFIG_PROVE_LOCKING -static void debug_dump_buckets(const struct rhashtable *ht, +static void debug_dump_buckets(struct rhashtable *ht, const struct bucket_table *tbl) { struct rhash_head *he; @@ -385,6 +388,8 @@ int rhashtable_expand(struct rhashtable *ht) if (new_tbl == NULL) return -ENOMEM; + new_tbl->hash_rnd = old_tbl->hash_rnd; + atomic_inc(&ht->shift); /* Make insertions go into the new, empty table right away. Deletions @@ -476,6 +481,8 @@ int rhashtable_shrink(struct rhashtable *ht) if (new_tbl == NULL) return -ENOMEM; + new_tbl->hash_rnd = tbl->hash_rnd; + rcu_assign_pointer(ht->future_tbl, new_tbl); synchronize_rcu(); @@ -1099,14 +1106,13 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) if (tbl == NULL) return -ENOMEM; + get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); + atomic_set(&ht->nelems, 0); atomic_set(&ht->shift, ilog2(tbl->size)); RCU_INIT_POINTER(ht->tbl, tbl); RCU_INIT_POINTER(ht->future_tbl, tbl); - if (!ht->p.hash_rnd) - get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); - INIT_WORK(&ht->run_work, rht_deferred_worker); return 0; -- cgit v1.3-6-gb490 From 33d6737761ea425d43f3b6e4561573a4020982f2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 10 Mar 2015 16:57:11 -0700 Subject: of: mdio: export of_mdio_parse_addr Export of_mdio_parse_addr() which allows parsing a given Ethernet PHY node MDIO address, verify it is within the allowed range, and return its value. This is going to be useful for the DSA code which needs to deal with multiple layers of MDIO buses. Signed-off-by: Florian Fainelli Acked-by: Rob Herring Signed-off-by: David S. Miller --- drivers/of/of_mdio.c | 3 ++- include/linux/of_mdio.h | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 1bd43053b8c7..0c064485d1c2 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -88,7 +88,7 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, struct device_node *chi return 0; } -static int of_mdio_parse_addr(struct device *dev, const struct device_node *np) +int of_mdio_parse_addr(struct device *dev, const struct device_node *np) { u32 addr; int ret; @@ -108,6 +108,7 @@ static int of_mdio_parse_addr(struct device *dev, const struct device_node *np) return addr; } +EXPORT_SYMBOL(of_mdio_parse_addr); /** * of_mdiobus_register - Register mii_bus and create PHYs from the device tree diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index d449018d0726..8f2237eb3485 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -24,6 +24,7 @@ struct phy_device *of_phy_attach(struct net_device *dev, phy_interface_t iface); extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); +extern int of_mdio_parse_addr(struct device *dev, const struct device_node *np); #else /* CONFIG_OF */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) @@ -60,6 +61,12 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np) { return NULL; } + +static inline int of_mdio_parse_addr(struct device *dev, + const struct device_node *np) +{ + return -ENOSYS; +} #endif /* CONFIG_OF */ #if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY) -- cgit v1.3-6-gb490 From 33cf7c90fe2f97afb1cadaa0cfb782cb9d1b9ee2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Mar 2015 18:53:14 -0700 Subject: net: add real socket cookies A long standing problem in netlink socket dumps is the use of kernel socket addresses as cookies. 1) It is a security concern. 2) Sockets can be reused quite quickly, so there is no guarantee a cookie is used once and identify a flow. 3) request sock, establish sock, and timewait socks for a given flow have different cookies. Part of our effort to bring better TCP statistics requires to switch to a different allocator. In this patch, I chose to use a per network namespace 64bit generator, and to use it only in the case a socket needs to be dumped to netlink. (This might be refined later if needed) Note that I tried to carry cookies from request sock, to establish sock, then timewait sockets. Signed-off-by: Eric Dumazet Cc: Eric Salo Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 4 ++-- include/net/inet_sock.h | 2 ++ include/net/inet_timewait_sock.h | 1 + include/net/net_namespace.h | 2 ++ include/net/sock.h | 3 +++ net/core/sock.c | 1 + net/core/sock_diag.c | 37 +++++++++++++++++++++++++++---------- net/dccp/ipv4.c | 2 ++ net/ipv4/inet_connection_sock.c | 2 ++ net/ipv4/inet_diag.c | 14 +++++++++----- net/ipv4/inet_timewait_sock.c | 1 + net/ipv4/syncookies.c | 1 + net/ipv4/tcp_input.c | 2 ++ 13 files changed, 55 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index b5ad7d35a636..083ac388098e 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -19,8 +19,8 @@ void sock_diag_unregister(const struct sock_diag_handler *h); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); -int sock_diag_check_cookie(void *sk, const __u32 *cookie); -void sock_diag_save_cookie(void *sk, __u32 *cookie); +int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie); +void sock_diag_save_cookie(struct sock *sk, __u32 *cookie); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index eb16c7beed1e..e565afdc14ad 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -77,6 +77,8 @@ struct inet_request_sock { #define ir_v6_rmt_addr req.__req_common.skc_v6_daddr #define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr #define ir_iif req.__req_common.skc_bound_dev_if +#define ir_cookie req.__req_common.skc_cookie +#define ireq_net req.__req_common.skc_net kmemcheck_bitfield_begin(flags); u16 snd_wscale : 4, diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 6c566034e26d..b7ce1003c429 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -122,6 +122,7 @@ struct inet_timewait_sock { #define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr #define tw_dport __tw_common.skc_dport #define tw_num __tw_common.skc_num +#define tw_cookie __tw_common.skc_cookie int tw_timeout; volatile unsigned char tw_substate; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2cb9acb618e9..e086f4030dd2 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -56,6 +56,8 @@ struct net { #endif spinlock_t rules_mod_lock; + atomic64_t cookie_gen; + struct list_head list; /* list of network namespaces */ struct list_head cleanup_list; /* namespaces on death row */ struct list_head exit_list; /* Use only net_mutex */ diff --git a/include/net/sock.h b/include/net/sock.h index 250822cc1e02..d996c633bec2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -199,6 +199,8 @@ struct sock_common { struct in6_addr skc_v6_rcv_saddr; #endif + atomic64_t skc_cookie; + /* * fields between dontcopy_begin/dontcopy_end * are not copied in sock_copy() @@ -329,6 +331,7 @@ struct sock { #define sk_net __sk_common.skc_net #define sk_v6_daddr __sk_common.skc_v6_daddr #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr +#define sk_cookie __sk_common.skc_cookie socket_lock_t sk_lock; struct sk_buff_head sk_receive_queue; diff --git a/net/core/sock.c b/net/core/sock.c index 726e1f99aa8d..a9a9c2ff9260 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1538,6 +1538,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_err = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); + atomic64_set(&newsk->sk_cookie, 0); /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 96e70ee05a8d..74dddf84adcd 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -13,22 +13,39 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); -int sock_diag_check_cookie(void *sk, const __u32 *cookie) +static u64 sock_gen_cookie(struct sock *sk) { - if ((cookie[0] != INET_DIAG_NOCOOKIE || - cookie[1] != INET_DIAG_NOCOOKIE) && - ((u32)(unsigned long)sk != cookie[0] || - (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1])) - return -ESTALE; - else + while (1) { + u64 res = atomic64_read(&sk->sk_cookie); + + if (res) + return res; + res = atomic64_inc_return(&sock_net(sk)->cookie_gen); + atomic64_cmpxchg(&sk->sk_cookie, 0, res); + } +} + +int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie) +{ + u64 res; + + if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE) return 0; + + res = sock_gen_cookie(sk); + if ((u32)res != cookie[0] || (u32)(res >> 32) != cookie[1]) + return -ESTALE; + + return 0; } EXPORT_SYMBOL_GPL(sock_diag_check_cookie); -void sock_diag_save_cookie(void *sk, __u32 *cookie) +void sock_diag_save_cookie(struct sock *sk, __u32 *cookie) { - cookie[0] = (u32)(unsigned long)sk; - cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); + u64 res = sock_gen_cookie(sk); + + cookie[0] = (u32)res; + cookie[1] = (u32)(res >> 32); } EXPORT_SYMBOL_GPL(sock_diag_save_cookie); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e45b968613a4..207281ae3536 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -641,6 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->ir_loc_addr = ip_hdr(skb)->daddr; ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + ireq->ireq_net = sock_net(sk); + atomic64_set(&ireq->ir_cookie, 0); /* * Step 3: Process LISTEN state diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 14d02ea905b6..34581f928afa 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -678,6 +678,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, newsk->sk_write_space = sk_stream_write_space; newsk->sk_mark = inet_rsk(req)->ir_mark; + atomic64_set(&newsk->sk_cookie, + atomic64_read(&inet_rsk(req)->ir_cookie)); newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ac3bfb458afd..29317ff4a007 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -221,12 +221,13 @@ static int inet_csk_diag_fill(struct sock *sk, user_ns, portid, seq, nlmsg_flags, unlh); } -static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, +static int inet_twsk_diag_fill(struct sock *sk, struct sk_buff *skb, const struct inet_diag_req_v2 *req, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { + struct inet_timewait_sock *tw = inet_twsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; s32 tmo; @@ -247,7 +248,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_retrans = 0; r->id.idiag_if = tw->tw_bound_dev_if; - sock_diag_save_cookie(tw, r->id.idiag_cookie); + sock_diag_save_cookie(sk, r->id.idiag_cookie); r->id.idiag_sport = tw->tw_sport; r->id.idiag_dport = tw->tw_dport; @@ -283,7 +284,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) - return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq, + return inet_twsk_diag_fill(sk, skb, r, portid, seq, nlmsg_flags, unlh); return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, @@ -675,7 +676,7 @@ static int inet_twsk_diag_dump(struct sock *sk, if (!inet_diag_bc_sk(bc, sk)) return 0; - return inet_twsk_diag_fill(inet_twsk(sk), skb, r, + return inet_twsk_diag_fill(sk, skb, r, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -734,7 +735,10 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->idiag_retrans = req->num_retrans; r->id.idiag_if = sk->sk_bound_dev_if; - sock_diag_save_cookie(req, r->id.idiag_cookie); + + BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != + offsetof(struct sock, sk_cookie)); + sock_diag_save_cookie((struct sock *)ireq, r->id.idiag_cookie); tmo = req->expires - jiffies; if (tmo < 0) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 6d592f8555fb..2bd980526631 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -195,6 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_ipv6only = 0; tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; + atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); twsk_net_set(tw, hold_net(sock_net(sk))); /* * Because we use RCU lookups, we should not set tw_refcnt diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 45fe60c5238e..ece31b426013 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -346,6 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; treq->listener = NULL; + ireq->ireq_net = sock_net(sk); /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fb4cf8b8e121..d7045f5f6ebf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5965,6 +5965,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb, sk); + inet_rsk(req)->ireq_net = sock_net(sk); + atomic64_set(&inet_rsk(req)->ir_cookie, 0); af_ops->init_req(req, sk, skb); -- cgit v1.3-6-gb490 From efd7ef1c1929d7a0329d4349252863c04d6f1729 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 11 Mar 2015 23:04:08 -0500 Subject: net: Kill hold_net release_net hold_net and release_net were an idea that turned out to be useless. The code has been disabled since 2008. Kill the code it is long past due. Signed-off-by: "Eric W. Biederman" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- include/net/fib_rules.h | 9 +-------- include/net/net_namespace.h | 29 ----------------------------- include/net/sock.h | 2 +- net/core/dev.c | 2 -- net/core/fib_rules.c | 17 +++-------------- net/core/neighbour.c | 9 ++------- net/core/net_namespace.c | 11 ----------- net/core/sock.c | 1 - net/ipv4/fib_semantics.c | 3 +-- net/ipv4/inet_hashtables.c | 3 +-- net/ipv4/inet_timewait_sock.c | 3 +-- net/ipv6/addrlabel.c | 5 +---- net/ipv6/ip6_flowlabel.c | 3 +-- net/openvswitch/datapath.c | 4 +--- 15 files changed, 14 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1354ae83efc8..cede40d9cac9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1864,8 +1864,7 @@ static inline void dev_net_set(struct net_device *dev, struct net *net) { #ifdef CONFIG_NET_NS - release_net(dev->nd_net); - dev->nd_net = hold_net(net); + dev->nd_net = net; #endif } diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 88d2ae526961..6d67383a5114 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -95,17 +95,10 @@ static inline void fib_rule_get(struct fib_rule *rule) atomic_inc(&rule->refcnt); } -static inline void fib_rule_put_rcu(struct rcu_head *head) -{ - struct fib_rule *rule = container_of(head, struct fib_rule, rcu); - release_net(rule->fr_net); - kfree(rule); -} - static inline void fib_rule_put(struct fib_rule *rule) { if (atomic_dec_and_test(&rule->refcnt)) - call_rcu(&rule->rcu, fib_rule_put_rcu); + kfree_rcu(rule, rcu); } static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index e086f4030dd2..fab51ceeabf3 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -49,11 +49,6 @@ struct net { atomic_t count; /* To decided when the network * namespace should be shut down. */ -#ifdef NETNS_REFCNT_DEBUG - atomic_t use_count; /* To track references we - * destroy on demand - */ -#endif spinlock_t rules_mod_lock; atomic64_t cookie_gen; @@ -236,30 +231,6 @@ int net_eq(const struct net *net1, const struct net *net2) #endif -#ifdef NETNS_REFCNT_DEBUG -static inline struct net *hold_net(struct net *net) -{ - if (net) - atomic_inc(&net->use_count); - return net; -} - -static inline void release_net(struct net *net) -{ - if (net) - atomic_dec(&net->use_count); -} -#else -static inline struct net *hold_net(struct net *net) -{ - return net; -} - -static inline void release_net(struct net *net) -{ -} -#endif - #ifdef CONFIG_NET_NS static inline void write_pnet(struct net **pnet, struct net *net) diff --git a/include/net/sock.h b/include/net/sock.h index d996c633bec2..95b2c1c220f9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2204,7 +2204,7 @@ static inline void sk_change_net(struct sock *sk, struct net *net) if (!net_eq(current_net, net)) { put_net(current_net); - sock_net_set(sk, hold_net(net)); + sock_net_set(sk, net); } } diff --git a/net/core/dev.c b/net/core/dev.c index 962ee9d71964..39fe369b46ad 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6841,8 +6841,6 @@ void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; - release_net(dev_net(dev)); - netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS kvfree(dev->_rx); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index b55677fed1c8..68ea6950cad1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -31,7 +31,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; - r->fr_net = hold_net(ops->fro_net); + r->fr_net = ops->fro_net; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; @@ -116,7 +116,6 @@ static int __fib_rules_register(struct fib_rules_ops *ops) if (ops->family == o->family) goto errout; - hold_net(net); list_add_tail_rcu(&ops->list, &net->rules_ops); err = 0; errout: @@ -160,15 +159,6 @@ static void fib_rules_cleanup_ops(struct fib_rules_ops *ops) } } -static void fib_rules_put_rcu(struct rcu_head *head) -{ - struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu); - struct net *net = ops->fro_net; - - release_net(net); - kfree(ops); -} - void fib_rules_unregister(struct fib_rules_ops *ops) { struct net *net = ops->fro_net; @@ -178,7 +168,7 @@ void fib_rules_unregister(struct fib_rules_ops *ops) fib_rules_cleanup_ops(ops); spin_unlock(&net->rules_mod_lock); - call_rcu(&ops->rcu, fib_rules_put_rcu); + kfree_rcu(ops, rcu); } EXPORT_SYMBOL_GPL(fib_rules_unregister); @@ -303,7 +293,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) err = -ENOMEM; goto errout; } - rule->fr_net = hold_net(net); + rule->fr_net = net; if (tb[FRA_PRIORITY]) rule->pref = nla_get_u32(tb[FRA_PRIORITY]); @@ -423,7 +413,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) return 0; errout_free: - release_net(rule->fr_net); kfree(rule); errout: rules_ops_put(ops); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ad07990e943d..0e8b32efc031 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -591,7 +591,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (!n) goto out; - write_pnet(&n->net, hold_net(net)); + write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) @@ -600,7 +600,6 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (tbl->pconstructor && tbl->pconstructor(n)) { if (dev) dev_put(dev); - release_net(net); kfree(n); n = NULL; goto out; @@ -634,7 +633,6 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, tbl->pdestructor(n); if (n->dev) dev_put(n->dev); - release_net(pneigh_net(n)); kfree(n); return 0; } @@ -657,7 +655,6 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); - release_net(pneigh_net(n)); kfree(n); continue; } @@ -1428,11 +1425,10 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); dev_hold(dev); p->dev = dev; - write_pnet(&p->net, hold_net(net)); + write_pnet(&p->net, net); p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { - release_net(net); dev_put(dev); kfree(p); return NULL; @@ -1472,7 +1468,6 @@ EXPORT_SYMBOL(neigh_parms_release); static void neigh_parms_destroy(struct neigh_parms *parms) { - release_net(neigh_parms_net(parms)); kfree(parms); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index cb5290b8c428..e5e96b0f6717 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -236,10 +236,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) net->user_ns = user_ns; idr_init(&net->netns_ids); -#ifdef NETNS_REFCNT_DEBUG - atomic_set(&net->use_count, 0); -#endif - list_for_each_entry(ops, &pernet_list, list) { error = ops_init(ops, net); if (error < 0) @@ -294,13 +290,6 @@ out_free: static void net_free(struct net *net) { -#ifdef NETNS_REFCNT_DEBUG - if (unlikely(atomic_read(&net->use_count) != 0)) { - pr_emerg("network namespace not free! Usage: %d\n", - atomic_read(&net->use_count)); - return; - } -#endif kfree(rcu_access_pointer(net->gen)); kmem_cache_free(net_cachep, net); } diff --git a/net/core/sock.c b/net/core/sock.c index a9a9c2ff9260..c8842f279f7a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1455,7 +1455,6 @@ void sk_release_kernel(struct sock *sk) sock_hold(sk); sock_release(sk->sk_socket); - release_net(sock_net(sk)); sock_net_set(sk, get_net(&init_net)); sock_put(sk); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c6d267442dac..66c1e4fbf884 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -213,7 +213,6 @@ static void free_fib_info_rcu(struct rcu_head *head) rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); - release_net(fi->fib_net); if (fi->fib_metrics != (u32 *) dst_default_metrics) kfree(fi->fib_metrics); kfree(fi); @@ -814,7 +813,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } else fi->fib_metrics = (u32 *) dst_default_metrics; - fi->fib_net = hold_net(net); + fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; fi->fib_flags = cfg->fc_flags; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 9111a4e22155..f6a12b97d12b 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -61,7 +61,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { - write_pnet(&tb->ib_net, hold_net(net)); + write_pnet(&tb->ib_net, net); tb->port = snum; tb->fastreuse = 0; tb->fastreuseport = 0; @@ -79,7 +79,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); - release_net(ib_net(tb)); kmem_cache_free(cachep, tb); } } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 2bd980526631..86ebf020925b 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -98,7 +98,6 @@ void inet_twsk_free(struct inet_timewait_sock *tw) #ifdef SOCK_REFCNT_DEBUG pr_debug("%s timewait_sock %p released\n", tw->tw_prot->name, tw); #endif - release_net(twsk_net(tw)); kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); module_put(owner); } @@ -196,7 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); - twsk_net_set(tw, hold_net(sock_net(sk))); + twsk_net_set(tw, sock_net(sk)); /* * Because we use RCU lookups, we should not set tw_refcnt * to a non null value before everything is setup for this diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index e43e79d0a612..59c793040498 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -129,9 +129,6 @@ static const __net_initconst struct ip6addrlbl_init_table /* Object management */ static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) { -#ifdef CONFIG_NET_NS - release_net(p->lbl_net); -#endif kfree(p); } @@ -241,7 +238,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, newp->label = label; INIT_HLIST_NODE(&newp->list); #ifdef CONFIG_NET_NS - newp->lbl_net = hold_net(net); + newp->lbl_net = net; #endif atomic_set(&newp->refcnt, 1); return newp; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f45d6db50a45..457303886fd4 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -100,7 +100,6 @@ static void fl_free(struct ip6_flowlabel *fl) if (fl) { if (fl->share == IPV6_FL_S_PROCESS) put_pid(fl->owner.pid); - release_net(fl->fl_net); kfree(fl->opt); kfree_rcu(fl, rcu); } @@ -403,7 +402,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, } } - fl->fl_net = hold_net(net); + fl->fl_net = net; fl->expires = jiffies; err = fl6_renew(fl, freq->flr_linger, freq->flr_expires); if (err) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 5bae7243c577..096c6276e6b9 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -203,7 +203,6 @@ static void destroy_dp_rcu(struct rcu_head *rcu) ovs_flow_tbl_destroy(&dp->table); free_percpu(dp->stats_percpu); - release_net(ovs_dp_get_net(dp)); kfree(dp->ports); kfree(dp); } @@ -1501,7 +1500,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (dp == NULL) goto err_free_reply; - ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); + ovs_dp_set_net(dp, sock_net(skb->sk)); /* Allocate table. */ err = ovs_flow_tbl_init(&dp->table); @@ -1575,7 +1574,6 @@ err_destroy_percpu: err_destroy_table: ovs_flow_tbl_destroy(&dp->table); err_free_dp: - release_net(ovs_dp_get_net(dp)); kfree(dp); err_free_reply: kfree_skb(reply); -- cgit v1.3-6-gb490 From 0c5c9fb55106333e773de8c9dd321fa8240caeb3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 11 Mar 2015 23:06:44 -0500 Subject: net: Introduce possible_net_t Having to say > #ifdef CONFIG_NET_NS > struct net *net; > #endif in structures is a little bit wordy and a little bit error prone. Instead it is possible to say: > typedef struct { > #ifdef CONFIG_NET_NS > struct net *net; > #endif > } possible_net_t; And then in a header say: > possible_net_t net; Which is cleaner and easier to use and easier to test, as the possible_net_t is always there no matter what the compile options. Further this allows read_pnet and write_pnet to be functions in all cases which is better at catching typos. This change adds possible_net_t, updates the definitions of read_pnet and write_pnet, updates optional struct net * variables that write_pnet uses on to have the type possible_net_t, and finally fixes up the b0rked users of read_pnet and write_pnet. Signed-off-by: "Eric W. Biederman" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++------ include/net/cfg80211.h | 4 +--- include/net/genetlink.h | 4 +--- include/net/inet_hashtables.h | 4 +--- include/net/ip_vs.h | 8 ++++---- include/net/neighbour.h | 8 ++------ include/net/net_namespace.h | 23 +++++++++++++---------- include/net/netfilter/nf_conntrack.h | 5 ++--- include/net/sock.h | 4 +--- include/net/xfrm.h | 8 ++------ net/9p/trans_fd.c | 4 ++-- net/ipv4/ipmr.c | 4 +--- net/ipv6/addrlabel.c | 8 ++------ net/ipv6/ip6mr.c | 4 +--- net/openvswitch/datapath.h | 4 +--- net/packet/internal.h | 4 +--- 16 files changed, 37 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cede40d9cac9..ddab1a2a07a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1721,9 +1721,7 @@ struct net_device { struct netpoll_info __rcu *npinfo; #endif -#ifdef CONFIG_NET_NS - struct net *nd_net; -#endif + possible_net_t nd_net; /* mid-layer private */ union { @@ -1863,9 +1861,7 @@ struct net *dev_net(const struct net_device *dev) static inline void dev_net_set(struct net_device *dev, struct net *net) { -#ifdef CONFIG_NET_NS - dev->nd_net = net; -#endif + write_pnet(&dev->nd_net, net); } static inline bool netdev_uses_dsa(struct net_device *dev) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 64e09e1e8099..f977abec07f6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3183,10 +3183,8 @@ struct wiphy { const struct ieee80211_ht_cap *ht_capa_mod_mask; const struct ieee80211_vht_cap *vht_capa_mod_mask; -#ifdef CONFIG_NET_NS /* the network namespace this phy lives in currently */ - struct net *_net; -#endif + possible_net_t _net; #ifdef CONFIG_CFG80211_WEXT const struct iw_handler_def *wext; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 0574abd3db86..a9af1cc8c1bc 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -92,9 +92,7 @@ struct genl_info { struct genlmsghdr * genlhdr; void * userhdr; struct nlattr ** attrs; -#ifdef CONFIG_NET_NS - struct net * _net; -#endif + possible_net_t _net; void * user_ptr[2]; struct sock * dst_sk; }; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index dd1950a7e273..bcd64756e5fe 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -76,9 +76,7 @@ struct inet_ehash_bucket { * ports are created in O(1) time? I thought so. ;-) -DaveM */ struct inet_bind_bucket { -#ifdef CONFIG_NET_NS - struct net *ib_net; -#endif + possible_net_t ib_net; unsigned short port; signed char fastreuse; signed char fastreuseport; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 20fd23398537..4e3731ee4eac 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -47,13 +47,13 @@ static inline struct net *skb_net(const struct sk_buff *skb) * Start with the most likely hit * End with BUG */ - if (likely(skb->dev && skb->dev->nd_net)) + if (likely(skb->dev && dev_net(skb->dev))) return dev_net(skb->dev); if (skb_dst(skb) && skb_dst(skb)->dev) return dev_net(skb_dst(skb)->dev); WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n", __func__, __LINE__); - if (likely(skb->sk && skb->sk->sk_net)) + if (likely(skb->sk && sock_net(skb->sk))) return sock_net(skb->sk); pr_err("There is no net ptr to find in the skb in %s() line:%d\n", __func__, __LINE__); @@ -71,11 +71,11 @@ static inline struct net *skb_sknet(const struct sk_buff *skb) #ifdef CONFIG_NET_NS #ifdef CONFIG_IP_VS_DEBUG /* Start with the most likely hit */ - if (likely(skb->sk && skb->sk->sk_net)) + if (likely(skb->sk && sock_net(skb->sk))) return sock_net(skb->sk); WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n", __func__, __LINE__); - if (likely(skb->dev && skb->dev->nd_net)) + if (likely(skb->dev && dev_net(skb->dev))) return dev_net(skb->dev); pr_err("There is no net ptr to find in the skb in %s() line:%d\n", __func__, __LINE__); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index d48b8ec8b5f4..e7bdf5170802 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -65,9 +65,7 @@ enum { }; struct neigh_parms { -#ifdef CONFIG_NET_NS - struct net *net; -#endif + possible_net_t net; struct net_device *dev; struct list_head list; int (*neigh_setup)(struct neighbour *); @@ -167,9 +165,7 @@ struct neigh_ops { struct pneigh_entry { struct pneigh_entry *next; -#ifdef CONFIG_NET_NS - struct net *net; -#endif + possible_net_t net; struct net_device *dev; u8 flags; u8 key[0]; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fab51ceeabf3..f733656404de 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -231,24 +231,27 @@ int net_eq(const struct net *net1, const struct net *net2) #endif +typedef struct { #ifdef CONFIG_NET_NS + struct net *net; +#endif +} possible_net_t; -static inline void write_pnet(struct net **pnet, struct net *net) +static inline void write_pnet(possible_net_t *pnet, struct net *net) { - *pnet = net; +#ifdef CONFIG_NET_NS + pnet->net = net; +#endif } -static inline struct net *read_pnet(struct net * const *pnet) +static inline struct net *read_pnet(const possible_net_t *pnet) { - return *pnet; -} - +#ifdef CONFIG_NET_NS + return pnet->net; #else - -#define write_pnet(pnet, net) do { (void)(net);} while (0) -#define read_pnet(pnet) (&init_net) - + return &init_net; #endif +} #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 74f271a172dd..095433b8a8b0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -95,9 +95,8 @@ struct nf_conn { /* Timer function; drops refcnt when it goes off. */ struct timer_list timeout; -#ifdef CONFIG_NET_NS - struct net *ct_net; -#endif + possible_net_t ct_net; + /* all members below initialized via memset */ u8 __nfct_init_offset[0]; diff --git a/include/net/sock.h b/include/net/sock.h index 95b2c1c220f9..9411c3421dd3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -190,9 +190,7 @@ struct sock_common { struct hlist_nulls_node skc_portaddr_node; }; struct proto *skc_prot; -#ifdef CONFIG_NET_NS - struct net *skc_net; -#endif + possible_net_t skc_net; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr skc_v6_daddr; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index dc4865e90fe4..d0ac7d7be8a7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -126,9 +126,7 @@ struct xfrm_state_walk { /* Full description of state of transformer. */ struct xfrm_state { -#ifdef CONFIG_NET_NS - struct net *xs_net; -#endif + possible_net_t xs_net; union { struct hlist_node gclist; struct hlist_node bydst; @@ -522,9 +520,7 @@ struct xfrm_policy_queue { }; struct xfrm_policy { -#ifdef CONFIG_NET_NS - struct net *xp_net; -#endif + possible_net_t xp_net; struct hlist_node bydst; struct hlist_node byidx; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 80d08f6664cb..3e3d82d8ff70 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -940,7 +940,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); sin_server.sin_port = htons(opts.port); - err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_INET, + err = __sock_create(current->nsproxy->net_ns, PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { pr_err("%s (%d): problem creating socket\n", @@ -988,7 +988,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); - err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_UNIX, + err = __sock_create(current->nsproxy->net_ns, PF_UNIX, SOCK_STREAM, 0, &csocket, 1); if (err < 0) { pr_err("%s (%d): problem creating socket\n", diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d78427652d2..5b188832800f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -73,9 +73,7 @@ struct mr_table { struct list_head list; -#ifdef CONFIG_NET_NS - struct net *net; -#endif + possible_net_t net; u32 id; struct sock __rcu *mroute_sk; struct timer_list ipmr_expire_timer; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 59c793040498..3cc50e2d3bf5 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -29,9 +29,7 @@ * Policy Table */ struct ip6addrlbl_entry { -#ifdef CONFIG_NET_NS - struct net *lbl_net; -#endif + possible_net_t lbl_net; struct in6_addr prefix; int prefixlen; int ifindex; @@ -237,9 +235,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, newp->addrtype = addrtype; newp->label = label; INIT_HLIST_NODE(&newp->list); -#ifdef CONFIG_NET_NS - newp->lbl_net = net; -#endif + write_pnet(&newp->lbl_net, net); atomic_set(&newp->refcnt, 1); return newp; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 34b682617f50..4b9315aa273e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -56,9 +56,7 @@ struct mr6_table { struct list_head list; -#ifdef CONFIG_NET_NS - struct net *net; -#endif + possible_net_t net; u32 id; struct sock *mroute6_sk; struct timer_list ipmr_expire_timer; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 3ece94563079..4ec4a480b147 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -84,10 +84,8 @@ struct datapath { /* Stats. */ struct dp_stats_percpu __percpu *stats_percpu; -#ifdef CONFIG_NET_NS /* Network namespace ref. */ - struct net *net; -#endif + possible_net_t net; u32 user_features; }; diff --git a/net/packet/internal.h b/net/packet/internal.h index cdddf6a30399..fe6e20caea1d 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -74,9 +74,7 @@ extern struct mutex fanout_mutex; #define PACKET_FANOUT_MAX 256 struct packet_fanout { -#ifdef CONFIG_NET_NS - struct net *net; -#endif + possible_net_t net; unsigned int num_members; u16 id; u8 type; -- cgit v1.3-6-gb490 From 80f1d68ccba70b1060c9c7360ca83da430f66bed Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 12 Mar 2015 17:21:42 +0100 Subject: ebpf: verifier: check that call reg with ARG_ANYTHING is initialized I noticed that a helper function with argument type ARG_ANYTHING does not need to have an initialized value (register). This can worst case lead to unintented stack memory leakage in future helper functions if they are not carefully designed, or unintended application behaviour in case the application developer was not careful enough to match a correct helper function signature in the API. The underlying issue is that ARG_ANYTHING should actually be split into two different semantics: 1) ARG_DONTCARE for function arguments that the helper function does not care about (in other words: the default for unused function arguments), and 2) ARG_ANYTHING that is an argument actually being used by a helper function and *guaranteed* to be an initialized register. The current risk is low: ARG_ANYTHING is only used for the 'flags' argument (r4) in bpf_map_update_elem() that internally does strict checking. Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 +++- kernel/bpf/verifier.c | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a884f5a2c503..80f2e0fc3d02 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -44,7 +44,7 @@ struct bpf_map_type_list { /* function argument constraints */ enum bpf_arg_type { - ARG_ANYTHING = 0, /* any argument is ok */ + ARG_DONTCARE = 0, /* unused argument in helper function */ /* the following constraints used to prototype * bpf_map_lookup/update/delete_elem() functions @@ -58,6 +58,8 @@ enum bpf_arg_type { */ ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ + + ARG_ANYTHING, /* any (initialized) argument is ok */ }; /* type of values returned from helper functions */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bdf4192a889b..e6b522496250 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -755,7 +755,7 @@ static int check_func_arg(struct verifier_env *env, u32 regno, enum bpf_reg_type expected_type; int err = 0; - if (arg_type == ARG_ANYTHING) + if (arg_type == ARG_DONTCARE) return 0; if (reg->type == NOT_INIT) { @@ -763,6 +763,9 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return -EACCES; } + if (arg_type == ARG_ANYTHING) + return 0; + if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY || arg_type == ARG_PTR_TO_MAP_VALUE) { expected_type = PTR_TO_STACK; -- cgit v1.3-6-gb490 From a5b6846f9e1a080493210013385c28faecee36f0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 12 Mar 2015 15:28:40 +0100 Subject: rhashtable: kill ht->shift atomic operations Commit c0c09bfdc415 ("rhashtable: avoid unnecessary wakeup for worker queue") changed ht->shift to be atomic, which is actually unnecessary. Instead of leaving the current shift in the core rhashtable structure, it can be cached inside the individual bucket tables. There, it will only be initialized once during a new table allocation in the shrink/expansion slow path, and from then onward it stays immutable for the rest of the bucket table liftime. That allows shift to be non-atomic. The patch also moves hash_rnd management into the table setup. The rhashtable structure now consumes 3 instead of 4 cachelines. Signed-off-by: Daniel Borkmann Cc: Ying Xue Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 6 ++--- lib/rhashtable.c | 55 +++++++++++++++++++++------------------------- 2 files changed, 28 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 5ef8ea551556..c93ff8ac474a 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -50,6 +50,7 @@ struct rhash_head { * struct bucket_table - Table of hash buckets * @size: Number of hash buckets * @hash_rnd: Random seed to fold into hash + * @shift: Current size (1 << shift) * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets * @buckets: size * hash buckets @@ -57,6 +58,7 @@ struct rhash_head { struct bucket_table { size_t size; u32 hash_rnd; + u32 shift; unsigned int locks_mask; spinlock_t *locks; @@ -99,7 +101,6 @@ struct rhashtable_params { * @tbl: Bucket table * @future_tbl: Table under construction during expansion/shrinking * @nelems: Number of elements in table - * @shift: Current size (1 << shift) * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping @@ -110,12 +111,11 @@ struct rhashtable { struct bucket_table __rcu *tbl; struct bucket_table __rcu *future_tbl; atomic_t nelems; - atomic_t shift; + bool being_destroyed; struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; struct list_head walkers; - bool being_destroyed; }; /** diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 68210cc2bab8..adea791ea3ab 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -147,7 +147,7 @@ static void bucket_table_free(const struct bucket_table *tbl) } static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, - size_t nbuckets) + size_t nbuckets, u32 hash_rnd) { struct bucket_table *tbl = NULL; size_t size; @@ -162,6 +162,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, return NULL; tbl->size = nbuckets; + tbl->shift = ilog2(nbuckets); + tbl->hash_rnd = hash_rnd; if (alloc_bucket_locks(ht, tbl) < 0) { bucket_table_free(tbl); @@ -177,25 +179,27 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, /** * rht_grow_above_75 - returns true if nelems > 0.75 * table-size * @ht: hash table - * @new_size: new table size + * @tbl: current table */ -static bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) +static bool rht_grow_above_75(const struct rhashtable *ht, + const struct bucket_table *tbl) { /* Expand table when exceeding 75% load */ - return atomic_read(&ht->nelems) > (new_size / 4 * 3) && - (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift); + return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) && + (!ht->p.max_shift || tbl->shift < ht->p.max_shift); } /** * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size * @ht: hash table - * @new_size: new table size + * @tbl: current table */ -static bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) +static bool rht_shrink_below_30(const struct rhashtable *ht, + const struct bucket_table *tbl) { /* Shrink table beneath 30% load */ - return atomic_read(&ht->nelems) < (new_size * 3 / 10) && - (atomic_read(&ht->shift) > ht->p.min_shift); + return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) && + tbl->shift > ht->p.min_shift; } static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash) @@ -310,16 +314,11 @@ int rhashtable_expand(struct rhashtable *ht) ASSERT_RHT_MUTEX(ht); - new_tbl = bucket_table_alloc(ht, old_tbl->size * 2); + new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, old_tbl->hash_rnd); if (new_tbl == NULL) return -ENOMEM; - new_tbl->hash_rnd = old_tbl->hash_rnd; - - atomic_inc(&ht->shift); - rhashtable_rehash(ht, new_tbl); - return 0; } EXPORT_SYMBOL_GPL(rhashtable_expand); @@ -342,20 +341,15 @@ EXPORT_SYMBOL_GPL(rhashtable_expand); */ int rhashtable_shrink(struct rhashtable *ht) { - struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); ASSERT_RHT_MUTEX(ht); - new_tbl = bucket_table_alloc(ht, tbl->size / 2); + new_tbl = bucket_table_alloc(ht, old_tbl->size / 2, old_tbl->hash_rnd); if (new_tbl == NULL) return -ENOMEM; - new_tbl->hash_rnd = tbl->hash_rnd; - - atomic_dec(&ht->shift); - rhashtable_rehash(ht, new_tbl); - return 0; } EXPORT_SYMBOL_GPL(rhashtable_shrink); @@ -376,9 +370,9 @@ static void rht_deferred_worker(struct work_struct *work) list_for_each_entry(walker, &ht->walkers, list) walker->resize = true; - if (rht_grow_above_75(ht, tbl->size)) + if (rht_grow_above_75(ht, tbl)) rhashtable_expand(ht); - else if (rht_shrink_below_30(ht, tbl->size)) + else if (rht_shrink_below_30(ht, tbl)) rhashtable_shrink(ht); unlock: mutex_unlock(&ht->mutex); @@ -431,7 +425,7 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, rcu_assign_pointer(tbl->buckets[hash], obj); atomic_inc(&ht->nelems); - if (no_resize_running && rht_grow_above_75(ht, tbl->size)) + if (no_resize_running && rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); exit: @@ -539,7 +533,7 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) bool no_resize_running = tbl == old_tbl; atomic_dec(&ht->nelems); - if (no_resize_running && rht_shrink_below_30(ht, tbl->size)) + if (no_resize_running && rht_shrink_below_30(ht, tbl)) schedule_work(&ht->run_work); } @@ -913,6 +907,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) { struct bucket_table *tbl; size_t size; + u32 hash_rnd; size = HASH_DEFAULT_SIZE; @@ -939,14 +934,14 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) else ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; - tbl = bucket_table_alloc(ht, size); + get_random_bytes(&hash_rnd, sizeof(hash_rnd)); + + tbl = bucket_table_alloc(ht, size, hash_rnd); if (tbl == NULL) return -ENOMEM; - get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); - atomic_set(&ht->nelems, 0); - atomic_set(&ht->shift, ilog2(tbl->size)); + RCU_INIT_POINTER(ht->tbl, tbl); RCU_INIT_POINTER(ht->future_tbl, tbl); -- cgit v1.3-6-gb490 From 9c51026509d7fd11d84e0035008e1a9768960f2b Mon Sep 17 00:00:00 2001 From: Syed Asifful Dayyan Date: Fri, 6 Mar 2015 18:40:42 +0100 Subject: brcmfmac: Add support for BCM4345 SDIO chipset. These changes add support for BCM4345 SDIO chipset. Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Arend Van Spriel Reviewed-by: Hante Meuleman Reviewed-by: Daniel (Deognyoun) Kim Signed-off-by: Syed Asifful Dayyan Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c | 1 + drivers/net/wireless/brcm80211/brcmfmac/chip.c | 4 ++++ drivers/net/wireless/brcm80211/brcmfmac/sdio.c | 5 +++++ drivers/net/wireless/brcm80211/include/brcm_hw_ids.h | 1 + include/linux/mmc/sdio_ids.h | 1 + 5 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c index ffb0e2d382ea..43995308307d 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c @@ -1096,6 +1096,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = { BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339), + BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4345), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4354), { /* end: all zeroes */ } }; diff --git a/drivers/net/wireless/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/brcm80211/brcmfmac/chip.c index 04d2ca0d87d6..e679edca081d 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/chip.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/chip.c @@ -491,6 +491,10 @@ static void brcmf_chip_get_raminfo(struct brcmf_chip_priv *ci) case BRCM_CC_43362_CHIP_ID: ci->pub.ramsize = 0x3c000; break; + case BRCM_CC_4345_CHIP_ID: + ci->pub.ramsize = 0xc8000; + ci->pub.rambase = 0x198000; + break; case BRCM_CC_4339_CHIP_ID: case BRCM_CC_4354_CHIP_ID: case BRCM_CC_4356_CHIP_ID: diff --git a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c index c9a9ff191616..973fa2afc2fc 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c @@ -617,6 +617,8 @@ static const struct sdiod_drive_str sdiod_drvstr_tab2_3v3[] = { #define BCM43362_NVRAM_NAME "brcm/brcmfmac43362-sdio.txt" #define BCM4339_FIRMWARE_NAME "brcm/brcmfmac4339-sdio.bin" #define BCM4339_NVRAM_NAME "brcm/brcmfmac4339-sdio.txt" +#define BCM4345_FIRMWARE_NAME "brcm/brcmfmac4345-sdio.bin" +#define BCM4345_NVRAM_NAME "brcm/brcmfmac4345-sdio.txt" #define BCM4354_FIRMWARE_NAME "brcm/brcmfmac4354-sdio.bin" #define BCM4354_NVRAM_NAME "brcm/brcmfmac4354-sdio.txt" @@ -640,6 +642,8 @@ MODULE_FIRMWARE(BCM43362_FIRMWARE_NAME); MODULE_FIRMWARE(BCM43362_NVRAM_NAME); MODULE_FIRMWARE(BCM4339_FIRMWARE_NAME); MODULE_FIRMWARE(BCM4339_NVRAM_NAME); +MODULE_FIRMWARE(BCM4345_FIRMWARE_NAME); +MODULE_FIRMWARE(BCM4345_NVRAM_NAME); MODULE_FIRMWARE(BCM4354_FIRMWARE_NAME); MODULE_FIRMWARE(BCM4354_NVRAM_NAME); @@ -669,6 +673,7 @@ static const struct brcmf_firmware_names brcmf_fwname_data[] = { { BRCM_CC_4335_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4335) }, { BRCM_CC_43362_CHIP_ID, 0xFFFFFFFE, BRCMF_FIRMWARE_NVRAM(BCM43362) }, { BRCM_CC_4339_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4339) }, + { BRCM_CC_4345_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4345) }, { BRCM_CC_4354_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4354) } }; diff --git a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h index 2124a17d0bfd..b599e7e41148 100644 --- a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h +++ b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h @@ -37,6 +37,7 @@ #define BRCM_CC_43362_CHIP_ID 43362 #define BRCM_CC_4335_CHIP_ID 0x4335 #define BRCM_CC_4339_CHIP_ID 0x4339 +#define BRCM_CC_4345_CHIP_ID 0x4345 #define BRCM_CC_4354_CHIP_ID 0x4354 #define BRCM_CC_4356_CHIP_ID 0x4356 #define BRCM_CC_43566_CHIP_ID 43566 diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 996807963716..91397858dc95 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -33,6 +33,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 +#define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 #define SDIO_VENDOR_ID_INTEL 0x0089 -- cgit v1.3-6-gb490 From 702131e2a393b45174be326f1dbe20b658b4f157 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Thu, 5 Mar 2015 18:25:10 +0100 Subject: bcma: move PCI IRQ control function to host specific code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function isn't really related to any bus core. It touches PCI device config registers only, so move it to the (PCI) host file. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/driver_pci.c | 33 ------------------------- drivers/bcma/host_pci.c | 34 ++++++++++++++++++++++++++ drivers/net/wireless/b43/main.c | 2 +- drivers/net/wireless/brcm80211/brcmsmac/main.c | 2 +- include/linux/bcma/bcma.h | 9 +++++++ include/linux/bcma/bcma_driver_pci.h | 2 -- 6 files changed, 45 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c index cfd35bc1c5a3..f499a469e66d 100644 --- a/drivers/bcma/driver_pci.c +++ b/drivers/bcma/driver_pci.c @@ -282,39 +282,6 @@ void bcma_core_pci_power_save(struct bcma_bus *bus, bool up) } EXPORT_SYMBOL_GPL(bcma_core_pci_power_save); -int bcma_core_pci_irq_ctl(struct bcma_bus *bus, struct bcma_device *core, - bool enable) -{ - struct pci_dev *pdev; - u32 coremask, tmp; - int err = 0; - - if (bus->hosttype != BCMA_HOSTTYPE_PCI) { - /* This bcma device is not on a PCI host-bus. So the IRQs are - * not routed through the PCI core. - * So we must not enable routing through the PCI core. */ - goto out; - } - - pdev = bus->host_pci; - - err = pci_read_config_dword(pdev, BCMA_PCI_IRQMASK, &tmp); - if (err) - goto out; - - coremask = BIT(core->core_index) << 8; - if (enable) - tmp |= coremask; - else - tmp &= ~coremask; - - err = pci_write_config_dword(pdev, BCMA_PCI_IRQMASK, tmp); - -out: - return err; -} -EXPORT_SYMBOL_GPL(bcma_core_pci_irq_ctl); - static void bcma_core_pci_extend_L1timer(struct bcma_drv_pci *pc, bool extend) { u32 w; diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index a62a2f9091f5..0856189c065f 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -351,3 +351,37 @@ void bcma_host_pci_down(struct bcma_bus *bus) bcma_core_pci_down(&bus->drv_pci[0]); } EXPORT_SYMBOL_GPL(bcma_host_pci_down); + +/* See also si_pci_setup */ +int bcma_host_pci_irq_ctl(struct bcma_bus *bus, struct bcma_device *core, + bool enable) +{ + struct pci_dev *pdev; + u32 coremask, tmp; + int err = 0; + + if (bus->hosttype != BCMA_HOSTTYPE_PCI) { + /* This bcma device is not on a PCI host-bus. So the IRQs are + * not routed through the PCI core. + * So we must not enable routing through the PCI core. */ + goto out; + } + + pdev = bus->host_pci; + + err = pci_read_config_dword(pdev, BCMA_PCI_IRQMASK, &tmp); + if (err) + goto out; + + coremask = BIT(core->core_index) << 8; + if (enable) + tmp |= coremask; + else + tmp &= ~coremask; + + err = pci_write_config_dword(pdev, BCMA_PCI_IRQMASK, tmp); + +out: + return err; +} +EXPORT_SYMBOL_GPL(bcma_host_pci_irq_ctl); diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index ac99798570e8..998490a7b167 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -4866,7 +4866,7 @@ static int b43_wireless_core_init(struct b43_wldev *dev) switch (dev->dev->bus_type) { #ifdef CONFIG_B43_BCMA case B43_BUS_BCMA: - bcma_core_pci_irq_ctl(dev->dev->bdev->bus, + bcma_host_pci_irq_ctl(dev->dev->bdev->bus, dev->dev->bdev, true); bcma_host_pci_up(dev->dev->bdev->bus); break; diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c index c84af1dfc88f..369527e27689 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/main.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c @@ -4959,7 +4959,7 @@ static int brcms_b_up_prep(struct brcms_hardware *wlc_hw) * Configure pci/pcmcia here instead of in brcms_c_attach() * to allow mfg hotswap: down, hotswap (chip power cycle), up. */ - bcma_core_pci_irq_ctl(wlc_hw->d11core->bus, wlc_hw->d11core, + bcma_host_pci_irq_ctl(wlc_hw->d11core->bus, wlc_hw->d11core, true); /* diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 44057b45ed32..e34f906647d3 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -437,6 +437,8 @@ static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus, #ifdef CONFIG_BCMA_HOST_PCI extern void bcma_host_pci_up(struct bcma_bus *bus); extern void bcma_host_pci_down(struct bcma_bus *bus); +extern int bcma_host_pci_irq_ctl(struct bcma_bus *bus, + struct bcma_device *core, bool enable); #else static inline void bcma_host_pci_up(struct bcma_bus *bus) { @@ -444,6 +446,13 @@ static inline void bcma_host_pci_up(struct bcma_bus *bus) static inline void bcma_host_pci_down(struct bcma_bus *bus) { } +static inline int bcma_host_pci_irq_ctl(struct bcma_bus *bus, + struct bcma_device *core, bool enable) +{ + if (bus->hosttype == BCMA_HOSTTYPE_PCI) + return -ENOTSUPP; + return 0; +} #endif extern bool bcma_core_is_enabled(struct bcma_device *core); diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 8e90004fdfd7..3a468687c170 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -238,8 +238,6 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) -extern int bcma_core_pci_irq_ctl(struct bcma_bus *bus, - struct bcma_device *core, bool enable); extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); -- cgit v1.3-6-gb490 From 982a40f5c0bb03368989a6b1ae833b474854e931 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Thu, 5 Mar 2015 18:25:11 +0100 Subject: bcma: allow disabling (not building) PCI driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It isn't required for bcma bus on SoCs, so provide some empty functions and allow disabling it. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/Kconfig | 4 ++-- drivers/bcma/bcma_private.h | 20 ++++++++++++++++++++ include/linux/bcma/bcma_driver_pci.h | 6 ++++++ 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig index 9be17d3431bb..1500b7120fc7 100644 --- a/drivers/bcma/Kconfig +++ b/drivers/bcma/Kconfig @@ -45,9 +45,9 @@ config BCMA_HOST_SOC If unsure, say N -# TODO: make it depend on PCI when ready config BCMA_DRIVER_PCI - bool + bool "BCMA Broadcom PCI core driver" + depends on BCMA && PCI default y help BCMA bus may have many versions of PCIe core. This driver diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index 5a1d22489afc..15f2b2e242ea 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -106,15 +106,35 @@ static inline void __exit bcma_host_soc_unregister_driver(void) #endif /* CONFIG_BCMA_HOST_SOC && CONFIG_OF */ /* driver_pci.c */ +#ifdef CONFIG_BCMA_DRIVER_PCI u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address); void bcma_core_pci_early_init(struct bcma_drv_pci *pc); void bcma_core_pci_init(struct bcma_drv_pci *pc); void bcma_core_pci_up(struct bcma_drv_pci *pc); void bcma_core_pci_down(struct bcma_drv_pci *pc); +#else +static inline void bcma_core_pci_early_init(struct bcma_drv_pci *pc) +{ + WARN_ON(pc->core->bus->hosttype == BCMA_HOSTTYPE_PCI); +} +static inline void bcma_core_pci_init(struct bcma_drv_pci *pc) +{ + /* Initialization is required for PCI hosted bus */ + WARN_ON(pc->core->bus->hosttype == BCMA_HOSTTYPE_PCI); +} +#endif /* driver_pcie2.c */ +#ifdef CONFIG_BCMA_DRIVER_PCI void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2); void bcma_core_pcie2_up(struct bcma_drv_pcie2 *pcie2); +#else +static inline void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2) +{ + /* Initialization is required for PCI hosted bus */ + WARN_ON(pcie2->core->bus->hosttype == BCMA_HOSTTYPE_PCI); +} +#endif extern int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc); diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 3a468687c170..5ba6918ca20b 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -238,7 +238,13 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) +#ifdef CONFIG_BCMA_DRIVER_PCI extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); +#else +static inline void bcma_core_pci_power_save(struct bcma_bus *bus, bool up) +{ +} +#endif extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); extern int bcma_core_pci_plat_dev_init(struct pci_dev *dev); -- cgit v1.3-6-gb490 From 3f3c4bb5ec7c645d1151e1e8d6e56c71a050cf85 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 4 Mar 2015 21:19:59 +0100 Subject: mac802154: correct max sifs size handling This patch fix the max sifs size correction when the IEEE802154_HW_TX_OMIT_CKSUM flag is set. With this flag the sk_buff doesn't contain the CRC, because the transceiver will add the CRC while transmit. Also add some defines for the max sifs frame size value and frame check sequence according to 802.15.4 standard. Signed-off-by: Alexander Aring Acked-by: Marc Kleine-Budde Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 2 ++ net/mac802154/util.c | 13 ++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 40b0ab953937..8872ca103d06 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -30,6 +30,7 @@ #define IEEE802154_MTU 127 #define IEEE802154_ACK_PSDU_LEN 5 #define IEEE802154_MIN_PSDU_LEN 9 +#define IEEE802154_FCS_LEN 2 #define IEEE802154_PAN_ID_BROADCAST 0xffff #define IEEE802154_ADDR_SHORT_BROADCAST 0xffff @@ -39,6 +40,7 @@ #define IEEE802154_LIFS_PERIOD 40 #define IEEE802154_SIFS_PERIOD 12 +#define IEEE802154_MAX_SIFS_FRAME_SIZE 18 #define IEEE802154_MAX_CHANNEL 26 #define IEEE802154_MAX_PAGE 31 diff --git a/net/mac802154/util.c b/net/mac802154/util.c index 5fc979027919..150bf807e572 100644 --- a/net/mac802154/util.c +++ b/net/mac802154/util.c @@ -65,8 +65,19 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, { if (ifs_handling) { struct ieee802154_local *local = hw_to_local(hw); + u8 max_sifs_size; - if (skb->len > 18) + /* If transceiver sets CRC on his own we need to use lifs + * threshold len above 16 otherwise 18, because it's not + * part of skb->len. + */ + if (hw->flags & IEEE802154_HW_TX_OMIT_CKSUM) + max_sifs_size = IEEE802154_MAX_SIFS_FRAME_SIZE - + IEEE802154_FCS_LEN; + else + max_sifs_size = IEEE802154_MAX_SIFS_FRAME_SIZE; + + if (skb->len > max_sifs_size) hrtimer_start(&local->ifs_timer, ktime_set(0, hw->phy->lifs_period * NSEC_PER_USEC), HRTIMER_MODE_REL); -- cgit v1.3-6-gb490 From eddee5ba34eb6c9890ef106f19ead2b370e5342f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Mar 2015 13:57:20 +1100 Subject: rhashtable: Fix walker behaviour during rehash Previously whenever the walker encountered a resize it simply snaps back to the beginning and starts again. However, this only works if the rehash started and completed while the walker was idle. If the walker attempts to restart while the rehash is still ongoing, we may miss objects that we shouldn't have. This patch fixes this by making the walker walk the old table followed by the new table just like all other readers. If a rehash is detected we will still signal our caller of the fact so they can prepare for duplicates but we will simply continue the walk onto the new table after the old one is finished either by us or by the rehasher. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 8 +++--- lib/rhashtable.c | 69 ++++++++++++++++++++++++++++++---------------- 2 files changed, 50 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index c93ff8ac474a..4192682c1d5c 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -53,6 +53,7 @@ struct rhash_head { * @shift: Current size (1 << shift) * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets + * @walkers: List of active walkers * @buckets: size * hash buckets */ struct bucket_table { @@ -61,6 +62,7 @@ struct bucket_table { u32 shift; unsigned int locks_mask; spinlock_t *locks; + struct list_head walkers; struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; @@ -104,7 +106,6 @@ struct rhashtable_params { * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping - * @walkers: List of active walkers * @being_destroyed: True if table is set up for destruction */ struct rhashtable { @@ -115,17 +116,16 @@ struct rhashtable { struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; - struct list_head walkers; }; /** * struct rhashtable_walker - Hash table walker * @list: List entry on list of walkers - * @resize: Resize event occured + * @tbl: The table that we were walking over */ struct rhashtable_walker { struct list_head list; - bool resize; + struct bucket_table *tbl; }; /** diff --git a/lib/rhashtable.c b/lib/rhashtable.c index fc0d451279f0..f7c76079f8f1 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -170,6 +170,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, return NULL; } + INIT_LIST_HEAD(&tbl->walkers); + for (i = 0; i < nbuckets; i++) INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); @@ -264,6 +266,7 @@ static void rhashtable_rehash(struct rhashtable *ht, struct bucket_table *new_tbl) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); + struct rhashtable_walker *walker; unsigned old_hash; get_random_bytes(&new_tbl->hash_rnd, sizeof(new_tbl->hash_rnd)); @@ -284,6 +287,9 @@ static void rhashtable_rehash(struct rhashtable *ht, /* Publish the new table pointer. */ rcu_assign_pointer(ht->tbl, new_tbl); + list_for_each_entry(walker, &old_tbl->walkers, list) + walker->tbl = NULL; + /* Wait for readers. All new readers will see the new * table, and thus no references to the old table will * remain. @@ -358,7 +364,6 @@ static void rht_deferred_worker(struct work_struct *work) { struct rhashtable *ht; struct bucket_table *tbl; - struct rhashtable_walker *walker; ht = container_of(work, struct rhashtable, run_work); mutex_lock(&ht->mutex); @@ -367,9 +372,6 @@ static void rht_deferred_worker(struct work_struct *work) tbl = rht_dereference(ht->tbl, ht); - list_for_each_entry(walker, &ht->walkers, list) - walker->resize = true; - if (rht_grow_above_75(ht, tbl)) rhashtable_expand(ht); else if (rht_shrink_below_30(ht, tbl)) @@ -725,11 +727,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter) if (!iter->walker) return -ENOMEM; - INIT_LIST_HEAD(&iter->walker->list); - iter->walker->resize = false; - mutex_lock(&ht->mutex); - list_add(&iter->walker->list, &ht->walkers); + iter->walker->tbl = rht_dereference(ht->tbl, ht); + list_add(&iter->walker->list, &iter->walker->tbl->walkers); mutex_unlock(&ht->mutex); return 0; @@ -745,7 +745,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init); void rhashtable_walk_exit(struct rhashtable_iter *iter) { mutex_lock(&iter->ht->mutex); - list_del(&iter->walker->list); + if (iter->walker->tbl) + list_del(&iter->walker->list); mutex_unlock(&iter->ht->mutex); kfree(iter->walker); } @@ -767,12 +768,19 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit); */ int rhashtable_walk_start(struct rhashtable_iter *iter) { + struct rhashtable *ht = iter->ht; + + mutex_lock(&ht->mutex); + + if (iter->walker->tbl) + list_del(&iter->walker->list); + rcu_read_lock(); - if (iter->walker->resize) { - iter->slot = 0; - iter->skip = 0; - iter->walker->resize = false; + mutex_unlock(&ht->mutex); + + if (!iter->walker->tbl) { + iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht); return -EAGAIN; } @@ -794,13 +802,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start); */ void *rhashtable_walk_next(struct rhashtable_iter *iter) { - const struct bucket_table *tbl; + struct bucket_table *tbl = iter->walker->tbl; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; void *obj = NULL; - tbl = rht_dereference_rcu(ht->tbl, ht); - if (p) { p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); goto next; @@ -826,17 +832,18 @@ next: iter->skip = 0; } - iter->p = NULL; - -out: - if (iter->walker->resize) { - iter->p = NULL; + iter->walker->tbl = rht_dereference_rcu(ht->future_tbl, ht); + if (iter->walker->tbl != tbl) { iter->slot = 0; iter->skip = 0; - iter->walker->resize = false; return ERR_PTR(-EAGAIN); } + iter->walker->tbl = NULL; + iter->p = NULL; + +out: + return obj; } EXPORT_SYMBOL_GPL(rhashtable_walk_next); @@ -849,7 +856,24 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next); */ void rhashtable_walk_stop(struct rhashtable_iter *iter) { + struct rhashtable *ht; + struct bucket_table *tbl = iter->walker->tbl; + rcu_read_unlock(); + + if (!tbl) + return; + + ht = iter->ht; + + mutex_lock(&ht->mutex); + if (rht_dereference(ht->tbl, ht) == tbl || + rht_dereference(ht->future_tbl, ht) == tbl) + list_add(&iter->walker->list, &tbl->walkers); + else + iter->walker->tbl = NULL; + mutex_unlock(&ht->mutex); + iter->p = NULL; } EXPORT_SYMBOL_GPL(rhashtable_walk_stop); @@ -927,7 +951,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) memset(ht, 0, sizeof(*ht)); mutex_init(&ht->mutex); memcpy(&ht->p, params, sizeof(*params)); - INIT_LIST_HEAD(&ht->walkers); if (params->locks_mul) ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); -- cgit v1.3-6-gb490 From 9d901bc05153bbf33b5da2cd6266865e531f0545 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Mar 2015 13:57:23 +1100 Subject: rhashtable: Free bucket tables asynchronously after rehash There is in fact no need to wait for an RCU grace period in the rehash function, since all insertions are guaranteed to go into the new table through spin locks. This patch uses call_rcu to free the old/rehashed table at our leisure. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 ++ lib/rhashtable.c | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 4192682c1d5c..a0abddd226b3 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -54,6 +54,7 @@ struct rhash_head { * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets * @walkers: List of active walkers + * @rcu: RCU structure for freeing the table * @buckets: size * hash buckets */ struct bucket_table { @@ -63,6 +64,7 @@ struct bucket_table { unsigned int locks_mask; spinlock_t *locks; struct list_head walkers; + struct rcu_head rcu; struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e55bbc84c449..36fb0910bec2 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -141,6 +141,11 @@ static void bucket_table_free(const struct bucket_table *tbl) kvfree(tbl); } +static void bucket_table_free_rcu(struct rcu_head *head) +{ + bucket_table_free(container_of(head, struct bucket_table, rcu)); +} + static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, size_t nbuckets) { @@ -288,9 +293,7 @@ static void rhashtable_rehash(struct rhashtable *ht, * table, and thus no references to the old table will * remain. */ - synchronize_rcu(); - - bucket_table_free(old_tbl); + call_rcu(&old_tbl->rcu, bucket_table_free_rcu); } /** -- cgit v1.3-6-gb490 From 63d512d0cffcae40505d9448abd509972465e846 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Mar 2015 13:57:24 +1100 Subject: rhashtable: Add rehash counter to bucket_table This patch adds a rehash counter to bucket_table to indicate the last bucket that has been rehashed. This serves two purposes: 1. Any bucket that has been rehashed can never gain a new object. 2. If the rehash counter reaches the size of the table, the table will forever remain empty. This patch also downsizes bucket_table->size to an unsigned int since we do not support sizes greater than 32 bits yet. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 +++- lib/rhashtable.c | 1 + lib/test_rhashtable.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index a0abddd226b3..ed7562ad4ca0 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -49,6 +49,7 @@ struct rhash_head { /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets + * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash * @shift: Current size (1 << shift) * @locks_mask: Mask to apply before accessing locks[] @@ -58,7 +59,8 @@ struct rhash_head { * @buckets: size * hash buckets */ struct bucket_table { - size_t size; + unsigned int size; + unsigned int rehash; u32 hash_rnd; u32 shift; unsigned int locks_mask; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 36fb0910bec2..ff4ea1704546 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -260,6 +260,7 @@ static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash) spin_lock_bh(old_bucket_lock); while (!rhashtable_rehash_one(ht, old_hash)) ; + old_tbl->rehash++; spin_unlock_bh(old_bucket_lock); } diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 67c7593d1dd6..16974fd89e4e 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -80,7 +80,7 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet) rcu_cnt = cnt = 0; if (!quiet) - pr_info(" [%#4x/%zu]", i, tbl->size); + pr_info(" [%#4x/%u]", i, tbl->size); rht_for_each_entry_rcu(obj, pos, tbl, i, node) { cnt++; -- cgit v1.3-6-gb490 From c4db8848af6af92f90462258603be844baeab44d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Mar 2015 13:57:25 +1100 Subject: rhashtable: Move future_tbl into struct bucket_table This patch moves future_tbl to open up the possibility of having multiple rehashes on the same table. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 5 +++-- lib/rhashtable.c | 27 +++++++++++---------------- 2 files changed, 14 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index ed7562ad4ca0..1695378b3c5b 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -56,6 +56,7 @@ struct rhash_head { * @locks: Array of spinlocks protecting individual buckets * @walkers: List of active walkers * @rcu: RCU structure for freeing the table + * @future_tbl: Table under construction during rehashing * @buckets: size * hash buckets */ struct bucket_table { @@ -68,6 +69,8 @@ struct bucket_table { struct list_head walkers; struct rcu_head rcu; + struct bucket_table __rcu *future_tbl; + struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; @@ -105,7 +108,6 @@ struct rhashtable_params { /** * struct rhashtable - Hash table handle * @tbl: Bucket table - * @future_tbl: Table under construction during expansion/shrinking * @nelems: Number of elements in table * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously @@ -114,7 +116,6 @@ struct rhashtable_params { */ struct rhashtable { struct bucket_table __rcu *tbl; - struct bucket_table __rcu *future_tbl; atomic_t nelems; bool being_destroyed; struct rhashtable_params p; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index ff4ea1704546..9d53a46dcca9 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -207,8 +207,9 @@ static bool rht_shrink_below_30(const struct rhashtable *ht, static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash) { - struct bucket_table *new_tbl = rht_dereference(ht->future_tbl, ht); struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *new_tbl = + rht_dereference(old_tbl->future_tbl, ht) ?: old_tbl; struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash]; int err = -ENOENT; struct rhash_head *head, *next, *entry; @@ -273,10 +274,8 @@ static void rhashtable_rehash(struct rhashtable *ht, /* Make insertions go into the new, empty table right away. Deletions * and lookups will be attempted in both tables until we synchronize. - * The synchronize_rcu() guarantees for the new table to be picked up - * so no new additions go into the old table while we relink. */ - rcu_assign_pointer(ht->future_tbl, new_tbl); + rcu_assign_pointer(old_tbl->future_tbl, new_tbl); /* Ensure the new table is visible to readers. */ smp_wmb(); @@ -400,7 +399,7 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, * also grab the bucket lock in old_tbl because until the * rehash completes ht->tbl won't be changed. */ - tbl = rht_dereference_rcu(ht->future_tbl, ht); + tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl; if (tbl != old_tbl) { hash = head_hashfn(ht, tbl, obj); spin_lock_nested(bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); @@ -525,7 +524,7 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) * visible then that guarantees the entry to still be in * old_tbl if it exists. */ - tbl = rht_dereference_rcu(ht->future_tbl, ht); + tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl; if (!ret && old_tbl != tbl) ret = __rhashtable_remove(ht, tbl, obj); @@ -599,7 +598,7 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup); void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg) { - const struct bucket_table *tbl, *old_tbl; + const struct bucket_table *tbl; struct rhash_head *he; u32 hash; @@ -618,9 +617,8 @@ restart: /* Ensure we see any new tables. */ smp_rmb(); - old_tbl = tbl; - tbl = rht_dereference_rcu(ht->future_tbl, ht); - if (unlikely(tbl != old_tbl)) + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (unlikely(tbl)) goto restart; rcu_read_unlock(); @@ -830,14 +828,13 @@ next: iter->skip = 0; } - iter->walker->tbl = rht_dereference_rcu(ht->future_tbl, ht); - if (iter->walker->tbl != tbl) { + iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (iter->walker->tbl) { iter->slot = 0; iter->skip = 0; return ERR_PTR(-EAGAIN); } - iter->walker->tbl = NULL; iter->p = NULL; out: @@ -865,8 +862,7 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) ht = iter->ht; mutex_lock(&ht->mutex); - if (rht_dereference(ht->tbl, ht) == tbl || - rht_dereference(ht->future_tbl, ht) == tbl) + if (tbl->rehash < tbl->size) list_add(&iter->walker->list, &tbl->walkers); else iter->walker->tbl = NULL; @@ -961,7 +957,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) atomic_set(&ht->nelems, 0); RCU_INIT_POINTER(ht->tbl, tbl); - RCU_INIT_POINTER(ht->future_tbl, tbl); INIT_WORK(&ht->run_work, rht_deferred_worker); -- cgit v1.3-6-gb490 From 03e69b508b6f7c51743055c9f61d1dfeadf4b635 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 14 Mar 2015 02:27:16 +0100 Subject: ebpf: add prandom helper for packet sampling This work is similar to commit 4cd3675ebf74 ("filter: added BPF random opcode") and adds a possibility for packet sampling in eBPF. Currently, this is only possible in classic BPF and useful to combine sampling with f.e. packet sockets, possible also with tc. Example function proto-type looks like: u32 (*prandom_u32)(void) = (void *)BPF_FUNC_get_prandom_u32; Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ include/uapi/linux/bpf.h | 1 + kernel/bpf/core.c | 2 ++ kernel/bpf/helpers.c | 12 ++++++++++++ net/core/filter.c | 2 ++ 5 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 80f2e0fc3d02..50bf95e29a96 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -154,4 +154,6 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_get_prandom_u32_proto; + #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3fa1af8a58d7..1c2ca2b477c8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -165,6 +165,7 @@ enum bpf_func_id { BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ + BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 50603aec766a..c1dbbb5d289b 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -661,6 +661,8 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto __weak; const struct bpf_func_proto bpf_map_update_elem_proto __weak; const struct bpf_func_proto bpf_map_delete_elem_proto __weak; +const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; + /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a3c7701a8b5e..95eb59a045ea 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -11,6 +11,7 @@ */ #include #include +#include /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -87,3 +88,14 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = { .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, }; + +static u64 bpf_get_prandom_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + return prandom_u32(); +} + +const struct bpf_func_proto bpf_get_prandom_u32_proto = { + .func = bpf_get_prandom_u32, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; diff --git a/net/core/filter.c b/net/core/filter.c index 7a4eb7030dba..4344db39af2e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1139,6 +1139,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_map_update_elem_proto; case BPF_FUNC_map_delete_elem: return &bpf_map_delete_elem_proto; + case BPF_FUNC_get_prandom_u32: + return &bpf_get_prandom_u32_proto; default: return NULL; } -- cgit v1.3-6-gb490 From c04167ce2ca0ecaeaafef006cb0d65cf01b68e42 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 14 Mar 2015 02:27:17 +0100 Subject: ebpf: add helper for obtaining current processor id This patch adds the possibility to obtain raw_smp_processor_id() in eBPF. Currently, this is only possible in classic BPF where commit da2033c28226 ("filter: add SKF_AD_RXHASH and SKF_AD_CPU") has added facilities for this. Perhaps most importantly, this would also allow us to track per CPU statistics with eBPF maps, or to implement a poor-man's per CPU data structure through eBPF maps. Example function proto-type looks like: u32 (*smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 12 ++++++++++++ net/core/filter.c | 2 ++ 5 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 50bf95e29a96..30bfd331882a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -155,5 +155,6 @@ extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; +extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1c2ca2b477c8..de1f63668daf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -166,6 +166,7 @@ enum bpf_func_id { BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ + BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c1dbbb5d289b..4139a0f8b558 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -662,6 +662,7 @@ const struct bpf_func_proto bpf_map_update_elem_proto __weak; const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; +const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 95eb59a045ea..bd7f5988ed9c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -12,6 +12,7 @@ #include #include #include +#include /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -99,3 +100,14 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = { .gpl_only = false, .ret_type = RET_INTEGER, }; + +static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + return raw_smp_processor_id(); +} + +const struct bpf_func_proto bpf_get_smp_processor_id_proto = { + .func = bpf_get_smp_processor_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; diff --git a/net/core/filter.c b/net/core/filter.c index 4344db39af2e..33310eee6134 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1141,6 +1141,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_map_delete_elem_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_smp_processor_id_proto; default: return NULL; } -- cgit v1.3-6-gb490 From 9bac3d6d548e5cc925570b263f35b70a00a00ffd Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 13 Mar 2015 11:57:42 -0700 Subject: bpf: allow extended BPF programs access skb fields introduce user accessible mirror of in-kernel 'struct sk_buff': struct __sk_buff { __u32 len; __u32 pkt_type; __u32 mark; __u32 queue_mapping; }; bpf programs can do: int bpf_prog(struct __sk_buff *skb) { __u32 var = skb->pkt_type; which will be compiled to bpf assembler as: dst_reg = *(u32 *)(src_reg + 4) // 4 == offsetof(struct __sk_buff, pkt_type) bpf verifier will check validity of access and will convert it to: dst_reg = *(u8 *)(src_reg + offsetof(struct sk_buff, __pkt_type_offset)) dst_reg &= 7 since skb->pkt_type is a bitfield. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 5 +- include/uapi/linux/bpf.h | 10 ++++ kernel/bpf/syscall.c | 2 +- kernel/bpf/verifier.c | 152 ++++++++++++++++++++++++++++++++++++++++++----- net/core/filter.c | 100 +++++++++++++++++++++++++------ 5 files changed, 234 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 30bfd331882a..280a315de8d6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -103,6 +103,9 @@ struct bpf_verifier_ops { * with 'type' (read or write) is allowed */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type); + + u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off, + struct bpf_insn *insn); }; struct bpf_prog_type_list { @@ -133,7 +136,7 @@ struct bpf_map *bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); /* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); +int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index de1f63668daf..929545a27546 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -170,4 +170,14 @@ enum bpf_func_id { __BPF_FUNC_MAX_ID, }; +/* user accessible mirror of in-kernel sk_buff. + * new fields can only be added to the end of this structure + */ +struct __sk_buff { + __u32 len; + __u32 pkt_type; + __u32 mark; + __u32 queue_mapping; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 669719ccc9ee..ea75c654af1b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -519,7 +519,7 @@ static int bpf_prog_load(union bpf_attr *attr) goto free_prog; /* run eBPF verifier */ - err = bpf_check(prog, attr); + err = bpf_check(&prog, attr); if (err < 0) goto free_used_maps; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e6b522496250..c22ebd36fa4b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1620,11 +1620,10 @@ static int do_check(struct verifier_env *env) return err; } else if (class == BPF_LDX) { - if (BPF_MODE(insn->code) != BPF_MEM || - insn->imm != 0) { - verbose("BPF_LDX uses reserved fields\n"); - return -EINVAL; - } + enum bpf_reg_type src_reg_type; + + /* check for reserved fields is already done */ + /* check src operand */ err = check_reg_arg(regs, insn->src_reg, SRC_OP); if (err) @@ -1643,6 +1642,29 @@ static int do_check(struct verifier_env *env) if (err) return err; + src_reg_type = regs[insn->src_reg].type; + + if (insn->imm == 0 && BPF_SIZE(insn->code) == BPF_W) { + /* saw a valid insn + * dst_reg = *(u32 *)(src_reg + off) + * use reserved 'imm' field to mark this insn + */ + insn->imm = src_reg_type; + + } else if (src_reg_type != insn->imm && + (src_reg_type == PTR_TO_CTX || + insn->imm == PTR_TO_CTX)) { + /* ABuser program is trying to use the same insn + * dst_reg = *(u32*) (src_reg + off) + * with different pointer types: + * src_reg == ctx in one branch and + * src_reg == stack|map in some other branch. + * Reject it. + */ + verbose("same insn cannot be used with different pointers\n"); + return -EINVAL; + } + } else if (class == BPF_STX) { if (BPF_MODE(insn->code) == BPF_XADD) { err = check_xadd(env, insn); @@ -1790,6 +1812,13 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) int i, j; for (i = 0; i < insn_cnt; i++, insn++) { + if (BPF_CLASS(insn->code) == BPF_LDX && + (BPF_MODE(insn->code) != BPF_MEM || + insn->imm != 0)) { + verbose("BPF_LDX uses reserved fields\n"); + return -EINVAL; + } + if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { struct bpf_map *map; struct fd f; @@ -1881,6 +1910,92 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) insn->src_reg = 0; } +static void adjust_branches(struct bpf_prog *prog, int pos, int delta) +{ + struct bpf_insn *insn = prog->insnsi; + int insn_cnt = prog->len; + int i; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (BPF_CLASS(insn->code) != BPF_JMP || + BPF_OP(insn->code) == BPF_CALL || + BPF_OP(insn->code) == BPF_EXIT) + continue; + + /* adjust offset of jmps if necessary */ + if (i < pos && i + insn->off + 1 > pos) + insn->off += delta; + else if (i > pos && i + insn->off + 1 < pos) + insn->off -= delta; + } +} + +/* convert load instructions that access fields of 'struct __sk_buff' + * into sequence of instructions that access fields of 'struct sk_buff' + */ +static int convert_ctx_accesses(struct verifier_env *env) +{ + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + struct bpf_insn insn_buf[16]; + struct bpf_prog *new_prog; + u32 cnt; + int i; + + if (!env->prog->aux->ops->convert_ctx_access) + return 0; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code != (BPF_LDX | BPF_MEM | BPF_W)) + continue; + + if (insn->imm != PTR_TO_CTX) { + /* clear internal mark */ + insn->imm = 0; + continue; + } + + cnt = env->prog->aux->ops-> + convert_ctx_access(insn->dst_reg, insn->src_reg, + insn->off, insn_buf); + if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { + verbose("bpf verifier is misconfigured\n"); + return -EINVAL; + } + + if (cnt == 1) { + memcpy(insn, insn_buf, sizeof(*insn)); + continue; + } + + /* several new insns need to be inserted. Make room for them */ + insn_cnt += cnt - 1; + new_prog = bpf_prog_realloc(env->prog, + bpf_prog_size(insn_cnt), + GFP_USER); + if (!new_prog) + return -ENOMEM; + + new_prog->len = insn_cnt; + + memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1, + sizeof(*insn) * (insn_cnt - i - cnt)); + + /* copy substitute insns in place of load instruction */ + memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt); + + /* adjust branches in the whole program */ + adjust_branches(new_prog, i, cnt - 1); + + /* keep walking new program and skip insns we just inserted */ + env->prog = new_prog; + insn = new_prog->insnsi + i + cnt - 1; + i += cnt - 1; + } + + return 0; +} + static void free_states(struct verifier_env *env) { struct verifier_state_list *sl, *sln; @@ -1903,13 +2018,13 @@ static void free_states(struct verifier_env *env) kfree(env->explored_states); } -int bpf_check(struct bpf_prog *prog, union bpf_attr *attr) +int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) { char __user *log_ubuf = NULL; struct verifier_env *env; int ret = -EINVAL; - if (prog->len <= 0 || prog->len > BPF_MAXINSNS) + if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS) return -E2BIG; /* 'struct verifier_env' can be global, but since it's not small, @@ -1919,7 +2034,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr) if (!env) return -ENOMEM; - env->prog = prog; + env->prog = *prog; /* grab the mutex to protect few globals used by verifier */ mutex_lock(&bpf_verifier_lock); @@ -1951,7 +2066,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr) if (ret < 0) goto skip_full_check; - env->explored_states = kcalloc(prog->len, + env->explored_states = kcalloc(env->prog->len, sizeof(struct verifier_state_list *), GFP_USER); ret = -ENOMEM; @@ -1968,6 +2083,10 @@ skip_full_check: while (pop_stack(env, NULL) >= 0); free_states(env); + if (ret == 0) + /* program is valid, convert *(u32*)(ctx + off) accesses */ + ret = convert_ctx_accesses(env); + if (log_level && log_len >= log_size - 1) { BUG_ON(log_len >= log_size); /* verifier log exceeded user supplied buffer */ @@ -1983,18 +2102,18 @@ skip_full_check: if (ret == 0 && env->used_map_cnt) { /* if program passed verifier, update used_maps in bpf_prog_info */ - prog->aux->used_maps = kmalloc_array(env->used_map_cnt, - sizeof(env->used_maps[0]), - GFP_KERNEL); + env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt, + sizeof(env->used_maps[0]), + GFP_KERNEL); - if (!prog->aux->used_maps) { + if (!env->prog->aux->used_maps) { ret = -ENOMEM; goto free_log_buf; } - memcpy(prog->aux->used_maps, env->used_maps, + memcpy(env->prog->aux->used_maps, env->used_maps, sizeof(env->used_maps[0]) * env->used_map_cnt); - prog->aux->used_map_cnt = env->used_map_cnt; + env->prog->aux->used_map_cnt = env->used_map_cnt; /* program is valid. Convert pseudo bpf_ld_imm64 into generic * bpf_ld_imm64 instructions @@ -2006,11 +2125,12 @@ free_log_buf: if (log_level) vfree(log_buf); free_env: - if (!prog->aux->used_maps) + if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release * them now. Otherwise free_bpf_prog_info() will release them. */ release_maps(env); + *prog = env->prog; kfree(env); mutex_unlock(&bpf_verifier_lock); return ret; diff --git a/net/core/filter.c b/net/core/filter.c index 33310eee6134..4e9dd0ad0d5b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -150,10 +150,43 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return prandom_u32(); } +static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, + struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + + switch (skb_field) { + case SKF_AD_MARK: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, mark)); + break; + + case SKF_AD_PKTTYPE: + *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET()); + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX); +#ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5); +#endif + break; + + case SKF_AD_QUEUE: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, queue_mapping)); + break; + } + + return insn - insn_buf; +} + static bool convert_bpf_extensions(struct sock_filter *fp, struct bpf_insn **insnp) { struct bpf_insn *insn = *insnp; + u32 cnt; switch (fp->k) { case SKF_AD_OFF + SKF_AD_PROTOCOL: @@ -167,13 +200,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_PKTTYPE: - *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, - PKT_TYPE_OFFSET()); - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); -#ifdef __BIG_ENDIAN_BITFIELD - insn++; - *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5); -#endif + cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_IFINDEX: @@ -197,10 +225,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_MARK: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - - *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, mark)); + cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_RXHASH: @@ -211,10 +237,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_QUEUE: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); - - *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, queue_mapping)); + cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_VLAN_TAG: @@ -1151,13 +1175,55 @@ sk_filter_func_proto(enum bpf_func_id func_id) static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type) { - /* skb fields cannot be accessed yet */ - return false; + /* only read is allowed */ + if (type != BPF_READ) + return false; + + /* check bounds */ + if (off < 0 || off >= sizeof(struct __sk_buff)) + return false; + + /* disallow misaligned access */ + if (off % size != 0) + return false; + + /* all __sk_buff fields are __u32 */ + if (size != 4) + return false; + + return true; +} + +static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, + struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + + switch (ctx_off) { + case offsetof(struct __sk_buff, len): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, len)); + break; + + case offsetof(struct __sk_buff, mark): + return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, pkt_type): + return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, queue_mapping): + return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn); + } + + return insn - insn_buf; } static const struct bpf_verifier_ops sk_filter_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, + .convert_ctx_access = sk_filter_convert_ctx_access, }; static struct bpf_prog_type_list sk_filter_type __read_mostly = { -- cgit v1.3-6-gb490 From 4170604feec780d00e7511c24fa0f6e5c2e4ed75 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 15 Mar 2015 21:07:14 -0700 Subject: switchdev: add swdev ops As discussed at netconf, introduce swdev_ops as first step to move switchdev ops from ndo to swdev. This will keep switchdev from cluttering up ndo ops space. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ include/net/switchdev.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ddab1a2a07a0..9e8a2a933c68 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1577,6 +1577,9 @@ struct net_device { const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; const struct forwarding_accel_ops *fwd_ops; +#ifdef CONFIG_NET_SWITCHDEV + const struct swdev_ops *swdev_ops; +#endif const struct header_ops *header_ops; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 1a9382febcc3..e5de53f92482 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -14,6 +14,44 @@ #include #include +struct fib_info; + +/** + * struct switchdev_ops - switchdev operations + * + * int (*swdev_parent_id_get)(struct net_device *dev, + * struct netdev_phys_item_id *psid); + * Called to get an ID of the switch chip this port is part of. + * If driver implements this, it indicates that it represents a port + * of a switch chip. + * + * int (*swdev_port_stp_update)(struct net_device *dev, u8 state); + * Called to notify switch device port of bridge port STP + * state change. + * + * int (*swdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, + * int dst_len, struct fib_info *fi, + * u8 tos, u8 type, u32 nlflags, u32 tb_id); + * Called to add/modify IPv4 route to switch device. + * + * int (*swdev_fib_ipv4_del)(struct net_device *dev, __be32 dst, + * int dst_len, struct fib_info *fi, + * u8 tos, u8 type, u32 tb_id); + * Called to delete IPv4 route from switch device. + */ +struct swdev_ops { + int (*swdev_parent_id_get)(struct net_device *dev, + struct netdev_phys_item_id *psid); + int (*swdev_port_stp_update)(struct net_device *dev, u8 state); + int (*swdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, + int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 nlflags, + u32 tb_id); + int (*swdev_fib_ipv4_del)(struct net_device *dev, __be32 dst, + int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id); +}; + enum netdev_switch_notifier_type { NETDEV_SWITCH_FDB_ADD = 1, NETDEV_SWITCH_FDB_DEL, -- cgit v1.3-6-gb490 From 812a1c3ff3ee9d5100e0e71edb06681014e84a9b Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 15 Mar 2015 21:07:16 -0700 Subject: netdev: remove ndo ops for switchdev Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9e8a2a933c68..dd1d069758be 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -768,8 +768,6 @@ struct netdev_phys_item_id { typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); -struct fib_info; - /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1024,23 +1022,6 @@ struct fib_info; * be otherwise expressed by feature flags. The check is called with * the set of features that the stack has calculated and it returns * those the driver believes to be appropriate. - * - * int (*ndo_switch_parent_id_get)(struct net_device *dev, - * struct netdev_phys_item_id *psid); - * Called to get an ID of the switch chip this port is part of. - * If driver implements this, it indicates that it represents a port - * of a switch chip. - * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state); - * Called to notify switch device port of bridge port STP - * state change. - * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst, - * int dst_len, struct fib_info *fi, - * u8 tos, u8 type, u32 nlflags, u32 tb_id); - * Called to add/modify IPv4 route to switch device. - * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst, - * int dst_len, struct fib_info *fi, - * u8 tos, u8 type, u32 tb_id); - * Called to delete IPv4 route from switch device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1197,25 +1178,6 @@ struct net_device_ops { netdev_features_t (*ndo_features_check) (struct sk_buff *skb, struct net_device *dev, netdev_features_t features); -#ifdef CONFIG_NET_SWITCHDEV - int (*ndo_switch_parent_id_get)(struct net_device *dev, - struct netdev_phys_item_id *psid); - int (*ndo_switch_port_stp_update)(struct net_device *dev, - u8 state); - int (*ndo_switch_fib_ipv4_add)(struct net_device *dev, - __be32 dst, - int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, - u32 tb_id); - int (*ndo_switch_fib_ipv4_del)(struct net_device *dev, - __be32 dst, - int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 tb_id); -#endif }; /** -- cgit v1.3-6-gb490 From c055d5b03bb4cb69d349d787c9787c0383abd8b2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Mar 2015 05:08:19 +0100 Subject: netfilter: bridge: query conntrack about skb dnat ask conntrack instead of storing ipv4 address in nf_bridge_info->data. Ths avoids the need to use ->data during NF_PRE_ROUTING. Only two functions that need ->data remain. These will be addressed in followup patches. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 6 ------ net/bridge/br_netfilter.c | 27 +++++++++++++++++++++------ 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index bb39113ea596..de123d769ffc 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -54,12 +54,6 @@ static inline unsigned int nf_bridge_pad(const struct sk_buff *skb) return 0; } -struct bridge_skb_cb { - union { - __be32 ipv4; - } daddr; -}; - static inline void br_drop_fake_rtable(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index b260a97275db..261fcd5a42d6 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -37,17 +37,16 @@ #include #include +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include +#endif + #include #include "br_private.h" #ifdef CONFIG_SYSCTL #include #endif -#define skb_origaddr(skb) (((struct bridge_skb_cb *) \ - (skb->nf_bridge->data))->daddr.ipv4) -#define store_orig_dstaddr(skb) (skb_origaddr(skb) = ip_hdr(skb)->daddr) -#define dnat_took_place(skb) (skb_origaddr(skb) != ip_hdr(skb)->daddr) - #ifdef CONFIG_SYSCTL static struct ctl_table_header *brnf_sysctl_header; static int brnf_call_iptables __read_mostly = 1; @@ -322,6 +321,22 @@ free_skb: return 0; } +static bool dnat_took_place(const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || nf_ct_is_untracked(ct)) + return false; + + return test_bit(IPS_DST_NAT_BIT, &ct->status); +#else + return false; +#endif +} + /* This requires some explaining. If DNAT has taken place, * we will need to fix up the destination Ethernet address. * @@ -625,7 +640,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, return NF_DROP; if (!setup_pre_routing(skb)) return NF_DROP; - store_orig_dstaddr(skb); + skb->protocol = htons(ETH_P_IP); NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, -- cgit v1.3-6-gb490 From e4bb9bcbfb7d67431dfd49860f62770a7f40193b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Mar 2015 10:36:48 +0100 Subject: netfilter: bridge: remove BRNF_STATE_BRIDGED flag Its not needed anymore since 2bf540b73ed5b ([NETFILTER]: bridge-netfilter: remove deferred hooks). Before this it was possible to have physoutdev set for locally generated packets -- this isn't the case anymore: BRNF_STATE_BRIDGED flag is set when we assign nf_bridge->physoutdev, so physoutdev != NULL means BRNF_STATE_BRIDGED is set. If physoutdev is NULL, then we are looking at locally-delivered and routed packet. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 1 - net/bridge/br_netfilter.c | 9 ++++++--- net/netfilter/xt_physdev.c | 3 +-- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index de123d769ffc..ed0d3bf953c3 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -19,7 +19,6 @@ enum nf_br_hook_priorities { #define BRNF_PKT_TYPE 0x01 #define BRNF_BRIDGED_DNAT 0x02 -#define BRNF_BRIDGED 0x04 #define BRNF_NF_BRIDGE_PREROUTING 0x08 #define BRNF_8021Q 0x10 #define BRNF_PPPoE 0x20 diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 261fcd5a42d6..bd2d24d1ff21 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -736,8 +736,6 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb)) return NF_DROP; - /* The physdev module checks on this */ - nf_bridge->mask |= BRNF_BRIDGED; nf_bridge->physoutdev = skb->dev; if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); @@ -857,7 +855,12 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, struct net_device *realoutdev = bridge_parent(skb->dev); u_int8_t pf; - if (!nf_bridge || !(nf_bridge->mask & BRNF_BRIDGED)) + /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in + * on a bridge, but was delivered locally and is now being routed: + * + * POST_ROUTING was already invoked from the ip stack. + */ + if (!nf_bridge || !nf_bridge->physoutdev) return NF_ACCEPT; if (!realoutdev) diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index f440f57a452f..50a52043650f 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -56,8 +56,7 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par) /* This only makes sense in the FORWARD and POSTROUTING chains */ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && - (!!(nf_bridge->mask & BRNF_BRIDGED) ^ - !(info->invert & XT_PHYSDEV_OP_BRIDGED))) + (!!nf_bridge->physoutdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED))) return false; if ((info->bitmask & XT_PHYSDEV_OP_ISIN && -- cgit v1.3-6-gb490 From e03826d5045e81a66a4fad7be9a8ecdaeb7911cf Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 17 Mar 2015 15:56:04 +0530 Subject: regulator: palmas: Correct TPS659038 register definition for REGEN2 The register offset for REGEN2_CTRL in different for TPS659038 chip as when compared with other Palmas family PMICs. In the case of TPS659038 the wrong offset pointed to PLLEN_CTRL and was causing a hang. Correcting the same. Signed-off-by: Keerthy Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/regulator/palmas-regulator.c | 4 ++++ include/linux/mfd/palmas.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index 9205f433573c..18198316b6cf 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -1572,6 +1572,10 @@ static int palmas_regulators_probe(struct platform_device *pdev) if (!pmic) return -ENOMEM; + if (of_device_is_compatible(node, "ti,tps659038-pmic")) + palmas_generic_regs_info[PALMAS_REG_REGEN2].ctrl_addr = + TPS659038_REGEN2_CTRL; + pmic->dev = &pdev->dev; pmic->palmas = palmas; palmas->pmic = pmic; diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index fb0390a1a498..ee7b1ce7a6f8 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -2999,6 +2999,9 @@ enum usb_irq_events { #define PALMAS_GPADC_TRIM15 0x0E #define PALMAS_GPADC_TRIM16 0x0F +/* TPS659038 regen2_ctrl offset iss different from palmas */ +#define TPS659038_REGEN2_CTRL 0x12 + /* TPS65917 Interrupt registers */ /* Registers for function INTERRUPT */ -- cgit v1.3-6-gb490 From a2f4870697a5bcf4a87073ec6b32dd2928c1211d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 17 Mar 2015 12:23:19 -0400 Subject: fs: make sure the timestamps for lazytime inodes eventually get written Jan Kara pointed out that if there is an inode which is constantly getting dirtied with I_DIRTY_PAGES, an inode with an updated timestamp will never be written since inode->dirtied_when is constantly getting updated. We fix this by adding an extra field to the inode, dirtied_time_when, so inodes with a stale dirtytime can get detected and handled. In addition, if we have a dirtytime inode caused by an atime update, and there is no write activity on the file system, we need to have a secondary system to make sure these inodes get written out. We do this by setting up a second delayed work structure which wakes up the CPU much more rarely compared to writeback_expire_centisecs. Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/fs-writeback.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++------- include/linux/fs.h | 1 + 2 files changed, 73 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e907052eeadb..2cfcd74faf87 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -53,6 +53,18 @@ struct wb_writeback_work { struct completion *done; /* set if the caller waits */ }; +/* + * If an inode is constantly having its pages dirtied, but then the + * updates stop dirtytime_expire_interval seconds in the past, it's + * possible for the worst case time between when an inode has its + * timestamps updated and when they finally get written out to be two + * dirtytime_expire_intervals. We set the default to 12 hours (in + * seconds), which means most of the time inodes will have their + * timestamps written to disk after 12 hours, but in the worst case a + * few inodes might not their timestamps updated for 24 hours. + */ +unsigned int dirtytime_expire_interval = 12 * 60 * 60; + /** * writeback_in_progress - determine whether there is writeback in progress * @bdi: the device's backing_dev_info structure. @@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue, if ((flags & EXPIRE_DIRTY_ATIME) == 0) older_than_this = work->older_than_this; - else if ((work->reason == WB_REASON_SYNC) == 0) { - expire_time = jiffies - (HZ * 86400); + else if (!work->for_sync) { + expire_time = jiffies - (dirtytime_expire_interval * HZ); older_than_this = &expire_time; } while (!list_empty(delaying_queue)) { @@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, */ redirty_tail(inode, wb); } else if (inode->i_state & I_DIRTY_TIME) { + inode->dirtied_when = jiffies; list_move(&inode->i_wb_list, &wb->b_dirty_time); } else { /* The inode is clean. Remove from writeback lists. */ @@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_lock(&inode->i_lock); dirty = inode->i_state & I_DIRTY; - if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) && - (inode->i_state & I_DIRTY_TIME)) || - (inode->i_state & I_DIRTY_TIME_EXPIRED)) { - dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; - trace_writeback_lazytime(inode); - } + if (inode->i_state & I_DIRTY_TIME) { + if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || + unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) || + unlikely(time_after(jiffies, + (inode->dirtied_time_when + + dirtytime_expire_interval * HZ)))) { + dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; + trace_writeback_lazytime(inode); + } + } else + inode->i_state &= ~I_DIRTY_TIME_EXPIRED; inode->i_state &= ~dirty; /* @@ -1131,6 +1149,45 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) rcu_read_unlock(); } +/* + * Wake up bdi's periodically to make sure dirtytime inodes gets + * written back periodically. We deliberately do *not* check the + * b_dirtytime list in wb_has_dirty_io(), since this would cause the + * kernel to be constantly waking up once there are any dirtytime + * inodes on the system. So instead we define a separate delayed work + * function which gets called much more rarely. (By default, only + * once every 12 hours.) + * + * If there is any other write activity going on in the file system, + * this function won't be necessary. But if the only thing that has + * happened on the file system is a dirtytime inode caused by an atime + * update, we need this infrastructure below to make sure that inode + * eventually gets pushed out to disk. + */ +static void wakeup_dirtytime_writeback(struct work_struct *w); +static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback); + +static void wakeup_dirtytime_writeback(struct work_struct *w) +{ + struct backing_dev_info *bdi; + + rcu_read_lock(); + list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { + if (list_empty(&bdi->wb.b_dirty_time)) + continue; + bdi_wakeup_thread(bdi); + } + rcu_read_unlock(); + schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); +} + +static int __init start_dirtytime_writeback(void) +{ + schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); + return 0; +} +__initcall(start_dirtytime_writeback); + static noinline void block_dump___mark_inode_dirty(struct inode *inode) { if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { @@ -1269,8 +1326,13 @@ void __mark_inode_dirty(struct inode *inode, int flags) } inode->dirtied_when = jiffies; - list_move(&inode->i_wb_list, dirtytime ? - &bdi->wb.b_dirty_time : &bdi->wb.b_dirty); + if (dirtytime) + inode->dirtied_time_when = jiffies; + if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES)) + list_move(&inode->i_wb_list, &bdi->wb.b_dirty); + else + list_move(&inode->i_wb_list, + &bdi->wb.b_dirty_time); spin_unlock(&bdi->wb.list_lock); trace_writeback_dirty_inode_enqueue(inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index b4d71b5e1ff2..f4131e8ead74 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -604,6 +604,7 @@ struct inode { struct mutex i_mutex; unsigned long dirtied_when; /* jiffies of first dirtying */ + unsigned long dirtied_time_when; struct hlist_node i_hash; struct list_head i_wb_list; /* backing dev IO list */ -- cgit v1.3-6-gb490 From 1efff914afac8a965ad63817ecf8861a927c2ace Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 17 Mar 2015 12:23:32 -0400 Subject: fs: add dirtytime_expire_seconds sysctl Add a tuning knob so we can adjust the dirtytime expiration timeout, which is very useful for testing lazytime. Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/fs-writeback.c | 11 +++++++++++ include/linux/writeback.h | 3 +++ kernel/sysctl.c | 8 ++++++++ 3 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 2cfcd74faf87..32a8bbd7a9ad 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1188,6 +1188,17 @@ static int __init start_dirtytime_writeback(void) } __initcall(start_dirtytime_writeback); +int dirtytime_interval_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret == 0 && write) + mod_delayed_work(system_wq, &dirtytime_work, 0); + return ret; +} + static noinline void block_dump___mark_inode_dirty(struct inode *inode) { if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 00048339c23e..b2dd371ec0ca 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -130,6 +130,7 @@ extern int vm_dirty_ratio; extern unsigned long vm_dirty_bytes; extern unsigned int dirty_writeback_interval; extern unsigned int dirty_expire_interval; +extern unsigned int dirtytime_expire_interval; extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; @@ -146,6 +147,8 @@ extern int dirty_ratio_handler(struct ctl_table *table, int write, extern int dirty_bytes_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +int dirtytime_interval_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); struct ctl_table; int dirty_writeback_centisecs_handler(struct ctl_table *, int, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 88ea2d6e0031..ce410bb9f2e1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1227,6 +1227,14 @@ static struct ctl_table vm_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &zero, }, + { + .procname = "dirtytime_expire_seconds", + .data = &dirtytime_expire_interval, + .maxlen = sizeof(dirty_expire_interval), + .mode = 0644, + .proc_handler = dirtytime_interval_handler, + .extra1 = &zero, + }, { .procname = "nr_pdflush_threads", .mode = 0444 /* read-only */, -- cgit v1.3-6-gb490 From 9439ce00f208d95703a6725e4ea986dd90e37ffd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Mar 2015 18:32:29 -0700 Subject: tcp: rename struct tcp_request_sock listener The listener field in struct tcp_request_sock is a pointer back to the listener. We now have req->rsk_listener, so TCP only needs one boolean and not a full pointer. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- net/core/request_sock.c | 18 +++++++----------- net/ipv4/inet_connection_sock.c | 7 +++---- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_fastopen.c | 7 +------ net/ipv4/tcp_input.c | 2 +- net/ipv6/syncookies.c | 2 +- 7 files changed, 15 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 97dbf16f7d9d..f869ae8afbaf 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -111,7 +111,7 @@ struct tcp_request_sock_ops; struct tcp_request_sock { struct inet_request_sock req; const struct tcp_request_sock_ops *af_specific; - struct sock *listener; /* needed for TFO */ + bool tfo_listener; u32 rcv_isn; u32 snt_isn; u32 snt_synack; /* synack sent time */ diff --git a/net/core/request_sock.c b/net/core/request_sock.c index e910317ef6d9..cc39a2aa663a 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -153,24 +153,22 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) * case might also exist in tcp_v4_hnd_req() that will trigger this locking * order. * - * When a TFO req is created, it needs to sock_hold its listener to prevent - * the latter data structure from going away. - * - * This function also sets "treq->listener" to NULL and unreference listener - * socket. treq->listener is used by the listener so it is protected by the + * This function also sets "treq->tfo_listener" to false. + * treq->tfo_listener is used by the listener so it is protected by the * fastopenq->lock in this function. */ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, bool reset) { - struct sock *lsk = tcp_rsk(req)->listener; - struct fastopen_queue *fastopenq = - inet_csk(lsk)->icsk_accept_queue.fastopenq; + struct sock *lsk = req->rsk_listener; + struct fastopen_queue *fastopenq; + + fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq; tcp_sk(sk)->fastopen_rsk = NULL; spin_lock_bh(&fastopenq->lock); fastopenq->qlen--; - tcp_rsk(req)->listener = NULL; + tcp_rsk(req)->tfo_listener = false; if (req->sk) /* the child socket hasn't been accepted yet */ goto out; @@ -179,7 +177,6 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, * special RST handling below. */ spin_unlock_bh(&fastopenq->lock); - sock_put(lsk); reqsk_put(req); return; } @@ -201,5 +198,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, fastopenq->qlen++; out: spin_unlock_bh(&fastopenq->lock); - sock_put(lsk); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3390ba6f96b2..741f0d96a7f7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -325,7 +325,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) sk_acceptq_removed(sk); if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) { spin_lock_bh(&queue->fastopenq->lock); - if (tcp_rsk(req)->listener) { + if (tcp_rsk(req)->tfo_listener) { /* We are still waiting for the final ACK from 3WHS * so can't free req now. Instead, we set req->sk to * NULL to signify that the child socket is taken @@ -817,9 +817,9 @@ void inet_csk_listen_stop(struct sock *sk) percpu_counter_inc(sk->sk_prot->orphan_count); - if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) { + if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { BUG_ON(tcp_sk(child)->fastopen_rsk != req); - BUG_ON(sk != tcp_rsk(req)->listener); + BUG_ON(sk != req->rsk_listener); /* Paranoid, to prevent race condition if * an inbound pkt destined for child is @@ -828,7 +828,6 @@ void inet_csk_listen_stop(struct sock *sk) * tcp_v4_destroy_sock(). */ tcp_sk(child)->fastopen_rsk = NULL; - sock_put(sk); } inet_csk_destroy_sock(child); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index eb940750bb1b..574b67765a06 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -345,7 +345,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; - treq->listener = NULL; + treq->tfo_listener = false; ireq->ireq_family = AF_INET; ireq->ir_iif = sk->sk_bound_dev_if; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 84381319e1bc..186fd394ec0a 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -155,12 +155,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, tp = tcp_sk(child); tp->fastopen_rsk = req; - /* Do a hold on the listner sk so that if the listener is being - * closed, the child that has been accepted can live on and still - * access listen_lock. - */ - sock_hold(sk); - tcp_rsk(req)->listener = sk; + tcp_rsk(req)->tfo_listener = true; /* RFC1323: The window in SYN & SYN/ACK segments is never * scaled. So correct it appropriately. diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fbe518981d36..a94ddb96fc85 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6120,7 +6120,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (err || want_cookie) goto drop_and_free; - tcp_rsk(req)->listener = NULL; + tcp_rsk(req)->tfo_listener = false; af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 039e74dd29fe..1ef0c926ce9d 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -195,7 +195,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); - treq->listener = NULL; + treq->tfo_listener = false; ireq->ireq_family = AF_INET6; if (security_inet_conn_request(sk, skb, req)) -- cgit v1.3-6-gb490 From cf39284d41f67964cf42b21bb386c012cf5b7f65 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 18 Mar 2015 08:57:41 +0800 Subject: regulator: Fix documentation for regmap in the config dev_get_regulator() does not exist, fix the typo. Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index d4ad5b5a02bb..045f709cb89b 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -316,7 +316,7 @@ struct regulator_desc { * @driver_data: private regulator data * @of_node: OpenFirmware node to parse for device tree bindings (may be * NULL). - * @regmap: regmap to use for core regmap helpers if dev_get_regulator() is + * @regmap: regmap to use for core regmap helpers if dev_get_regmap() is * insufficient. * @ena_gpio_initialized: GPIO controlling regulator enable was properly * initialized, meaning that >= 0 is a valid gpio -- cgit v1.3-6-gb490 From 6aebd940840a4d3a0a8ffc5883d3892f4bd61e90 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Mar 2015 20:01:15 +1100 Subject: rhashtable: Remove shift from bucket_table Keeping both size and shift is silly. We only need one. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 -- lib/rhashtable.c | 5 ++--- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 1695378b3c5b..f16e85692959 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -51,7 +51,6 @@ struct rhash_head { * @size: Number of hash buckets * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash - * @shift: Current size (1 << shift) * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets * @walkers: List of active walkers @@ -63,7 +62,6 @@ struct bucket_table { unsigned int size; unsigned int rehash; u32 hash_rnd; - u32 shift; unsigned int locks_mask; spinlock_t *locks; struct list_head walkers; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 09a7ada89ade..097400362467 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -162,7 +162,6 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, return NULL; tbl->size = nbuckets; - tbl->shift = ilog2(nbuckets); if (alloc_bucket_locks(ht, tbl) < 0) { bucket_table_free(tbl); @@ -189,7 +188,7 @@ static bool rht_grow_above_75(const struct rhashtable *ht, { /* Expand table when exceeding 75% load */ return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) && - (!ht->p.max_shift || tbl->shift < ht->p.max_shift); + (!ht->p.max_shift || tbl->size < (1 << ht->p.max_shift)); } /** @@ -202,7 +201,7 @@ static bool rht_shrink_below_30(const struct rhashtable *ht, { /* Shrink table beneath 30% load */ return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) && - tbl->shift > ht->p.min_shift; + tbl->size > (1 << ht->p.min_shift); } static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash) -- cgit v1.3-6-gb490 From c2e213cff701fce71a0aba8de82f2c2a4acf52ae Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Mar 2015 20:01:16 +1100 Subject: rhashtable: Introduce max_size/min_size This patch adds the parameters max_size and min_size which are meant to replace max_shift and min_shift. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ++++ lib/rhashtable.c | 12 ++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f16e85692959..81267fef85d7 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -85,6 +85,8 @@ struct rhashtable; * @head_offset: Offset of rhash_head in struct to be hashed * @max_shift: Maximum number of shifts while expanding * @min_shift: Minimum number of shifts while shrinking + * @max_size: Maximum size while expanding + * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Function to hash key @@ -97,6 +99,8 @@ struct rhashtable_params { size_t head_offset; size_t max_shift; size_t min_shift; + unsigned int max_size; + unsigned int min_size; u32 nulls_base; size_t locks_mul; rht_hashfn_t hashfn; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 097400362467..c4061bbd0113 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -27,7 +27,7 @@ #include #define HASH_DEFAULT_SIZE 64UL -#define HASH_MIN_SIZE 4UL +#define HASH_MIN_SIZE 4U #define BUCKET_LOCKS_PER_CPU 128UL /* Base bits plus 1 bit for nulls marker */ @@ -188,7 +188,8 @@ static bool rht_grow_above_75(const struct rhashtable *ht, { /* Expand table when exceeding 75% load */ return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) && - (!ht->p.max_shift || tbl->size < (1 << ht->p.max_shift)); + (!ht->p.max_shift || tbl->size < (1 << ht->p.max_shift)) && + (!ht->p.max_size || tbl->size < ht->p.max_size); } /** @@ -201,7 +202,8 @@ static bool rht_shrink_below_30(const struct rhashtable *ht, { /* Shrink table beneath 30% load */ return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) && - tbl->size > (1 << ht->p.min_shift); + tbl->size > (1 << ht->p.min_shift) && + tbl->size > ht->p.min_size; } static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash) @@ -873,7 +875,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop); static size_t rounded_hashtable_size(struct rhashtable_params *params) { return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), - 1UL << params->min_shift); + max(1UL << params->min_shift, + (unsigned long)params->min_size)); } /** @@ -935,6 +938,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) params->min_shift = max_t(size_t, params->min_shift, ilog2(HASH_MIN_SIZE)); + params->min_size = max(params->min_size, HASH_MIN_SIZE); if (params->nelem_hint) size = rounded_hashtable_size(params); -- cgit v1.3-6-gb490 From e2e21c1c5808e5dfd88d3606cd6386cf85f6f5b1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Mar 2015 20:01:21 +1100 Subject: rhashtable: Remove max_shift and min_shift Now that nobody uses max_shift and min_shift, we can safely remove them. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ---- lib/rhashtable.c | 7 +------ 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 81267fef85d7..99425f2be708 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -83,8 +83,6 @@ struct rhashtable; * @key_len: Length of key * @key_offset: Offset of key in struct to be hashed * @head_offset: Offset of rhash_head in struct to be hashed - * @max_shift: Maximum number of shifts while expanding - * @min_shift: Minimum number of shifts while shrinking * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker @@ -97,8 +95,6 @@ struct rhashtable_params { size_t key_len; size_t key_offset; size_t head_offset; - size_t max_shift; - size_t min_shift; unsigned int max_size; unsigned int min_size; u32 nulls_base; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index c4061bbd0113..5f8fe3e88219 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -188,7 +188,6 @@ static bool rht_grow_above_75(const struct rhashtable *ht, { /* Expand table when exceeding 75% load */ return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) && - (!ht->p.max_shift || tbl->size < (1 << ht->p.max_shift)) && (!ht->p.max_size || tbl->size < ht->p.max_size); } @@ -202,7 +201,6 @@ static bool rht_shrink_below_30(const struct rhashtable *ht, { /* Shrink table beneath 30% load */ return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) && - tbl->size > (1 << ht->p.min_shift) && tbl->size > ht->p.min_size; } @@ -875,8 +873,7 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop); static size_t rounded_hashtable_size(struct rhashtable_params *params) { return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), - max(1UL << params->min_shift, - (unsigned long)params->min_size)); + (unsigned long)params->min_size); } /** @@ -936,8 +933,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) return -EINVAL; - params->min_shift = max_t(size_t, params->min_shift, - ilog2(HASH_MIN_SIZE)); params->min_size = max(params->min_size, HASH_MIN_SIZE); if (params->nelem_hint) -- cgit v1.3-6-gb490 From f0b7d43c8a28155f50adb087a563cfc97566e477 Mon Sep 17 00:00:00 2001 From: Brad Campbell Date: Tue, 17 Mar 2015 16:25:46 -0400 Subject: cc2520: Add support for CC2591 amplifier. The TI CC2521 is an RF power amplifier that is designed to interface with the CC2520. Conveniently, it directly interfaces with the CC2520 and does not require any pins to be connected to a microcontroller/processor. Adding a CC2591 increases the CC2520's range, which is useful for border router and other wall-powered applications. Using the CC2591 with the CC2520 requires configuring the CC2520 GPIOs that are connected to the CC2591 to correctly set the CC2591 into TX and RX modes. Further, TI recommends that the CC2520_TXPOWER and CC2520_AGCCTRL1 registers are set differently to maximize the CC2591's performance. These settings are covered in TI Application Note AN065. This patch adds an optional `amplified` field to the cc2520 entry in the device tree. If present, the CC2520 will be configured to operate with a CC2591. The expected pin mapping is: CC2520 GPIO0 --> CC2591 EN CC2520 GPIO5 --> CC2591 PAEN Signed-off-by: Brad Campbell Acked-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- .../devicetree/bindings/net/ieee802154/cc2520.txt | 4 ++ drivers/net/ieee802154/cc2520.c | 55 ++++++++++++++++++---- include/linux/spi/cc2520.h | 1 + 3 files changed, 52 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt b/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt index 0071883c08d8..fb6d49f184ed 100644 --- a/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt +++ b/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt @@ -13,11 +13,15 @@ Required properties: - cca-gpio: GPIO spec for the CCA pin - vreg-gpio: GPIO spec for the VREG pin - reset-gpio: GPIO spec for the RESET pin +Optional properties: + - amplified: include if the CC2520 is connected to a CC2591 amplifier + Example: cc2520@0 { compatible = "ti,cc2520"; reg = <0>; spi-max-frequency = <4000000>; + amplified; pinctrl-names = "default"; pinctrl-0 = <&cc2520_cape_pins>; fifo-gpio = <&gpio1 18 0>; diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index 233b6c6017d4..f833b8bb6663 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -738,6 +738,8 @@ static int cc2520_get_platform_data(struct spi_device *spi, pdata->vreg = of_get_named_gpio(np, "vreg-gpio", 0); pdata->reset = of_get_named_gpio(np, "reset-gpio", 0); + pdata->amplified = of_property_read_bool(np, "amplified"); + return 0; } @@ -746,6 +748,11 @@ static int cc2520_hw_init(struct cc2520_private *priv) u8 status = 0, state = 0xff; int ret; int timeout = 100; + struct cc2520_platform_data pdata; + + ret = cc2520_get_platform_data(priv->spi, &pdata); + if (ret) + goto err_ret; ret = cc2520_read_register(priv, CC2520_FSMSTAT1, &state); if (ret) @@ -768,11 +775,47 @@ static int cc2520_hw_init(struct cc2520_private *priv) dev_vdbg(&priv->spi->dev, "oscillator brought up\n"); - /* Registers default value: section 28.1 in Datasheet */ - ret = cc2520_write_register(priv, CC2520_TXPOWER, 0xF7); - if (ret) - goto err_ret; + /* If the CC2520 is connected to a CC2591 amplifier, we must both + * configure GPIOs on the CC2520 to correctly configure the CC2591 + * and change a couple settings of the CC2520 to work with the + * amplifier. See section 8 page 17 of TI application note AN065. + * http://www.ti.com/lit/an/swra229a/swra229a.pdf + */ + if (pdata.amplified) { + ret = cc2520_write_register(priv, CC2520_TXPOWER, 0xF9); + if (ret) + goto err_ret; + ret = cc2520_write_register(priv, CC2520_AGCCTRL1, 0x16); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_GPIOCTRL0, 0x46); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_GPIOCTRL5, 0x47); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_GPIOPOLARITY, 0x1e); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_TXCTRL, 0xc1); + if (ret) + goto err_ret; + } else { + ret = cc2520_write_register(priv, CC2520_TXPOWER, 0xF7); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_AGCCTRL1, 0x11); + if (ret) + goto err_ret; + } + + /* Registers default value: section 28.1 in Datasheet */ ret = cc2520_write_register(priv, CC2520_CCACTRL0, 0x1A); if (ret) goto err_ret; @@ -797,10 +840,6 @@ static int cc2520_hw_init(struct cc2520_private *priv) if (ret) goto err_ret; - ret = cc2520_write_register(priv, CC2520_AGCCTRL1, 0x11); - if (ret) - goto err_ret; - ret = cc2520_write_register(priv, CC2520_ADCTEST0, 0x10); if (ret) goto err_ret; diff --git a/include/linux/spi/cc2520.h b/include/linux/spi/cc2520.h index 85b8ee67e937..e741e8baad92 100644 --- a/include/linux/spi/cc2520.h +++ b/include/linux/spi/cc2520.h @@ -21,6 +21,7 @@ struct cc2520_platform_data { int sfd; int reset; int vreg; + bool amplified; }; #endif -- cgit v1.3-6-gb490 From 822b3b2ebfff8e9b3d006086c527738a7ca00cd0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Mar 2015 14:57:33 +0200 Subject: net: Add max rate tx queue attribute This adds a tx_maxrate attribute to the tx queue sysfs entry allowing for max-rate limiting. Along with DCB-ETS and BQL this provides another knob to tune queue performance. The limit units are Mbps. By default it is disabled. To disable the rate limitation after it has been set for a queue, it should be set to zero. Signed-off-by: John Fastabend Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-class-net-queues | 8 +++ Documentation/networking/scaling.txt | 9 ++++ include/linux/netdevice.h | 8 +++ net/core/net-sysfs.c | 67 +++++++++++++++++++----- 4 files changed, 80 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues index 5e9aeb91d355..0c0df91b1516 100644 --- a/Documentation/ABI/testing/sysfs-class-net-queues +++ b/Documentation/ABI/testing/sysfs-class-net-queues @@ -24,6 +24,14 @@ Description: Indicates the number of transmit timeout events seen by this network interface transmit queue. +What: /sys/class//queues/tx-/tx_maxrate +Date: March 2015 +KernelVersion: 4.1 +Contact: netdev@vger.kernel.org +Description: + A Mbps max-rate set for the queue, a value of zero means disabled, + default is disabled. + What: /sys/class//queues/tx-/xps_cpus Date: November 2010 KernelVersion: 2.6.38 diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt index 99ca40e8e810..cbfac0949635 100644 --- a/Documentation/networking/scaling.txt +++ b/Documentation/networking/scaling.txt @@ -421,6 +421,15 @@ best CPUs to share a given queue are probably those that share the cache with the CPU that processes transmit completions for that queue (transmit interrupts). +Per TX Queue rate limitation: +============================= + +These are rate-limitation mechanisms implemented by HW, where currently +a max-rate attribute is supported, by setting a Mbps value to + +/sys/class/net//queues/tx-/tx_maxrate + +A value of zero means disabled, and this is the default. Further Information =================== diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dd1d069758be..76c5de4978a8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -587,6 +587,7 @@ struct netdev_queue { #ifdef CONFIG_BQL struct dql dql; #endif + unsigned long tx_maxrate; } ____cacheline_aligned_in_smp; static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) @@ -1022,6 +1023,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * be otherwise expressed by feature flags. The check is called with * the set of features that the stack has calculated and it returns * those the driver believes to be appropriate. + * int (*ndo_set_tx_maxrate)(struct net_device *dev, + * int queue_index, u32 maxrate); + * Called when a user wants to set a max-rate limitation of specific + * TX queue. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1178,6 +1183,9 @@ struct net_device_ops { netdev_features_t (*ndo_features_check) (struct sk_buff *skb, struct net_device *dev, netdev_features_t features); + int (*ndo_set_tx_maxrate)(struct net_device *dev, + int queue_index, + u32 maxrate); }; /** diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index cf30620a88e1..7e58bd7ec232 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -951,6 +951,60 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue, return sprintf(buf, "%lu", trans_timeout); } +#ifdef CONFIG_XPS +static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) +{ + struct net_device *dev = queue->dev; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) + if (queue == &dev->_tx[i]) + break; + + BUG_ON(i >= dev->num_tx_queues); + + return i; +} + +static ssize_t show_tx_maxrate(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + char *buf) +{ + return sprintf(buf, "%lu\n", queue->tx_maxrate); +} + +static ssize_t set_tx_maxrate(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct net_device *dev = queue->dev; + int err, index = get_netdev_queue_index(queue); + u32 rate = 0; + + err = kstrtou32(buf, 10, &rate); + if (err < 0) + return err; + + if (!rtnl_trylock()) + return restart_syscall(); + + err = -EOPNOTSUPP; + if (dev->netdev_ops->ndo_set_tx_maxrate) + err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate); + + rtnl_unlock(); + if (!err) { + queue->tx_maxrate = rate; + return len; + } + return err; +} + +static struct netdev_queue_attribute queue_tx_maxrate = + __ATTR(tx_maxrate, S_IRUGO | S_IWUSR, + show_tx_maxrate, set_tx_maxrate); +#endif + static struct netdev_queue_attribute queue_trans_timeout = __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); @@ -1065,18 +1119,6 @@ static struct attribute_group dql_group = { #endif /* CONFIG_BQL */ #ifdef CONFIG_XPS -static unsigned int get_netdev_queue_index(struct netdev_queue *queue) -{ - struct net_device *dev = queue->dev; - unsigned int i; - - i = queue - dev->_tx; - BUG_ON(i >= dev->num_tx_queues); - - return i; -} - - static ssize_t show_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, char *buf) { @@ -1153,6 +1195,7 @@ static struct attribute *netdev_queue_default_attrs[] = { &queue_trans_timeout.attr, #ifdef CONFIG_XPS &xps_cpus_attribute.attr, + &queue_tx_maxrate.attr, #endif NULL }; -- cgit v1.3-6-gb490 From fc31e2560a2443410fe45c27116fae736541a7b5 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 18 Mar 2015 14:57:34 +0200 Subject: net/mlx4_core: Add basic support for QP max-rate limiting Add the low-level device commands and definitions used for QP max-rate limiting. This is done through the following elements: - read rate-limit device caps in QUERY_DEV_CAP: number of different rates and the min/max rates in Kbs/Mbs/Gbs units - enhance the QP context struct to contain rate limit units and value - allow to do run time rate-limit setting to QPs through the update-qp firmware command - QP rate-limiting is disallowed for VFs Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 33 +++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 2 ++ drivers/net/ethernet/mellanox/mlx4/qp.c | 5 ++++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 6 +++- include/linux/mlx4/device.h | 17 +++++++++++ include/linux/mlx4/qp.h | 14 ++++++--- 7 files changed, 72 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 242bcee5d774..4a471f5d1b56 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -144,7 +144,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [19] = "Performance optimized for limited rule configuration flow steering support", [20] = "Recoverable error events support", [21] = "Port Remap support", - [22] = "QCN support" + [22] = "QCN support", + [23] = "QP rate limiting support" }; int i; @@ -697,6 +698,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET 0xa8 #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET 0xac +#define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET 0xcc +#define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET 0xd0 +#define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET 0xd2 + dev_cap->flags2 = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -904,6 +909,18 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET); dev_cap->dmfs_high_rate_qpn_range &= MGM_QPN_MASK; + MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET); + dev_cap->rl_caps.num_rates = size; + if (dev_cap->rl_caps.num_rates) { + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT; + MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET); + dev_cap->rl_caps.max_val = size & 0xfff; + dev_cap->rl_caps.max_unit = size >> 14; + MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET); + dev_cap->rl_caps.min_val = size & 0xfff; + dev_cap->rl_caps.min_unit = size >> 14; + } + MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); if (field32 & (1 << 16)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; @@ -979,6 +996,15 @@ void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->dmfs_high_rate_qpn_base); mlx4_dbg(dev, "DMFS high rate steer QPn range: %d\n", dev_cap->dmfs_high_rate_qpn_range); + + if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT) { + struct mlx4_rate_limit_caps *rl_caps = &dev_cap->rl_caps; + + mlx4_dbg(dev, "QP Rate-Limit: #rates %d, unit/val max %d/%d, min %d/%d\n", + rl_caps->num_rates, rl_caps->max_unit, rl_caps->max_val, + rl_caps->min_unit, rl_caps->min_val); + } + dump_dev_cap_flags(dev, dev_cap->flags); dump_dev_cap_flags2(dev, dev_cap->flags2); } @@ -1075,6 +1101,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, u64 flags; int err = 0; u8 field; + u16 field16; u32 bmme_flags, field32; int real_port; int slave_port; @@ -1158,6 +1185,10 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field &= 0xfe; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET); + /* turn off QP max-rate limiting for guests */ + field16 = 0; + MLX4_PUT(outbox->buf, field16, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index f44f7f6017ed..863655bd3947 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -127,6 +127,7 @@ struct mlx4_dev_cap { u32 max_counters; u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; + struct mlx4_rate_limit_caps rl_caps; struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1]; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 7e487223489a..43aa76775b5f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -489,6 +489,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE; } + dev->caps.rl_caps = dev_cap->rl_caps; + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] = dev->caps.dmfs_high_rate_qpn_range; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index eda29dbbfcd2..69e4462e4ee4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -442,6 +442,11 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, cmd->qp_context.param3 |= cpu_to_be32(MLX4_STRIP_VLAN); } + if (attr & MLX4_UPDATE_QP_RATE_LIMIT) { + qp_mask |= 1ULL << MLX4_UPD_QP_MASK_RATE_LIMIT; + cmd->qp_context.rate_limit_params = cpu_to_be16((params->rate_unit << 14) | params->rate_val); + } + cmd->primary_addr_path_mask = cpu_to_be64(pri_addr_path_mask); cmd->qp_mask = cpu_to_be64(qp_mask); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index d43e25914d19..c258f8625aac 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2947,8 +2947,12 @@ static int verify_qp_parameters(struct mlx4_dev *dev, qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; optpar = be32_to_cpu(*(__be32 *) inbox->buf); - if (slave != mlx4_master_func_num(dev)) + if (slave != mlx4_master_func_num(dev)) { qp_ctx->params2 &= ~MLX4_QP_BIT_FPP; + /* setting QP rate-limit is disallowed for VFs */ + if (qp_ctx->rate_limit_params) + return -EPERM; + } switch (qp_type) { case MLX4_QP_ST_RC: diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1cc54822b931..4550c67b92e4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -205,6 +205,7 @@ enum { MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21, MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22, + MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23 }; enum { @@ -450,6 +451,21 @@ enum mlx4_module_id { MLX4_MODULE_ID_QSFP28 = 0x11, }; +enum { /* rl */ + MLX4_QP_RATE_LIMIT_NONE = 0, + MLX4_QP_RATE_LIMIT_KBS = 1, + MLX4_QP_RATE_LIMIT_MBS = 2, + MLX4_QP_RATE_LIMIT_GBS = 3 +}; + +struct mlx4_rate_limit_caps { + u16 num_rates; /* Number of different rates */ + u8 min_unit; + u16 min_val; + u8 max_unit; + u16 max_val; +}; + static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) { return (major << 32) | (minor << 16) | subminor; @@ -565,6 +581,7 @@ struct mlx4_caps { u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; u32 vf_caps; + struct mlx4_rate_limit_caps rl_caps; }; struct mlx4_buf_list { diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 551f85456c11..1023ebe035b7 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -207,14 +207,16 @@ struct mlx4_qp_context { __be32 msn; __be16 rq_wqe_counter; __be16 sq_wqe_counter; - u32 reserved3[2]; + u32 reserved3; + __be16 rate_limit_params; + __be16 reserved4; __be32 param3; __be32 nummmcpeers_basemkey; u8 log_page_size; - u8 reserved4[2]; + u8 reserved5[2]; u8 mtt_base_addr_h; __be32 mtt_base_addr_l; - u32 reserved5[10]; + u32 reserved6[10]; }; struct mlx4_update_qp_context { @@ -229,6 +231,7 @@ struct mlx4_update_qp_context { enum { MLX4_UPD_QP_MASK_PM_STATE = 32, MLX4_UPD_QP_MASK_VSD = 33, + MLX4_UPD_QP_MASK_RATE_LIMIT = 35, }; enum { @@ -428,7 +431,8 @@ struct mlx4_wqe_inline_seg { enum mlx4_update_qp_attr { MLX4_UPDATE_QP_SMAC = 1 << 0, MLX4_UPDATE_QP_VSD = 1 << 1, - MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 2) - 1 + MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2, + MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 3) - 1 }; enum mlx4_update_qp_params_flags { @@ -438,6 +442,8 @@ enum mlx4_update_qp_params_flags { struct mlx4_update_qp_params { u8 smac_index; u32 flags; + u16 rate_unit; + u16 rate_val; }; int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, -- cgit v1.3-6-gb490 From 6eada0110c8984477f5f1e57a0b7f7b2fc841e30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Mar 2015 14:05:33 -0700 Subject: netns: constify net_hash_mix() and various callers const qualifiers ease code review by making clear which objects are not written in a function. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/udp.h | 2 +- include/net/inet_hashtables.h | 6 +++--- include/net/netns/hash.h | 4 ++-- net/ipv4/devinet.c | 2 +- net/ipv4/inet_hashtables.c | 6 +++--- net/ipv4/ping.c | 6 +++--- net/ipv4/udp.c | 10 +++++----- net/ipv6/inet6_hashtables.c | 10 +++++----- net/ipv6/udp.c | 16 ++++++++-------- 9 files changed, 31 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 247cfdcc4b08..87c094961bd5 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -34,7 +34,7 @@ static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb) #define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) -static inline int udp_hashfn(struct net *net, unsigned num, unsigned mask) +static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) { return (num + net_hash_mix(net)) & mask; } diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index bcd64756e5fe..eb1963af0ebd 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -221,8 +221,8 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb); -static inline int inet_bhashfn(struct net *net, const __u16 lport, - const int bhash_size) +static inline u32 inet_bhashfn(const struct net *net, const __u16 lport, + const u32 bhash_size) { return (lport + net_hash_mix(net)) & (bhash_size - 1); } @@ -231,7 +231,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum); /* These can have wildcards, don't try too hard. */ -static inline int inet_lhashfn(struct net *net, const unsigned short num) +static inline u32 inet_lhashfn(const struct net *net, const unsigned short num) { return (num + net_hash_mix(net)) & (INET_LHTABLE_SIZE - 1); } diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h index c06ac58ca107..69a6715d9f3f 100644 --- a/include/net/netns/hash.h +++ b/include/net/netns/hash.h @@ -5,7 +5,7 @@ struct net; -static inline unsigned int net_hash_mix(struct net *net) +static inline u32 net_hash_mix(const struct net *net) { #ifdef CONFIG_NET_NS /* @@ -13,7 +13,7 @@ static inline unsigned int net_hash_mix(struct net *net) * always zeroed */ - return (unsigned)(((unsigned long)net) >> L1_CACHE_SHIFT); + return (u32)(((unsigned long)net) >> L1_CACHE_SHIFT); #else return 0; #endif diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5105759e4e00..375dc71b9a64 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -107,7 +107,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; -static u32 inet_addr_hash(struct net *net, __be32 addr) +static u32 inet_addr_hash(const struct net *net, __be32 addr) { u32 val = (__force u32) addr ^ net_hash_mix(net); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index c28bca4cc15b..330a08bcd1c1 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -24,9 +24,9 @@ #include #include -static unsigned int inet_ehashfn(struct net *net, const __be32 laddr, - const __u16 lport, const __be32 faddr, - const __be16 fport) +static u32 inet_ehashfn(const struct net *net, const __be32 laddr, + const __u16 lport, const __be32 faddr, + const __be16 fport) { static u32 inet_ehash_secret __read_mostly; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index fd88f868776f..344e7cdfb8d4 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -64,11 +64,11 @@ EXPORT_SYMBOL_GPL(pingv6_ops); static u16 ping_port_rover; -static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int mask) +static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask) { - int res = (num + net_hash_mix(net)) & mask; + u32 res = (num + net_hash_mix(net)) & mask; - pr_debug("hash(%d) = %d\n", num, res); + pr_debug("hash(%u) = %u\n", num, res); return res; } EXPORT_SYMBOL_GPL(ping_hash); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f27556e2158b..294af16633af 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -318,8 +318,8 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); } -static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, - unsigned int port) +static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr, + unsigned int port) { return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; } @@ -421,9 +421,9 @@ static inline int compute_score2(struct sock *sk, struct net *net, return score; } -static unsigned int udp_ehashfn(struct net *net, const __be32 laddr, - const __u16 lport, const __be32 faddr, - const __be16 fport) +static u32 udp_ehashfn(const struct net *net, const __be32 laddr, + const __u16 lport, const __be32 faddr, + const __be16 fport) { static u32 udp_ehash_secret __read_mostly; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 051dffb49c90..df7df99d1d7e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -23,11 +23,11 @@ #include #include -static unsigned int inet6_ehashfn(struct net *net, - const struct in6_addr *laddr, - const u16 lport, - const struct in6_addr *faddr, - const __be16 fport) +static u32 inet6_ehashfn(const struct net *net, + const struct in6_addr *laddr, + const u16 lport, + const struct in6_addr *faddr, + const __be16 fport) { static u32 inet6_ehash_secret __read_mostly; static u32 ipv6_hash_secret __read_mostly; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 70568a4548e4..7fe0329c0d37 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,11 +53,11 @@ #include #include "udp_impl.h" -static unsigned int udp6_ehashfn(struct net *net, - const struct in6_addr *laddr, - const u16 lport, - const struct in6_addr *faddr, - const __be16 fport) +static u32 udp6_ehashfn(const struct net *net, + const struct in6_addr *laddr, + const u16 lport, + const struct in6_addr *faddr, + const __be16 fport) { static u32 udp6_ehash_secret __read_mostly; static u32 udp_ipv6_hash_secret __read_mostly; @@ -104,9 +104,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 0; } -static unsigned int udp6_portaddr_hash(struct net *net, - const struct in6_addr *addr6, - unsigned int port) +static u32 udp6_portaddr_hash(const struct net *net, + const struct in6_addr *addr6, + unsigned int port) { unsigned int hash, mix = net_hash_mix(net); -- cgit v1.3-6-gb490 From 54ff9ef36bdf84d469a098cbf8e2a103fbc77054 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 18 Mar 2015 14:50:43 -0300 Subject: ipv4, ipv6: kill ip_mc_{join, leave}_group and ipv6_sock_mc_{join, drop} in favor of their inner __ ones, which doesn't grab rtnl. As these functions need to operate on a locked socket, we can't be grabbing rtnl by then. It's too late and doing so causes reversed locking. So this patch: - move rtnl handling to callers instead while already fixing some reversed locking situations, like on vxlan and ipvs code. - renames __ ones to not have the __ mark: __ip_mc_{join,leave}_group -> ip_mc_{join,leave}_group __ipv6_sock_mc_{join,drop} -> ipv6_sock_mc_{join,drop} Signed-off-by: Marcelo Ricardo Leitner Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 5 ++++- include/linux/igmp.h | 2 -- include/net/ipv6.h | 4 ---- net/ipv4/devinet.c | 4 ++-- net/ipv4/igmp.c | 41 ++++++++--------------------------------- net/ipv4/ip_sockglue.c | 21 +++++++++++++++------ net/ipv6/addrconf.c | 4 ++-- net/ipv6/ipv6_sockglue.c | 21 +++++++++++++-------- net/ipv6/mcast.c | 30 +++--------------------------- net/netfilter/ipvs/ip_vs_sync.c | 2 ++ net/tipc/udp_media.c | 4 ++-- 11 files changed, 51 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 25d92d4fc625..8b8ca7492d56 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1097,7 +1097,6 @@ EXPORT_SYMBOL_GPL(vxlan_sock_release); /* Callback to update multicast group membership when first VNI on * multicast asddress is brought up - * Done as workqueue because ip_mc_join_group acquires RTNL. */ static void vxlan_igmp_join(struct work_struct *work) { @@ -1107,6 +1106,7 @@ static void vxlan_igmp_join(struct work_struct *work) union vxlan_addr *ip = &vxlan->default_dst.remote_ip; int ifindex = vxlan->default_dst.remote_ifindex; + rtnl_lock(); lock_sock(sk); if (ip->sa.sa_family == AF_INET) { struct ip_mreqn mreq = { @@ -1122,6 +1122,7 @@ static void vxlan_igmp_join(struct work_struct *work) #endif } release_sock(sk); + rtnl_unlock(); vxlan_sock_release(vs); dev_put(vxlan->dev); @@ -1136,6 +1137,7 @@ static void vxlan_igmp_leave(struct work_struct *work) union vxlan_addr *ip = &vxlan->default_dst.remote_ip; int ifindex = vxlan->default_dst.remote_ifindex; + rtnl_lock(); lock_sock(sk); if (ip->sa.sa_family == AF_INET) { struct ip_mreqn mreq = { @@ -1152,6 +1154,7 @@ static void vxlan_igmp_leave(struct work_struct *work) } release_sock(sk); + rtnl_unlock(); vxlan_sock_release(vs); dev_put(vxlan->dev); diff --git a/include/linux/igmp.h b/include/linux/igmp.h index b5a6470e686c..2c677afeea47 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -111,9 +111,7 @@ struct ip_mc_list { extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto); extern int igmp_rcv(struct sk_buff *); -extern int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); -extern int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern void ip_mc_drop_socket(struct sock *sk); extern int ip_mc_source(int add, int omode, struct sock *sk, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index b7673065c074..e7ba9758a345 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -942,10 +942,6 @@ void ipv6_sysctl_unregister(void); int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); -int __ipv6_sock_mc_join(struct sock *sk, int ifindex, - const struct in6_addr *addr); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); -int __ipv6_sock_mc_drop(struct sock *sk, int ifindex, - const struct in6_addr *addr); #endif /* _NET_IPV6_H */ diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 375dc71b9a64..975ee5e30c64 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -560,9 +560,9 @@ static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) lock_sock(sk); if (join) - ret = __ip_mc_join_group(sk, &mreq); + ret = ip_mc_join_group(sk, &mreq); else - ret = __ip_mc_leave_group(sk, &mreq); + ret = ip_mc_leave_group(sk, &mreq); release_sock(sk); return ret; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5cb1ef4ce292..ad3f866085de 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1850,7 +1850,10 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) pmc->sfcount[MCAST_EXCLUDE] = 1; } -int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) +/* Join a multicast group + */ + +int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) { __be32 addr = imr->imr_multiaddr.s_addr; struct ip_mc_socklist *iml, *i; @@ -1898,20 +1901,6 @@ int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) done: return err; } -EXPORT_SYMBOL(__ip_mc_join_group); - -/* Join a multicast group - */ -int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) -{ - int ret; - - rtnl_lock(); - ret = __ip_mc_join_group(sk, imr); - rtnl_unlock(); - - return ret; -} EXPORT_SYMBOL(ip_mc_join_group); static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, @@ -1934,7 +1923,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, return err; } -int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) +int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml; @@ -1979,18 +1968,6 @@ int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) out: return ret; } -EXPORT_SYMBOL(__ip_mc_leave_group); - -int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) -{ - int ret; - - rtnl_lock(); - ret = __ip_mc_leave_group(sk, imr); - rtnl_unlock(); - - return ret; -} EXPORT_SYMBOL(ip_mc_leave_group); int ip_mc_source(int add, int omode, struct sock *sk, struct @@ -2010,7 +1987,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_lock(); + ASSERT_RTNL(); imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; imr.imr_address.s_addr = mreqs->imr_interface; @@ -2124,9 +2101,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 1, &mreqs->imr_sourceaddr, 1); done: - rtnl_unlock(); if (leavegroup) - return ip_mc_leave_group(sk, &imr); + err = ip_mc_leave_group(sk, &imr); return err; } @@ -2148,7 +2124,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) msf->imsf_fmode != MCAST_EXCLUDE) return -EINVAL; - rtnl_lock(); + ASSERT_RTNL(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; @@ -2210,7 +2186,6 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) pmc->sfmode = msf->imsf_fmode; err = 0; done: - rtnl_unlock(); if (leavegroup) err = ip_mc_leave_group(sk, &imr); return err; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5171709199f4..f6a0d54b308a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -541,9 +541,18 @@ static bool setsockopt_needs_rtnl(int optname) switch (optname) { case IP_ADD_MEMBERSHIP: case IP_ADD_SOURCE_MEMBERSHIP: + case IP_BLOCK_SOURCE: case IP_DROP_MEMBERSHIP: + case IP_DROP_SOURCE_MEMBERSHIP: + case IP_MSFILTER: + case IP_UNBLOCK_SOURCE: + case MCAST_BLOCK_SOURCE: + case MCAST_MSFILTER: case MCAST_JOIN_GROUP: + case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_UNBLOCK_SOURCE: return true; } return false; @@ -861,9 +870,9 @@ static int do_ip_setsockopt(struct sock *sk, int level, } if (optname == IP_ADD_MEMBERSHIP) - err = __ip_mc_join_group(sk, &mreq); + err = ip_mc_join_group(sk, &mreq); else - err = __ip_mc_leave_group(sk, &mreq); + err = ip_mc_leave_group(sk, &mreq); break; } case IP_MSFILTER: @@ -928,7 +937,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr; mreq.imr_address.s_addr = mreqs.imr_interface; mreq.imr_ifindex = 0; - err = __ip_mc_join_group(sk, &mreq); + err = ip_mc_join_group(sk, &mreq); if (err && err != -EADDRINUSE) break; omode = MCAST_INCLUDE; @@ -960,9 +969,9 @@ static int do_ip_setsockopt(struct sock *sk, int level, mreq.imr_ifindex = greq.gr_interface; if (optname == MCAST_JOIN_GROUP) - err = __ip_mc_join_group(sk, &mreq); + err = ip_mc_join_group(sk, &mreq); else - err = __ip_mc_leave_group(sk, &mreq); + err = ip_mc_leave_group(sk, &mreq); break; } case MCAST_JOIN_SOURCE_GROUP: @@ -1005,7 +1014,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, mreq.imr_multiaddr = psin->sin_addr; mreq.imr_address.s_addr = 0; mreq.imr_ifindex = greqs.gsr_interface; - err = __ip_mc_join_group(sk, &mreq); + err = ip_mc_join_group(sk, &mreq); if (err && err != -EADDRINUSE) break; greqs.gsr_interface = mreq.imr_ifindex; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 88d2cf0cae52..158378e73f0a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2473,9 +2473,9 @@ static int ipv6_mc_config(struct sock *sk, bool join, lock_sock(sk); if (join) - ret = __ipv6_sock_mc_join(sk, ifindex, addr); + ret = ipv6_sock_mc_join(sk, ifindex, addr); else - ret = __ipv6_sock_mc_drop(sk, ifindex, addr); + ret = ipv6_sock_mc_drop(sk, ifindex, addr); release_sock(sk); return ret; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index f2b731df8d77..cc5883791bac 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -124,6 +124,11 @@ static bool setsockopt_needs_rtnl(int optname) case IPV6_DROP_MEMBERSHIP: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_MSFILTER: return true; } return false; @@ -597,9 +602,9 @@ done: break; if (optname == IPV6_ADD_MEMBERSHIP) - retv = __ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); + retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); else - retv = __ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); + retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); break; } case IPV6_JOIN_ANYCAST: @@ -638,11 +643,11 @@ done: } psin6 = (struct sockaddr_in6 *)&greq.gr_group; if (optname == MCAST_JOIN_GROUP) - retv = __ipv6_sock_mc_join(sk, greq.gr_interface, - &psin6->sin6_addr); + retv = ipv6_sock_mc_join(sk, greq.gr_interface, + &psin6->sin6_addr); else - retv = __ipv6_sock_mc_drop(sk, greq.gr_interface, - &psin6->sin6_addr); + retv = ipv6_sock_mc_drop(sk, greq.gr_interface, + &psin6->sin6_addr); break; } case MCAST_JOIN_SOURCE_GROUP: @@ -674,8 +679,8 @@ done: struct sockaddr_in6 *psin6; psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; - retv = __ipv6_sock_mc_join(sk, greqs.gsr_interface, - &psin6->sin6_addr); + retv = ipv6_sock_mc_join(sk, greqs.gsr_interface, + &psin6->sin6_addr); /* prior join w/ different source is ok */ if (retv && retv != -EADDRINUSE) break; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 1dd1fedff9f4..cbb66fd3da6d 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -132,7 +132,7 @@ static int unsolicited_report_interval(struct inet6_dev *idev) return iv > 0 ? iv : 1; } -int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) +int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct net_device *dev = NULL; struct ipv6_mc_socklist *mc_lst; @@ -199,24 +199,12 @@ int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *add return 0; } -EXPORT_SYMBOL(__ipv6_sock_mc_join); - -int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) -{ - int ret; - - rtnl_lock(); - ret = __ipv6_sock_mc_join(sk, ifindex, addr); - rtnl_unlock(); - - return ret; -} EXPORT_SYMBOL(ipv6_sock_mc_join); /* * socket leave on multicast group */ -int __ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) +int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; @@ -255,18 +243,6 @@ int __ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *add return -EADDRNOTAVAIL; } -EXPORT_SYMBOL(__ipv6_sock_mc_drop); - -int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) -{ - int ret; - - rtnl_lock(); - ret = __ipv6_sock_mc_drop(sk, ifindex, addr); - rtnl_unlock(); - - return ret; -} EXPORT_SYMBOL(ipv6_sock_mc_drop); /* called with rcu_read_lock() */ @@ -460,7 +436,7 @@ done: read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) - return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); + err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 08d95559b6f7..19b9cce6c210 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1405,9 +1405,11 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) mreq.imr_ifindex = dev->ifindex; + rtnl_lock(); lock_sock(sk); ret = ip_mc_join_group(sk, &mreq); release_sock(sk); + rtnl_unlock(); return ret; } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index fc2fb11a354d..04836dd70c2b 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -246,11 +246,11 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) return 0; mreqn.imr_multiaddr = remote->ipv4; mreqn.imr_ifindex = ub->ifindex; - err = __ip_mc_join_group(sk, &mreqn); + err = ip_mc_join_group(sk, &mreqn); } else { if (!ipv6_addr_is_multicast(&remote->ipv6)) return 0; - err = __ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); + err = ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); } return err; } -- cgit v1.3-6-gb490 From db24a9044ee191c397dcd1c6574f56d67d7c8df5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 17 Mar 2015 20:23:15 -0600 Subject: net: add support for phys_port_name Similar to port id allow netdevices to specify port names and export the name via sysfs. Drivers can implement the netdevice operation to assist udev in having sane default names for the devices using the rule: $ cat /etc/udev/rules.d/80-net-setup-link.rules SUBSYSTEM=="net", ACTION=="add", ATTR{phys_port_name}!="", NAME="$attr{phys_port_name}" Use of phys_name versus phys_id was suggested-by Jiri Pirko. Signed-off-by: David Ahern Acked-by: Jiri Pirko Acked-by: Scott Feldman Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-class-net | 8 ++++++++ include/linux/netdevice.h | 4 ++++ include/uapi/linux/if_link.h | 1 + net/core/dev.c | 18 ++++++++++++++++++ net/core/net-sysfs.c | 23 +++++++++++++++++++++++ net/core/rtnetlink.c | 21 +++++++++++++++++++++ 6 files changed, 75 insertions(+) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net index beb8ec4dabbc..5ecfd72ba684 100644 --- a/Documentation/ABI/testing/sysfs-class-net +++ b/Documentation/ABI/testing/sysfs-class-net @@ -188,6 +188,14 @@ Description: Indicates the interface unique physical port identifier within the NIC, as a string. +What: /sys/class/net//phys_port_name +Date: March 2015 +KernelVersion: 4.0 +Contact: netdev@vger.kernel.org +Description: + Indicates the interface physical port name within the NIC, + as a string. + What: /sys/class/net//speed Date: October 2009 KernelVersion: 2.6.33 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 76c5de4978a8..ec8f9b5f6500 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1164,6 +1164,8 @@ struct net_device_ops { bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, struct netdev_phys_item_id *ppid); + int (*ndo_get_phys_port_name)(struct net_device *dev, + char *name, size_t len); void (*ndo_add_vxlan_port)(struct net_device *dev, sa_family_t sa_family, __be16 port); @@ -2947,6 +2949,8 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *); int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 756436e1ce89..7158fd00a109 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -147,6 +147,7 @@ enum { IFLA_CARRIER_CHANGES, IFLA_PHYS_SWITCH_ID, IFLA_LINK_NETNSID, + IFLA_PHYS_PORT_NAME, __IFLA_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index 39fe369b46ad..a1f24151db5b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5911,6 +5911,24 @@ int dev_get_phys_port_id(struct net_device *dev, } EXPORT_SYMBOL(dev_get_phys_port_id); +/** + * dev_get_phys_port_name - Get device physical port name + * @dev: device + * @name: port name + * + * Get device physical port name + */ +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_get_phys_port_name) + return -EOPNOTSUPP; + return ops->ndo_get_phys_port_name(dev, name, len); +} +EXPORT_SYMBOL(dev_get_phys_port_name); + /** * dev_new_index - allocate an ifindex * @net: the applicable net namespace diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7e58bd7ec232..cc5cf689809c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -418,6 +418,28 @@ static ssize_t phys_port_id_show(struct device *dev, } static DEVICE_ATTR_RO(phys_port_id); +static ssize_t phys_port_name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + ssize_t ret = -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (dev_isalive(netdev)) { + char name[IFNAMSIZ]; + + ret = dev_get_phys_port_name(netdev, name, sizeof(name)); + if (!ret) + ret = sprintf(buf, "%s\n", name); + } + rtnl_unlock(); + + return ret; +} +static DEVICE_ATTR_RO(phys_port_name); + static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -465,6 +487,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_tx_queue_len.attr, &dev_attr_gro_flush_timeout.attr, &dev_attr_phys_port_id.attr, + &dev_attr_phys_port_name.attr, &dev_attr_phys_switch_id.attr, NULL, }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 25b4b5d23485..6abe634c666c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -982,6 +982,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev) return 0; } +static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) +{ + char name[IFNAMSIZ]; + int err; + + err = dev_get_phys_port_name(dev, name, sizeof(name)); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name)) + return -EMSGSIZE; + + return 0; +} + static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; @@ -1072,6 +1090,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_phys_port_id_fill(skb, dev)) goto nla_put_failure; + if (rtnl_phys_port_name_fill(skb, dev)) + goto nla_put_failure; + if (rtnl_phys_switch_id_fill(skb, dev)) goto nla_put_failure; -- cgit v1.3-6-gb490 From 99c4a26a159b28fa46a3e746a9b41b297e73d261 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Mar 2015 22:52:33 -0400 Subject: net: Fix high overhead of vlan sub-device teardown. When a networking device is taken down that has a non-trivial number of VLAN devices configured under it, we eat a full synchronize_net() for every such VLAN device. This is because of the call chain: NETDEV_DOWN notifier --> vlan_device_event() --> dev_change_flags() --> __dev_change_flags() --> __dev_close() --> __dev_close_many() --> dev_deactivate_many() --> synchronize_net() This is kind of rediculous because we already have infrastructure for batching doing operation X to a list of net devices so that we only incur one sync. So make use of that by exporting dev_close_many() and adjusting it's interfaace so that the caller can fully manage the batch list. Use this in vlan_device_event() and all the overhead goes away. Reported-by: Salam Noureddine Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/8021q/vlan.c | 16 +++++++++++++--- net/core/dev.c | 10 ++++++---- 3 files changed, 20 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ec8f9b5f6500..76951c5fbedf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2156,6 +2156,7 @@ struct net_device *__dev_get_by_name(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); int dev_open(struct net_device *dev); int dev_close(struct net_device *dev); +int dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct sk_buff *newskb); int dev_queue_xmit(struct sk_buff *skb); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 64c6bed4a3d3..98a30a5b8664 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -413,7 +413,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, vlan_transfer_features(dev, vlandev); break; - case NETDEV_DOWN: + case NETDEV_DOWN: { + struct net_device *tmp; + LIST_HEAD(close_list); + if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) vlan_vid_del(dev, htons(ETH_P_8021Q), 0); @@ -425,11 +428,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, vlan = vlan_dev_priv(vlandev); if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) - dev_change_flags(vlandev, flgs & ~IFF_UP); + list_add(&vlandev->close_list, &close_list); + } + + dev_close_many(&close_list, false); + + list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) { netif_stacked_transfer_operstate(dev, vlandev); + list_del_init(&vlandev->close_list); } + list_del(&close_list); break; - + } case NETDEV_UP: /* Put all VLANs for this dev in the up state too. */ vlan_group_for_each_dev(grp, i, vlandev) { diff --git a/net/core/dev.c b/net/core/dev.c index a1f24151db5b..5d43e010ef87 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1385,7 +1385,7 @@ static int __dev_close(struct net_device *dev) return retval; } -static int dev_close_many(struct list_head *head) +int dev_close_many(struct list_head *head, bool unlink) { struct net_device *dev, *tmp; @@ -1399,11 +1399,13 @@ static int dev_close_many(struct list_head *head) list_for_each_entry_safe(dev, tmp, head, close_list) { rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); call_netdevice_notifiers(NETDEV_DOWN, dev); - list_del_init(&dev->close_list); + if (unlink) + list_del_init(&dev->close_list); } return 0; } +EXPORT_SYMBOL(dev_close_many); /** * dev_close - shutdown an interface. @@ -1420,7 +1422,7 @@ int dev_close(struct net_device *dev) LIST_HEAD(single); list_add(&dev->close_list, &single); - dev_close_many(&single); + dev_close_many(&single, true); list_del(&single); } return 0; @@ -5986,7 +5988,7 @@ static void rollback_registered_many(struct list_head *head) /* If device is running, close it first. */ list_for_each_entry(dev, head, unreg_list) list_add_tail(&dev->close_list, &close_head); - dev_close_many(&close_head); + dev_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ -- cgit v1.3-6-gb490 From 5067c0469c643512f24786990e315f9c15cc7d24 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 12 Mar 2015 10:32:18 -0700 Subject: ata: Add a new flag to destinguish sas controller SAS controller has its own tag allocation, which doesn't directly match to ATA tag, so SAS and SATA have different code path for ata tags. Originally we use port->scsi_host (98bd4be1) to destinguish SAS controller, but libsas set ->scsi_host too, so we can't use it for the destinguish, we add a new flag for this purpose. Without this patch, the following oops can happen because scsi-mq uses a host-wide tag map shared among all devices with some integer tag values >= ATA_MAX_QUEUE. These unexpectedly high tag values cause __ata_qc_from_tag() to return NULL, which is then dereferenced in ata_qc_new_init(). BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 IP: [] ata_qc_new_init+0x3e/0x120 PGD 32adf0067 PUD 32adf1067 PMD 0 Oops: 0002 [#1] SMP DEBUG_PAGEALLOC Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp eeprom w83795 i2c_i801 CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1 Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b 05/04/12 task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000 RIP: 0010:[] [] ata_qc_new_init+0x3e/0x120 RSP: 0018:ffff88032a067858 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230 RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000 R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000 FS: 0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0 Stack: ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978 ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909 Call Trace: [] ata_scsi_translate+0x3d/0x1b0 [] ata_sas_queuecmd+0x149/0x2a0 [] sas_queuecommand+0xa0/0x1f0 [libsas] [] scsi_dispatch_cmd+0xd4/0x1a0 [] scsi_queue_rq+0x66f/0x7f0 [] __blk_mq_run_hw_queue+0x208/0x3f0 [] blk_mq_run_hw_queue+0x88/0xc0 [] blk_mq_insert_request+0xc4/0x130 [] blk_execute_rq_nowait+0x73/0x160 [] sg_common_write+0x3da/0x720 [sg] [] sg_new_write+0x250/0x360 [sg] [] sg_write+0x13b/0x450 [sg] [] vfs_write+0xd1/0x1b0 [] SyS_write+0x54/0xc0 [] system_call_fastpath+0x12/0x17 tj: updated description. Fixes: 12cb5ce101ab ("libata: use blk taging") Reported-and-tested-by: Tony Battersby Signed-off-by: Shaohua Li Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 4 ++-- drivers/scsi/ipr.c | 3 ++- drivers/scsi/libsas/sas_ata.c | 3 ++- include/linux/libata.h | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 4c35f0822d06..ef150ebb4c30 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4737,7 +4737,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag) return NULL; /* libsas case */ - if (!ap->scsi_host) { + if (ap->flags & ATA_FLAG_SAS_HOST) { tag = ata_sas_allocate_tag(ap); if (tag < 0) return NULL; @@ -4776,7 +4776,7 @@ void ata_qc_free(struct ata_queued_cmd *qc) tag = qc->tag; if (likely(ata_tag_valid(tag))) { qc->tag = ATA_TAG_POISON; - if (!ap->scsi_host) + if (ap->flags & ATA_FLAG_SAS_HOST) ata_sas_free_tag(tag, ap); } } diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 9219953ee949..d9afc51af7d3 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6815,7 +6815,8 @@ static struct ata_port_operations ipr_sata_ops = { }; static struct ata_port_info sata_port_info = { - .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA, + .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | + ATA_FLAG_SAS_HOST, .pio_mask = ATA_PIO4_ONLY, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA6, diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 932d9cc98d2f..9c706d8c1441 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -547,7 +547,8 @@ static struct ata_port_operations sas_sata_ops = { }; static struct ata_port_info sata_port_info = { - .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ, + .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ | + ATA_FLAG_SAS_HOST, .pio_mask = ATA_PIO4, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA6, diff --git a/include/linux/libata.h b/include/linux/libata.h index fc03efa64ffe..6b08cc106c21 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -232,6 +232,7 @@ enum { * led */ ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */ ATA_FLAG_LOWTAG = (1 << 24), /* host wants lowest available tag */ + ATA_FLAG_SAS_HOST = (1 << 25), /* SAS host */ /* bits 24:31 of ap->flags are reserved for LLD specific flags */ -- cgit v1.3-6-gb490 From 25911556283e5093fca235d7ba03faae7ed5dbf2 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Wed, 18 Mar 2015 13:25:26 +0100 Subject: brcmfmac: add support for BCM43430 SDIO chipset This patch added support for the BCM43430 802.11n SDIO chipset. Reviewed-by: Hante Meuleman Reviewed-by: Daniel (Deognyoun) Kim Reviewed-by: Franky (Zhenhui) Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c | 1 + drivers/net/wireless/brcm80211/brcmfmac/chip.c | 18 ++++++++++++++++++ drivers/net/wireless/brcm80211/brcmfmac/sdio.c | 5 +++++ drivers/net/wireless/brcm80211/include/brcm_hw_ids.h | 1 + drivers/net/wireless/brcm80211/include/chipcommon.h | 9 ++++++++- include/linux/mmc/sdio_ids.h | 1 + 6 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c index 9667b6aabc00..9b508bd3b839 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c @@ -1098,6 +1098,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = { BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339), + BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43430), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4345), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4354), { /* end: all zeroes */ } diff --git a/drivers/net/wireless/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/brcm80211/brcmfmac/chip.c index 734172570054..ab2fac8b2760 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/chip.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/chip.c @@ -600,6 +600,12 @@ static void brcmf_chip_socram_ramsize(struct brcmf_core_priv *sr, u32 *ramsize, if (sr->chip->pub.chiprev < 2) *srsize = (32 * 1024); break; + case BRCM_CC_43430_CHIP_ID: + /* assume sr for now as we can not check + * firmware sr capability at this point. + */ + *srsize = (64 * 1024); + break; default: break; } @@ -1072,6 +1078,7 @@ static void brcmf_chip_cm3_set_passive(struct brcmf_chip_priv *chip) { struct brcmf_core *core; + struct brcmf_core_priv *sr; brcmf_chip_disable_arm(chip, BCMA_CORE_ARM_CM3); core = brcmf_chip_get_core(&chip->pub, BCMA_CORE_80211); @@ -1081,6 +1088,13 @@ brcmf_chip_cm3_set_passive(struct brcmf_chip_priv *chip) D11_BCMA_IOCTL_PHYCLOCKEN); core = brcmf_chip_get_core(&chip->pub, BCMA_CORE_INTERNAL_MEM); brcmf_chip_resetcore(core, 0, 0, 0); + + /* disable bank #3 remap for this device */ + if (chip->pub.chip == BRCM_CC_43430_CHIP_ID) { + sr = container_of(core, struct brcmf_core_priv, pub); + brcmf_chip_core_write32(sr, SOCRAMREGOFFS(bankidx), 3); + brcmf_chip_core_write32(sr, SOCRAMREGOFFS(bankpda), 0); + } } static bool brcmf_chip_cm3_set_active(struct brcmf_chip_priv *chip) @@ -1188,6 +1202,10 @@ bool brcmf_chip_sr_capable(struct brcmf_chip *pub) addr = CORE_CC_REG(base, chipcontrol_data); reg = chip->ops->read32(chip->ctx, addr); return (reg & pmu_cc3_mask) != 0; + case BRCM_CC_43430_CHIP_ID: + addr = CORE_CC_REG(base, sr_control1); + reg = chip->ops->read32(chip->ctx, addr); + return reg != 0; default: addr = CORE_CC_REG(base, pmucapabilities_ext); reg = chip->ops->read32(chip->ctx, addr); diff --git a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c index 38fa0e8f859d..e162860b265c 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c @@ -615,6 +615,8 @@ static const struct sdiod_drive_str sdiod_drvstr_tab2_3v3[] = { #define BCM43362_NVRAM_NAME "brcm/brcmfmac43362-sdio.txt" #define BCM4339_FIRMWARE_NAME "brcm/brcmfmac4339-sdio.bin" #define BCM4339_NVRAM_NAME "brcm/brcmfmac4339-sdio.txt" +#define BCM43430_FIRMWARE_NAME "brcm/brcmfmac43430-sdio.bin" +#define BCM43430_NVRAM_NAME "brcm/brcmfmac43430-sdio.txt" #define BCM4345_FIRMWARE_NAME "brcm/brcmfmac4345-sdio.bin" #define BCM4345_NVRAM_NAME "brcm/brcmfmac4345-sdio.txt" #define BCM4354_FIRMWARE_NAME "brcm/brcmfmac4354-sdio.bin" @@ -640,6 +642,8 @@ MODULE_FIRMWARE(BCM43362_FIRMWARE_NAME); MODULE_FIRMWARE(BCM43362_NVRAM_NAME); MODULE_FIRMWARE(BCM4339_FIRMWARE_NAME); MODULE_FIRMWARE(BCM4339_NVRAM_NAME); +MODULE_FIRMWARE(BCM43430_FIRMWARE_NAME); +MODULE_FIRMWARE(BCM43430_NVRAM_NAME); MODULE_FIRMWARE(BCM4345_FIRMWARE_NAME); MODULE_FIRMWARE(BCM4345_NVRAM_NAME); MODULE_FIRMWARE(BCM4354_FIRMWARE_NAME); @@ -671,6 +675,7 @@ static const struct brcmf_firmware_names brcmf_fwname_data[] = { { BRCM_CC_4335_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4335) }, { BRCM_CC_43362_CHIP_ID, 0xFFFFFFFE, BRCMF_FIRMWARE_NVRAM(BCM43362) }, { BRCM_CC_4339_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4339) }, + { BRCM_CC_43430_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM43430) }, { BRCM_CC_4345_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4345) }, { BRCM_CC_4354_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4354) } }; diff --git a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h index b599e7e41148..4efdd51af9c8 100644 --- a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h +++ b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h @@ -37,6 +37,7 @@ #define BRCM_CC_43362_CHIP_ID 43362 #define BRCM_CC_4335_CHIP_ID 0x4335 #define BRCM_CC_4339_CHIP_ID 0x4339 +#define BRCM_CC_43430_CHIP_ID 43430 #define BRCM_CC_4345_CHIP_ID 0x4345 #define BRCM_CC_4354_CHIP_ID 0x4354 #define BRCM_CC_4356_CHIP_ID 0x4356 diff --git a/drivers/net/wireless/brcm80211/include/chipcommon.h b/drivers/net/wireless/brcm80211/include/chipcommon.h index d242333b7559..e1fd499930a0 100644 --- a/drivers/net/wireless/brcm80211/include/chipcommon.h +++ b/drivers/net/wireless/brcm80211/include/chipcommon.h @@ -183,7 +183,14 @@ struct chipcregs { u8 uart1lsr; u8 uart1msr; u8 uart1scratch; - u32 PAD[126]; + u32 PAD[62]; + + /* save/restore, corerev >= 48 */ + u32 sr_capability; /* 0x500 */ + u32 sr_control0; /* 0x504 */ + u32 sr_control1; /* 0x508 */ + u32 gpio_control; /* 0x50C */ + u32 PAD[60]; /* PMU registers (corerev >= 20) */ u32 pmucontrol; /* 0x600 */ diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 91397858dc95..83430f2ea757 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -33,6 +33,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 +#define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 #define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 -- cgit v1.3-6-gb490 From 488fb86ee91d3b1182c2e30a9f9b45da14eda46f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 20 Mar 2015 21:56:59 +1100 Subject: rhashtable: Make rhashtable_init params argument const This patch marks the rhashtable_init params argument const as there is no reason to modify it since we will always make a copy of it in the rhashtable. This patch also fixes a bug where we don't actually round up the value of min_size unless it is less than HASH_MIN_SIZE. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 3 ++- lib/rhashtable.c | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 99425f2be708..c85363c45fc0 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -181,7 +181,8 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, } #endif /* CONFIG_PROVE_LOCKING */ -int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); +int rhashtable_init(struct rhashtable *ht, + const struct rhashtable_params *params); void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e75c48d9d82f..e0a9d59f80d6 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -870,7 +870,7 @@ out: } EXPORT_SYMBOL_GPL(rhashtable_walk_stop); -static size_t rounded_hashtable_size(struct rhashtable_params *params) +static size_t rounded_hashtable_size(const struct rhashtable_params *params) { return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), (unsigned long)params->min_size); @@ -919,7 +919,8 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params) * .obj_hashfn = my_hash_fn, * }; */ -int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) +int rhashtable_init(struct rhashtable *ht, + const struct rhashtable_params *params) { struct bucket_table *tbl; size_t size; @@ -946,7 +947,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) if (params->max_size) ht->p.max_size = rounddown_pow_of_two(params->max_size); - ht->p.min_size = max(params->min_size, HASH_MIN_SIZE); + ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); if (params->locks_mul) ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); -- cgit v1.3-6-gb490 From 02fd97c3d4a8a14e222b0021c366db7041d28743 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 20 Mar 2015 21:57:00 +1100 Subject: rhashtable: Allow hash/comparison functions to be inlined This patch deals with the complaint that we make indirect function calls on the fast paths unnecessarily in rhashtable. We resolve it by moving the fast paths into inline functions that take struct rhashtable_param (which obviously must be the same set of parameters supplied to rhashtable_init) as an argument. The only remaining indirect call is to obj_hashfn (or key_hashfn it obj_hashfn is unset) on the rehash as well as the insert-during- rehash slow path. This patch also extends the support of vairable-length keys to include those where the key is fixed but scattered in the object. For example, in netlink we want to key off the namespace and the portid but they're not next to each other. This patch does this by directly using the object hash function as the indicator of whether the key is accessible or not. It also adds a new function obj_cmpfn to compare a key against an object. This means that the caller no longer needs to supply explicit compare functions. All this is done in a backwards compatible manner so no existing users are affected until they convert to the new interface. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 386 +++++++++++++++++++++++++++++++++++++++++++++ lib/rhashtable.c | 163 ++++++------------- 2 files changed, 436 insertions(+), 113 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index c85363c45fc0..a7188eeb135b 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -22,6 +22,7 @@ #include #include #include +#include /* * The end of the chain is marked with a special nulls marks which has @@ -42,6 +43,9 @@ #define RHT_HASH_BITS 27 #define RHT_BASE_SHIFT RHT_HASH_BITS +/* Base bits plus 1 bit for nulls marker */ +#define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) + struct rhash_head { struct rhash_head __rcu *next; }; @@ -72,8 +76,20 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; +/** + * struct rhashtable_compare_arg - Key for the function rhashtable_compare + * @ht: Hash table + * @key: Key to compare against + */ +struct rhashtable_compare_arg { + struct rhashtable *ht; + const void *key; +}; + typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed); +typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, + const void *obj); struct rhashtable; @@ -89,6 +105,7 @@ struct rhashtable; * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Function to hash key * @obj_hashfn: Function to hash object + * @obj_cmpfn: Function to compare key with object */ struct rhashtable_params { size_t nelem_hint; @@ -101,6 +118,7 @@ struct rhashtable_params { size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; + rht_obj_cmpfn_t obj_cmpfn; }; /** @@ -165,6 +183,83 @@ static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr) return ((unsigned long) ptr) >> 1; } +static inline void *rht_obj(const struct rhashtable *ht, + const struct rhash_head *he) +{ + return (char *)he - ht->p.head_offset; +} + +static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, + unsigned int hash) +{ + return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1); +} + +static inline unsigned int rht_key_hashfn( + struct rhashtable *ht, const struct bucket_table *tbl, + const void *key, const struct rhashtable_params params) +{ + return rht_bucket_index(tbl, params.hashfn(key, params.key_len ?: + ht->p.key_len, + tbl->hash_rnd)); +} + +static inline unsigned int rht_head_hashfn( + struct rhashtable *ht, const struct bucket_table *tbl, + const struct rhash_head *he, const struct rhashtable_params params) +{ + const char *ptr = rht_obj(ht, he); + + return likely(params.obj_hashfn) ? + rht_bucket_index(tbl, params.obj_hashfn(ptr, tbl->hash_rnd)) : + rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); +} + +/** + * rht_grow_above_75 - returns true if nelems > 0.75 * table-size + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_75(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + /* Expand table when exceeding 75% load */ + return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) && + (!ht->p.max_size || tbl->size < ht->p.max_size); +} + +/** + * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_shrink_below_30(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + /* Shrink table beneath 30% load */ + return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) && + tbl->size > ht->p.min_size; +} + +/* The bucket lock is selected based on the hash and protects mutations + * on a group of hash buckets. + * + * A maximum of tbl->size/2 bucket locks is allocated. This ensures that + * a single lock always covers both buckets which may both contains + * entries which link to the same bucket of the old table during resizing. + * This allows to simplify the locking as locking the bucket in both + * tables during resize always guarantee protection. + * + * IMPORTANT: When holding the bucket lock of both the old and new table + * during expansions and shrinking, the old bucket lock must always be + * acquired first. + */ +static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl, + unsigned int hash) +{ + return &tbl->locks[hash & tbl->locks_mask]; +} + #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); @@ -184,6 +279,9 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, int rhashtable_init(struct rhashtable *ht, const struct rhashtable_params *params); +int rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj, + struct bucket_table *old_tbl); void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); @@ -356,4 +454,292 @@ void rhashtable_destroy(struct rhashtable *ht); rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ tbl, hash, member) +static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, + const void *obj) +{ + struct rhashtable *ht = arg->ht; + const char *ptr = obj; + + return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); +} + +/** + * rhashtable_lookup_fast - search hash table, inlined version + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup_fast( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + const struct bucket_table *tbl; + struct rhash_head *he; + unsigned hash; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); +restart: + hash = rht_key_hashfn(ht, tbl, key, params); + rht_for_each_rcu(he, tbl, hash) { + if (params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, he)) : + rhashtable_compare(&arg, rht_obj(ht, he))) + continue; + rcu_read_unlock(); + return rht_obj(ht, he); + } + + /* Ensure we see any new tables. */ + smp_rmb(); + + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (unlikely(tbl)) + goto restart; + rcu_read_unlock(); + + return NULL; +} + +static inline int __rhashtable_insert_fast( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) +{ + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + int err = -EEXIST; + struct bucket_table *tbl, *new_tbl; + struct rhash_head *head; + spinlock_t *lock; + unsigned hash; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + + spin_lock_bh(lock); + + /* Because we have already taken the bucket lock in tbl, + * if we find that future_tbl is not yet visible then + * that guarantees all other insertions of the same entry + * will also grab the bucket lock in tbl because until + * the rehash completes ht->tbl won't be changed. + */ + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (unlikely(new_tbl)) { + err = rhashtable_insert_slow(ht, key, obj, new_tbl); + goto out; + } + + if (!key) + goto skip_lookup; + + rht_for_each(head, tbl, hash) { + if (unlikely(!(params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head))))) + goto out; + } + +skip_lookup: + err = 0; + + head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + + RCU_INIT_POINTER(obj->next, head); + + rcu_assign_pointer(tbl->buckets[hash], obj); + + atomic_inc(&ht->nelems); + if (rht_grow_above_75(ht, tbl)) + schedule_work(&ht->run_work); + +out: + spin_unlock_bh(lock); + rcu_read_unlock(); + + return err; +} + +/** + * rhashtable_insert_fast - insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhashtable_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + return __rhashtable_insert_fast(ht, NULL, obj, params); +} + +/** + * rhashtable_lookup_insert_fast - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * This lookup function may only be used for fixed key hash table (key_len + * parameter set). It will BUG() if used inappropriately. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhashtable_lookup_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + const char *key = rht_obj(ht, obj); + + BUG_ON(ht->p.obj_hashfn); + + return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, + params); +} + +/** + * rhashtable_lookup_insert_key - search and insert object to hash table + * with explicit key + * @ht: hash table + * @key: key + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * Lookups may occur in parallel with hashtable mutations and resizing. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + * + * Returns zero on success. + */ +static inline int rhashtable_lookup_insert_key( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) +{ + BUG_ON(!ht->p.obj_hashfn || !key); + + return __rhashtable_insert_fast(ht, key, obj, params); +} + +static inline int __rhashtable_remove_fast( + struct rhashtable *ht, struct bucket_table *tbl, + struct rhash_head *obj, const struct rhashtable_params params) +{ + struct rhash_head __rcu **pprev; + struct rhash_head *he; + spinlock_t * lock; + unsigned hash; + int err = -ENOENT; + + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + + spin_lock_bh(lock); + + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { + if (he != obj) { + pprev = &he->next; + continue; + } + + rcu_assign_pointer(*pprev, obj->next); + err = 0; + break; + } + + spin_unlock_bh(lock); + + return err; +} + +/** + * rhashtable_remove_fast - remove object from hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Since the hash chain is single linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerable slow if the hash table is not correctly sized. + * + * Will automatically shrink the table via rhashtable_expand() if the + * shrink_decision function specified at rhashtable_init() returns true. + * + * Returns zero on success, -ENOENT if the entry could not be found. + */ +static inline int rhashtable_remove_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket + * lock in old_tbl, if we find that future_tbl is not yet + * visible then that guarantees the entry to still be in + * the old tbl if it exists. + */ + while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + if (err) + goto out; + + atomic_dec(&ht->nelems); + if (rht_shrink_below_30(ht, tbl)) + schedule_work(&ht->run_work); + +out: + rcu_read_unlock(); + + return err; +} + #endif /* _LINUX_RHASHTABLE_H */ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e0a9d59f80d6..d1d23fb58525 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -1,13 +1,13 @@ /* * Resizable, Scalable, Concurrent Hash Table * + * Copyright (c) 2015 Herbert Xu * Copyright (c) 2014-2015 Thomas Graf * Copyright (c) 2008-2014 Patrick McHardy * - * Based on the following paper: - * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf - * * Code partially derived from nft_hash + * Rewritten with rehash code from br_multicast plus single list + * pointer as suggested by Josh Triplett * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -30,53 +30,11 @@ #define HASH_MIN_SIZE 4U #define BUCKET_LOCKS_PER_CPU 128UL -/* Base bits plus 1 bit for nulls marker */ -#define HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) - -/* The bucket lock is selected based on the hash and protects mutations - * on a group of hash buckets. - * - * A maximum of tbl->size/2 bucket locks is allocated. This ensures that - * a single lock always covers both buckets which may both contains - * entries which link to the same bucket of the old table during resizing. - * This allows to simplify the locking as locking the bucket in both - * tables during resize always guarantee protection. - * - * IMPORTANT: When holding the bucket lock of both the old and new table - * during expansions and shrinking, the old bucket lock must always be - * acquired first. - */ -static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash) -{ - return &tbl->locks[hash & tbl->locks_mask]; -} - -static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) -{ - return (void *) he - ht->p.head_offset; -} - -static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash) -{ - return (hash >> HASH_RESERVED_SPACE) & (tbl->size - 1); -} - -static u32 key_hashfn(struct rhashtable *ht, const struct bucket_table *tbl, - const void *key) -{ - return rht_bucket_index(tbl, ht->p.hashfn(key, ht->p.key_len, - tbl->hash_rnd)); -} - static u32 head_hashfn(struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he) { - const char *ptr = rht_obj(ht, he); - - return likely(ht->p.key_len) ? - key_hashfn(ht, tbl, ptr + ht->p.key_offset) : - rht_bucket_index(tbl, ht->p.obj_hashfn(ptr, tbl->hash_rnd)); + return rht_head_hashfn(ht, tbl, he, ht->p); } #ifdef CONFIG_PROVE_LOCKING @@ -90,7 +48,7 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) { - spinlock_t *lock = bucket_lock(tbl, hash); + spinlock_t *lock = rht_bucket_lock(tbl, hash); return (debug_locks) ? lockdep_is_held(lock) : 1; } @@ -178,32 +136,6 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, return tbl; } -/** - * rht_grow_above_75 - returns true if nelems > 0.75 * table-size - * @ht: hash table - * @tbl: current table - */ -static bool rht_grow_above_75(const struct rhashtable *ht, - const struct bucket_table *tbl) -{ - /* Expand table when exceeding 75% load */ - return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) && - (!ht->p.max_size || tbl->size < ht->p.max_size); -} - -/** - * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size - * @ht: hash table - * @tbl: current table - */ -static bool rht_shrink_below_30(const struct rhashtable *ht, - const struct bucket_table *tbl) -{ - /* Shrink table beneath 30% load */ - return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) && - tbl->size > ht->p.min_size; -} - static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); @@ -230,7 +162,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash) new_hash = head_hashfn(ht, new_tbl, entry); - new_bucket_lock = bucket_lock(new_tbl, new_hash); + new_bucket_lock = rht_bucket_lock(new_tbl, new_hash); spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING); head = rht_dereference_bucket(new_tbl->buckets[new_hash], @@ -255,7 +187,7 @@ static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash) struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); spinlock_t *old_bucket_lock; - old_bucket_lock = bucket_lock(old_tbl, old_hash); + old_bucket_lock = rht_bucket_lock(old_tbl, old_hash); spin_lock_bh(old_bucket_lock); while (!rhashtable_rehash_one(ht, old_hash)) @@ -376,6 +308,37 @@ unlock: mutex_unlock(&ht->mutex); } +int rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj, + struct bucket_table *tbl) +{ + struct rhash_head *head; + unsigned hash; + int err = -EEXIST; + + hash = head_hashfn(ht, tbl, obj); + spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); + + if (key && rhashtable_lookup_fast(ht, key, ht->p)) + goto exit; + + err = 0; + + head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + + RCU_INIT_POINTER(obj->next, head); + + rcu_assign_pointer(tbl->buckets[hash], obj); + + atomic_inc(&ht->nelems); + +exit: + spin_unlock(rht_bucket_lock(tbl, hash)); + + return err; +} +EXPORT_SYMBOL_GPL(rhashtable_insert_slow); + static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, bool (*compare)(void *, void *), void *arg) { @@ -390,7 +353,7 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, old_tbl = rht_dereference_rcu(ht->tbl, ht); hash = head_hashfn(ht, old_tbl, obj); - old_lock = bucket_lock(old_tbl, hash); + old_lock = rht_bucket_lock(old_tbl, hash); spin_lock_bh(old_lock); @@ -403,7 +366,8 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl; if (tbl != old_tbl) { hash = head_hashfn(ht, tbl, obj); - spin_lock_nested(bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); + spin_lock_nested(rht_bucket_lock(tbl, hash), + SINGLE_DEPTH_NESTING); } if (compare && @@ -430,7 +394,7 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, exit: if (tbl != old_tbl) - spin_unlock(bucket_lock(tbl, hash)); + spin_unlock(rht_bucket_lock(tbl, hash)); spin_unlock_bh(old_lock); @@ -471,7 +435,7 @@ static bool __rhashtable_remove(struct rhashtable *ht, bool ret = false; hash = head_hashfn(ht, tbl, obj); - lock = bucket_lock(tbl, hash); + lock = rht_bucket_lock(tbl, hash); spin_lock_bh(lock); @@ -537,19 +501,6 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) } EXPORT_SYMBOL_GPL(rhashtable_remove); -struct rhashtable_compare_arg { - struct rhashtable *ht; - const void *key; -}; - -static bool rhashtable_compare(void *ptr, void *arg) -{ - struct rhashtable_compare_arg *x = arg; - struct rhashtable *ht = x->ht; - - return !memcmp(ptr + ht->p.key_offset, x->key, ht->p.key_len); -} - /** * rhashtable_lookup - lookup key in hash table * @ht: hash table @@ -565,14 +516,7 @@ static bool rhashtable_compare(void *ptr, void *arg) */ void *rhashtable_lookup(struct rhashtable *ht, const void *key) { - struct rhashtable_compare_arg arg = { - .ht = ht, - .key = key, - }; - - BUG_ON(!ht->p.key_len); - - return rhashtable_lookup_compare(ht, key, &rhashtable_compare, &arg); + return rhashtable_lookup_fast(ht, key, ht->p); } EXPORT_SYMBOL_GPL(rhashtable_lookup); @@ -591,7 +535,8 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup); * Returns the first entry on which the compare function returned true. */ void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, - bool (*compare)(void *, void *), void *arg) + bool (*compare)(void *, void *), + void *arg) { const struct bucket_table *tbl; struct rhash_head *he; @@ -601,7 +546,7 @@ void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, tbl = rht_dereference_rcu(ht->tbl, ht); restart: - hash = key_hashfn(ht, tbl, key); + hash = rht_key_hashfn(ht, tbl, key, ht->p); rht_for_each_rcu(he, tbl, hash) { if (!compare(rht_obj(ht, he), arg)) continue; @@ -643,15 +588,7 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); */ bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) { - struct rhashtable_compare_arg arg = { - .ht = ht, - .key = rht_obj(ht, obj) + ht->p.key_offset, - }; - - BUG_ON(!ht->p.key_len); - - return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare, - &arg); + return rhashtable_lookup_insert_fast(ht, obj, ht->p); } EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); @@ -927,8 +864,8 @@ int rhashtable_init(struct rhashtable *ht, size = HASH_DEFAULT_SIZE; - if ((params->key_len && !params->hashfn) || - (!params->key_len && !params->obj_hashfn)) + if ((!(params->key_len && params->hashfn) && !params->obj_hashfn) || + (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) -- cgit v1.3-6-gb490 From dc0ee268d85026530720d8c874716287b7ede25b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 20 Mar 2015 21:57:06 +1100 Subject: rhashtable: Rip out obsolete out-of-line interface Now that all rhashtable users have been converted over to the inline interface, this patch removes the unused out-of-line interface. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 19 +-- lib/rhashtable.c | 284 --------------------------------------------- 2 files changed, 3 insertions(+), 300 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index a7188eeb135b..c3034de2c235 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1,14 +1,13 @@ /* * Resizable, Scalable, Concurrent Hash Table * + * Copyright (c) 2015 Herbert Xu * Copyright (c) 2014 Thomas Graf * Copyright (c) 2008-2014 Patrick McHardy * - * Based on the following paper by Josh Triplett, Paul E. McKenney - * and Jonathan Walpole: - * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf - * * Code partially derived from nft_hash + * Rewritten with rehash code from br_multicast plus single list + * pointer as suggested by Josh Triplett * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -282,22 +281,10 @@ int rhashtable_init(struct rhashtable *ht, int rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, struct bucket_table *old_tbl); -void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); -bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); int rhashtable_expand(struct rhashtable *ht); int rhashtable_shrink(struct rhashtable *ht); -void *rhashtable_lookup(struct rhashtable *ht, const void *key); -void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, - bool (*compare)(void *, void *), void *arg); - -bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj); -bool rhashtable_lookup_compare_insert(struct rhashtable *ht, - struct rhash_head *obj, - bool (*compare)(void *, void *), - void *arg); - int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index d1d23fb58525..83cfedd6612a 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -339,290 +339,6 @@ exit: } EXPORT_SYMBOL_GPL(rhashtable_insert_slow); -static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, - bool (*compare)(void *, void *), void *arg) -{ - struct bucket_table *tbl, *old_tbl; - struct rhash_head *head; - bool no_resize_running; - unsigned hash; - spinlock_t *old_lock; - bool success = true; - - rcu_read_lock(); - - old_tbl = rht_dereference_rcu(ht->tbl, ht); - hash = head_hashfn(ht, old_tbl, obj); - old_lock = rht_bucket_lock(old_tbl, hash); - - spin_lock_bh(old_lock); - - /* Because we have already taken the bucket lock in old_tbl, - * if we find that future_tbl is not yet visible then that - * guarantees all other insertions of the same entry will - * also grab the bucket lock in old_tbl because until the - * rehash completes ht->tbl won't be changed. - */ - tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl; - if (tbl != old_tbl) { - hash = head_hashfn(ht, tbl, obj); - spin_lock_nested(rht_bucket_lock(tbl, hash), - SINGLE_DEPTH_NESTING); - } - - if (compare && - rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset, - compare, arg)) { - success = false; - goto exit; - } - - no_resize_running = tbl == old_tbl; - - head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); - - if (rht_is_a_nulls(head)) - INIT_RHT_NULLS_HEAD(obj->next, ht, hash); - else - RCU_INIT_POINTER(obj->next, head); - - rcu_assign_pointer(tbl->buckets[hash], obj); - - atomic_inc(&ht->nelems); - if (no_resize_running && rht_grow_above_75(ht, tbl)) - schedule_work(&ht->run_work); - -exit: - if (tbl != old_tbl) - spin_unlock(rht_bucket_lock(tbl, hash)); - - spin_unlock_bh(old_lock); - - rcu_read_unlock(); - - return success; -} - -/** - * rhashtable_insert - insert object into hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * - * Will take a per bucket spinlock to protect against mutual mutations - * on the same bucket. Multiple insertions may occur in parallel unless - * they map to the same bucket lock. - * - * It is safe to call this function from atomic context. - * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). - */ -void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) -{ - __rhashtable_insert(ht, obj, NULL, NULL); -} -EXPORT_SYMBOL_GPL(rhashtable_insert); - -static bool __rhashtable_remove(struct rhashtable *ht, - struct bucket_table *tbl, - struct rhash_head *obj) -{ - struct rhash_head __rcu **pprev; - struct rhash_head *he; - spinlock_t * lock; - unsigned hash; - bool ret = false; - - hash = head_hashfn(ht, tbl, obj); - lock = rht_bucket_lock(tbl, hash); - - spin_lock_bh(lock); - - pprev = &tbl->buckets[hash]; - rht_for_each(he, tbl, hash) { - if (he != obj) { - pprev = &he->next; - continue; - } - - rcu_assign_pointer(*pprev, obj->next); - ret = true; - break; - } - - spin_unlock_bh(lock); - - return ret; -} - -/** - * rhashtable_remove - remove object from hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * - * Since the hash chain is single linked, the removal operation needs to - * walk the bucket chain upon removal. The removal operation is thus - * considerable slow if the hash table is not correctly sized. - * - * Will automatically shrink the table via rhashtable_expand() if the - * shrink_decision function specified at rhashtable_init() returns true. - * - * The caller must ensure that no concurrent table mutations occur. It is - * however valid to have concurrent lookups if they are RCU protected. - */ -bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) -{ - struct bucket_table *tbl; - bool ret; - - rcu_read_lock(); - - tbl = rht_dereference_rcu(ht->tbl, ht); - - /* Because we have already taken (and released) the bucket - * lock in old_tbl, if we find that future_tbl is not yet - * visible then that guarantees the entry to still be in - * the old tbl if it exists. - */ - while (!(ret = __rhashtable_remove(ht, tbl, obj)) && - (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) - ; - - if (ret) { - atomic_dec(&ht->nelems); - if (rht_shrink_below_30(ht, tbl)) - schedule_work(&ht->run_work); - } - - rcu_read_unlock(); - - return ret; -} -EXPORT_SYMBOL_GPL(rhashtable_remove); - -/** - * rhashtable_lookup - lookup key in hash table - * @ht: hash table - * @key: pointer to key - * - * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. - * - * This lookup function may only be used for fixed key hash table (key_len - * parameter set). It will BUG() if used inappropriately. - * - * Lookups may occur in parallel with hashtable mutations and resizing. - */ -void *rhashtable_lookup(struct rhashtable *ht, const void *key) -{ - return rhashtable_lookup_fast(ht, key, ht->p); -} -EXPORT_SYMBOL_GPL(rhashtable_lookup); - -/** - * rhashtable_lookup_compare - search hash table with compare function - * @ht: hash table - * @key: the pointer to the key - * @compare: compare function, must return true on match - * @arg: argument passed on to compare function - * - * Traverses the bucket chain behind the provided hash value and calls the - * specified compare function for each entry. - * - * Lookups may occur in parallel with hashtable mutations and resizing. - * - * Returns the first entry on which the compare function returned true. - */ -void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, - bool (*compare)(void *, void *), - void *arg) -{ - const struct bucket_table *tbl; - struct rhash_head *he; - u32 hash; - - rcu_read_lock(); - - tbl = rht_dereference_rcu(ht->tbl, ht); -restart: - hash = rht_key_hashfn(ht, tbl, key, ht->p); - rht_for_each_rcu(he, tbl, hash) { - if (!compare(rht_obj(ht, he), arg)) - continue; - rcu_read_unlock(); - return rht_obj(ht, he); - } - - /* Ensure we see any new tables. */ - smp_rmb(); - - tbl = rht_dereference_rcu(tbl->future_tbl, ht); - if (unlikely(tbl)) - goto restart; - rcu_read_unlock(); - - return NULL; -} -EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); - -/** - * rhashtable_lookup_insert - lookup and insert object into hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * - * This lookup function may only be used for fixed key hash table (key_len - * parameter set). It will BUG() if used inappropriately. - * - * It is safe to call this function from atomic context. - * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). - */ -bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) -{ - return rhashtable_lookup_insert_fast(ht, obj, ht->p); -} -EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); - -/** - * rhashtable_lookup_compare_insert - search and insert object to hash table - * with compare function - * @ht: hash table - * @obj: pointer to hash head inside object - * @compare: compare function, must return true on match - * @arg: argument passed on to compare function - * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * - * Lookups may occur in parallel with hashtable mutations and resizing. - * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). - */ -bool rhashtable_lookup_compare_insert(struct rhashtable *ht, - struct rhash_head *obj, - bool (*compare)(void *, void *), - void *arg) -{ - BUG_ON(!ht->p.key_len); - - return __rhashtable_insert(ht, obj, compare, arg); -} -EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert); - /** * rhashtable_walk_init - Initialise an iterator * @ht: Table to walk over -- cgit v1.3-6-gb490 From 6626af692692b52c8f9e20ad8201a3255e5ab25b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 20 Mar 2015 18:18:45 -0400 Subject: rhashtable: Fix undeclared EEXIST build error on ia64 We need to include linux/errno.h in rhashtable.h since it doesn't always get included otherwise. Reported-by: kbuild test robot Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index c3034de2c235..89d102270570 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -18,6 +18,7 @@ #define _LINUX_RHASHTABLE_H #include +#include #include #include #include -- cgit v1.3-6-gb490 From d3593b5cef76db45c864de23c599b58198879e8c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Mar 2015 17:15:19 -0700 Subject: Revert "selinux: add a skb_owned_by() hook" This reverts commit ca10b9e9a8ca7342ee07065289cbe74ac128c169. No longer needed after commit eb8895debe1baba41fcb62c78a16f0c63c21662a ("tcp: tcp_make_synack() should use sock_wmalloc") When under SYNFLOOD, we build lot of SYNACK and hit false sharing because of multiple modifications done on sk_listener->sk_wmem_alloc Since tcp_make_synack() uses sock_wmalloc(), there is no need to call skb_set_owner_w() again, as this adds two atomic operations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/security.h | 8 -------- net/ipv4/tcp_output.c | 1 - security/capability.c | 6 ------ security/security.c | 5 ----- security/selinux/hooks.c | 7 ------- 5 files changed, 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index a1b7dbd127ff..25a079a7c3b3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1716,7 +1716,6 @@ struct security_operations { int (*tun_dev_attach_queue) (void *security); int (*tun_dev_attach) (struct sock *sk, void *security); int (*tun_dev_open) (void *security); - void (*skb_owned_by) (struct sk_buff *skb, struct sock *sk); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2735,8 +2734,6 @@ int security_tun_dev_attach_queue(void *security); int security_tun_dev_attach(struct sock *sk, void *security); int security_tun_dev_open(void *security); -void security_skb_owned_by(struct sk_buff *skb, struct sock *sk); - #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, struct sock *other, @@ -2928,11 +2925,6 @@ static inline int security_tun_dev_open(void *security) { return 0; } - -static inline void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) -{ -} - #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c2f0f6065cb1..18474088c3d0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2926,7 +2926,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, MAX_TCP_HEADER); skb_dst_set(skb, dst); - security_skb_owned_by(skb, sk); mss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) diff --git a/security/capability.c b/security/capability.c index 070dd46f62f4..58a1600c149b 100644 --- a/security/capability.c +++ b/security/capability.c @@ -776,11 +776,6 @@ static int cap_tun_dev_open(void *security) { return 0; } - -static void cap_skb_owned_by(struct sk_buff *skb, struct sock *sk) -{ -} - #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1134,7 +1129,6 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, tun_dev_open); set_to_cap_if_null(ops, tun_dev_attach_queue); set_to_cap_if_null(ops, tun_dev_attach); - set_to_cap_if_null(ops, skb_owned_by); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_cap_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/security.c b/security/security.c index e81d5bbe7363..1f475aa53288 100644 --- a/security/security.c +++ b/security/security.c @@ -1359,11 +1359,6 @@ int security_tun_dev_open(void *security) } EXPORT_SYMBOL(security_tun_dev_open); -void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) -{ - security_ops->skb_owned_by(skb, sk); -} - #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4d1a54190388..edc66de39f2e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -51,7 +51,6 @@ #include #include #include /* for local_port_range[] */ -#include #include /* struct or_callable used in sock_rcv_skb */ #include #include @@ -4652,11 +4651,6 @@ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); } -static void selinux_skb_owned_by(struct sk_buff *skb, struct sock *sk) -{ - skb_set_owner_w(skb, sk); -} - static int selinux_secmark_relabel_packet(u32 sid) { const struct task_security_struct *__tsec; @@ -6041,7 +6035,6 @@ static struct security_operations selinux_ops = { .tun_dev_attach_queue = selinux_tun_dev_attach_queue, .tun_dev_attach = selinux_tun_dev_attach, .tun_dev_open = selinux_tun_dev_open, - .skb_owned_by = selinux_skb_owned_by, #ifdef CONFIG_SECURITY_NETWORK_XFRM .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, -- cgit v1.3-6-gb490 From 8d0451638ad3f7ccd5250c1dd90e06ad487b2703 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Mar 2015 20:55:31 +0100 Subject: netfilter: bridge: kill nf_bridge_pad The br_netfilter frag output function calls skb_cow_head() so in case it needs a larger headroom to e.g. re-add a previously stripped PPPOE or VLAN header things will still work (at cost of reallocation). We can then move nf_bridge_encap_header_len to br_netfilter. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 22 ---------------------- net/bridge/br_netfilter.c | 12 ++++++++++++ net/ipv4/ip_output.c | 5 +---- 3 files changed, 13 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index ed0d3bf953c3..2734977199ca 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -23,18 +23,6 @@ enum nf_br_hook_priorities { #define BRNF_8021Q 0x10 #define BRNF_PPPoE 0x20 -static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) -{ - switch (skb->protocol) { - case __cpu_to_be16(ETH_P_8021Q): - return VLAN_HLEN; - case __cpu_to_be16(ETH_P_PPP_SES): - return PPPOE_SES_HLEN; - default: - return 0; - } -} - static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) @@ -44,15 +32,6 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) int br_handle_frame_finish(struct sk_buff *skb); -/* This is called by the IP fragmenting code and it ensures there is - * enough room for the encapsulating header (if there is one). */ -static inline unsigned int nf_bridge_pad(const struct sk_buff *skb) -{ - if (skb->nf_bridge) - return nf_bridge_encap_header_len(skb); - return 0; -} - static inline void br_drop_fake_rtable(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -62,7 +41,6 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) } #else -#define nf_bridge_pad(skb) (0) #define br_drop_fake_rtable(skb) do { } while (0) #endif /* CONFIG_BRIDGE_NETFILTER */ diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index bd2d24d1ff21..f3884a1b942f 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -153,6 +153,18 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) return nf_bridge; } +static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case __cpu_to_be16(ETH_P_8021Q): + return VLAN_HLEN; + case __cpu_to_be16(ETH_P_PPP_SES): + return PPPOE_SES_HLEN; + default: + return 0; + } +} + static inline void nf_bridge_push_encap_header(struct sk_buff *skb) { unsigned int len = nf_bridge_encap_header_len(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a7aea2048a0d..90b49e88e84a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -636,10 +636,7 @@ slow_path: left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ - /* for bridged IP traffic encapsulated inside f.e. a vlan header, - * we need to make room for the encapsulating header - */ - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb)); + ll_rs = LL_RESERVED_SPACE(rt->dst.dev); /* * Fragment the datagram. -- cgit v1.3-6-gb490 From 2b290bbb60847c0897c047b5214192810de529df Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Mar 2015 04:48:20 +0100 Subject: can: use sock_efree instead of own destructor It is identical to the can destructor. Signed-off-by: Florian Westphal Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index cc00d15c6107..b6a52a4b457a 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -44,16 +44,11 @@ static inline void can_skb_reserve(struct sk_buff *skb) skb_reserve(skb, sizeof(struct can_skb_priv)); } -static inline void can_skb_destructor(struct sk_buff *skb) -{ - sock_put(skb->sk); -} - static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) { if (sk) { sock_hold(sk); - skb->destructor = can_skb_destructor; + skb->destructor = sock_efree; skb->sk = sk; } } -- cgit v1.3-6-gb490 From c54eb70e3bb8cd0d7b8564bedab63e834656c567 Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Mon, 16 Mar 2015 09:38:13 +0100 Subject: can: add combined rx/tx LED trigger support Add -rxtx trigger, that will be activated both for tx as rx events. This trigger mimics "activity" LED for Ethernet devices. Signed-off-by: Yegor Yefremov Signed-off-by: Marc Kleine-Budde --- drivers/net/can/led.c | 22 +++++++++++++++++++--- include/linux/can/dev.h | 2 ++ include/linux/can/led.h | 6 ++++-- 3 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/can/led.c b/drivers/net/can/led.c index ab7f1b01be49..c1b667675fa1 100644 --- a/drivers/net/can/led.c +++ b/drivers/net/can/led.c @@ -30,20 +30,28 @@ void can_led_event(struct net_device *netdev, enum can_led_event event) case CAN_LED_EVENT_OPEN: led_trigger_event(priv->tx_led_trig, LED_FULL); led_trigger_event(priv->rx_led_trig, LED_FULL); + led_trigger_event(priv->rxtx_led_trig, LED_FULL); break; case CAN_LED_EVENT_STOP: led_trigger_event(priv->tx_led_trig, LED_OFF); led_trigger_event(priv->rx_led_trig, LED_OFF); + led_trigger_event(priv->rxtx_led_trig, LED_OFF); break; case CAN_LED_EVENT_TX: - if (led_delay) + if (led_delay) { led_trigger_blink_oneshot(priv->tx_led_trig, &led_delay, &led_delay, 1); + led_trigger_blink_oneshot(priv->rxtx_led_trig, + &led_delay, &led_delay, 1); + } break; case CAN_LED_EVENT_RX: - if (led_delay) + if (led_delay) { led_trigger_blink_oneshot(priv->rx_led_trig, &led_delay, &led_delay, 1); + led_trigger_blink_oneshot(priv->rxtx_led_trig, + &led_delay, &led_delay, 1); + } break; } } @@ -55,6 +63,7 @@ static void can_led_release(struct device *gendev, void *res) led_trigger_unregister_simple(priv->tx_led_trig); led_trigger_unregister_simple(priv->rx_led_trig); + led_trigger_unregister_simple(priv->rxtx_led_trig); } /* Register CAN LED triggers for a CAN device @@ -76,11 +85,15 @@ void devm_can_led_init(struct net_device *netdev) "%s-tx", netdev->name); snprintf(priv->rx_led_trig_name, sizeof(priv->rx_led_trig_name), "%s-rx", netdev->name); + snprintf(priv->rxtx_led_trig_name, sizeof(priv->rxtx_led_trig_name), + "%s-rxtx", netdev->name); led_trigger_register_simple(priv->tx_led_trig_name, &priv->tx_led_trig); led_trigger_register_simple(priv->rx_led_trig_name, &priv->rx_led_trig); + led_trigger_register_simple(priv->rxtx_led_trig_name, + &priv->rxtx_led_trig); devres_add(&netdev->dev, res); } @@ -97,7 +110,7 @@ static int can_led_notifier(struct notifier_block *nb, unsigned long msg, if (!priv) return NOTIFY_DONE; - if (!priv->tx_led_trig || !priv->rx_led_trig) + if (!priv->tx_led_trig || !priv->rx_led_trig || !priv->rxtx_led_trig) return NOTIFY_DONE; if (msg == NETDEV_CHANGENAME) { @@ -106,6 +119,9 @@ static int can_led_notifier(struct notifier_block *nb, unsigned long msg, snprintf(name, sizeof(name), "%s-rx", netdev->name); led_trigger_rename_static(name, priv->rx_led_trig); + + snprintf(name, sizeof(name), "%s-rxtx", netdev->name); + led_trigger_rename_static(name, priv->rxtx_led_trig); } return NOTIFY_DONE; diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index c05ff0f9f9a5..c3a9c8fc60fa 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -61,6 +61,8 @@ struct can_priv { char tx_led_trig_name[CAN_LED_NAME_SZ]; struct led_trigger *rx_led_trig; char rx_led_trig_name[CAN_LED_NAME_SZ]; + struct led_trigger *rxtx_led_trig; + char rxtx_led_trig_name[CAN_LED_NAME_SZ]; #endif }; diff --git a/include/linux/can/led.h b/include/linux/can/led.h index e0475c5cbb92..146de4506d21 100644 --- a/include/linux/can/led.h +++ b/include/linux/can/led.h @@ -21,8 +21,10 @@ enum can_led_event { #ifdef CONFIG_CAN_LEDS -/* keep space for interface name + "-tx"/"-rx" suffix and null terminator */ -#define CAN_LED_NAME_SZ (IFNAMSIZ + 4) +/* keep space for interface name + "-tx"/"-rx"/"-rxtx" + * suffix and null terminator + */ +#define CAN_LED_NAME_SZ (IFNAMSIZ + 6) void can_led_event(struct net_device *netdev, enum can_led_event event); void devm_can_led_init(struct net_device *netdev); -- cgit v1.3-6-gb490 From 0345f93138b2224e0d7ce91fcffdb3dd23f364d7 Mon Sep 17 00:00:00 2001 From: "tadeusz.struk@intel.com" Date: Thu, 19 Mar 2015 12:31:25 -0700 Subject: net: socket: add support for async operations Add support for async operations. Signed-off-by: Tadeusz Struk Signed-off-by: David S. Miller --- include/linux/socket.h | 1 + net/compat.c | 2 ++ net/socket.c | 8 ++++++-- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index fab4d0ddf4ed..c9852ef7e317 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -51,6 +51,7 @@ struct msghdr { void *msg_control; /* ancillary data */ __kernel_size_t msg_controllen; /* ancillary data buffer length */ unsigned int msg_flags; /* flags on received message */ + struct kiocb *msg_iocb; /* ptr to iocb for async requests */ }; struct user_msghdr { diff --git a/net/compat.c b/net/compat.c index 13c0c9a25cd9..c4b6b0f43d5d 100644 --- a/net/compat.c +++ b/net/compat.c @@ -79,6 +79,8 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg, if (nr_segs > UIO_MAXIOV) return -EMSGSIZE; + kmsg->msg_iocb = NULL; + err = compat_rw_copy_check_uvector(save_addr ? READ : WRITE, compat_ptr(uiov), nr_segs, UIO_FASTIOV, *iov, iov); diff --git a/net/socket.c b/net/socket.c index 3e776776f42c..073809f4125f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -798,7 +798,8 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - struct msghdr msg = {.msg_iter = *to}; + struct msghdr msg = {.msg_iter = *to, + .msg_iocb = iocb}; ssize_t res; if (file->f_flags & O_NONBLOCK) @@ -819,7 +820,8 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - struct msghdr msg = {.msg_iter = *from}; + struct msghdr msg = {.msg_iter = *from, + .msg_iocb = iocb}; ssize_t res; if (iocb->ki_pos != 0) @@ -1894,6 +1896,8 @@ static ssize_t copy_msghdr_from_user(struct msghdr *kmsg, if (nr_segs > UIO_MAXIOV) return -EMSGSIZE; + kmsg->msg_iocb = NULL; + err = rw_copy_check_uvector(save_addr ? READ : WRITE, uiov, nr_segs, UIO_FASTIOV, *iov, iov); -- cgit v1.3-6-gb490 From 42cb80a2353f42913ae78074ffa1f1b4a49e5436 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Mar 2015 10:22:19 -0700 Subject: inet: remove sk_listener parameter from syn_ack_timeout() It is not needed, and req->sk_listener points to the listener anyway. request_sock argument can be const. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 +- include/net/request_sock.h | 3 +-- include/net/tcp.h | 2 +- net/dccp/ipv4.c | 2 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/tcp_timer.c | 8 +++++--- 6 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 439ff698000a..3dca24d3ac67 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -317,6 +317,6 @@ static inline const char *dccp_role(const struct sock *sk) return NULL; } -extern void dccp_syn_ack_timeout(struct sock *sk, struct request_sock *req); +extern void dccp_syn_ack_timeout(const struct request_sock *req); #endif /* _LINUX_DCCP_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 6a91261d9b7b..8603c350fad0 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -39,8 +39,7 @@ struct request_sock_ops { void (*send_reset)(struct sock *sk, struct sk_buff *skb); void (*destructor)(struct request_sock *req); - void (*syn_ack_timeout)(struct sock *sk, - struct request_sock *req); + void (*syn_ack_timeout)(const struct request_sock *req); }; int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req); diff --git a/include/net/tcp.h b/include/net/tcp.h index 082fd79132b7..1876262afd59 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -433,7 +433,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, int compat_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); -void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req); +void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); void tcp_parse_options(const struct sk_buff *skb, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 25a9615b3b88..1f7161e05403 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -576,7 +576,7 @@ static void dccp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -void dccp_syn_ack_timeout(struct sock *sk, struct request_sock *req) +void dccp_syn_ack_timeout(const struct request_sock *req) { } EXPORT_SYMBOL(dccp_syn_ack_timeout); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7d011e825c48..a12b973164d0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -615,7 +615,7 @@ static void reqsk_timer_handler(unsigned long data) max_retries = defer_accept; syn_ack_recalc(req, thresh, max_retries, defer_accept, &expire, &resend); - req->rsk_ops->syn_ack_timeout(sk_listener, req); + req->rsk_ops->syn_ack_timeout(req); if (!expire && (!resend || !inet_rtx_syn_ack(sk_listener, req) || diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3daa6b5d766d..2568fd282873 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -327,7 +327,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) struct request_sock *req; req = tcp_sk(sk)->fastopen_rsk; - req->rsk_ops->syn_ack_timeout(sk, req); + req->rsk_ops->syn_ack_timeout(req); if (req->num_timeout >= max_retries) { tcp_write_err(sk); @@ -539,9 +539,11 @@ static void tcp_write_timer(unsigned long data) sock_put(sk); } -void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req) +void tcp_syn_ack_timeout(const struct request_sock *req) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); + struct net *net = read_pnet(&inet_rsk(req)->ireq_net); + + NET_INC_STATS_BH(net, LINUX_MIB_TCPTIMEOUTS); } EXPORT_SYMBOL(tcp_syn_ack_timeout); -- cgit v1.3-6-gb490 From 85645bab57bfc6b0b43bb96a301c4ef83925c07d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Mar 2015 10:22:24 -0700 Subject: ipv4: dccp: handle ICMP messages on DCCP_NEW_SYN_RECV request sockets dccp_v4_err() can restrict lookups to ehash table, and not to listeners. Note this patch creates the infrastructure, but this means that ICMP messages for request sockets are ignored until complete conversion. New dccp_req_err() helper is exported so that we can use it in IPv6 in following patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 ++ net/dccp/dccp.h | 1 + net/dccp/ipv4.c | 70 +++++++++++++++++++++++++--------------------------- 3 files changed, 37 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 3dca24d3ac67..221025423e6c 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -43,6 +43,7 @@ enum dccp_state { DCCP_CLOSING = TCP_CLOSING, DCCP_TIME_WAIT = TCP_TIME_WAIT, DCCP_CLOSED = TCP_CLOSE, + DCCP_NEW_SYN_RECV = TCP_NEW_SYN_RECV, DCCP_PARTOPEN = TCP_MAX_STATES, DCCP_PASSIVE_CLOSEREQ, /* clients receiving CloseReq */ DCCP_MAX_STATES @@ -57,6 +58,7 @@ enum { DCCPF_CLOSING = TCPF_CLOSING, DCCPF_TIME_WAIT = TCPF_TIME_WAIT, DCCPF_CLOSED = TCPF_CLOSE, + DCCPF_NEW_SYN_RECV = TCPF_NEW_SYN_RECV, DCCPF_PARTOPEN = (1 << DCCP_PARTOPEN), }; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 2396f50c5b04..bebc735f5afc 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -317,6 +317,7 @@ int inet_dccp_listen(struct socket *sock, int backlog); unsigned int dccp_poll(struct file *file, struct socket *sock, poll_table *wait); int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +void dccp_req_err(struct sock *sk, u64 seq); struct sk_buff *dccp_ctl_make_reset(struct sock *sk, struct sk_buff *skb); int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 1f7161e05403..6310b8b19598 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -195,6 +195,32 @@ static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk) dst->ops->redirect(dst, sk, skb); } +void dccp_req_err(struct sock *sk, u64 seq) + { + struct request_sock *req = inet_reqsk(sk); + struct net *net = sock_net(sk); + + /* + * ICMPs are not backlogged, hence we cannot get an established + * socket here. + */ + WARN_ON(req->sk); + + if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + } else { + /* + * Still in RESPOND, just remove it silently. + * There is no good way to pass the error to the newly + * created socket, and POSIX does not want network + * errors returned from accept(). + */ + inet_csk_reqsk_queue_drop(req->rsk_listener, req); + } + reqsk_put(req); +} +EXPORT_SYMBOL(dccp_req_err); + /* * This routine is called by the ICMP module when it gets some sort of error * condition. If err < 0 then the socket should be closed and the error @@ -227,10 +253,11 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) return; } - sk = inet_lookup(net, &dccp_hashinfo, - iph->daddr, dh->dccph_dport, - iph->saddr, dh->dccph_sport, inet_iif(skb)); - if (sk == NULL) { + sk = __inet_lookup_established(net, &dccp_hashinfo, + iph->daddr, dh->dccph_dport, + iph->saddr, ntohs(dh->dccph_sport), + inet_iif(skb)); + if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } @@ -239,6 +266,9 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) inet_twsk_put(inet_twsk(sk)); return; } + seq = dccp_hdr_seq(dh); + if (sk->sk_state == DCCP_NEW_SYN_RECV) + return dccp_req_err(sk, seq); bh_lock_sock(sk); /* If too many ICMPs get dropped on busy @@ -251,7 +281,6 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) goto out; dp = dccp_sk(sk); - seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); @@ -288,37 +317,6 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) } switch (sk->sk_state) { - struct request_sock *req; - case DCCP_LISTEN: - if (sock_owned_by_user(sk)) - goto out; - req = inet_csk_search_req(sk, dh->dccph_dport, - iph->daddr, iph->saddr); - if (!req) - goto out; - - /* - * ICMPs are not backlogged, hence we cannot get an established - * socket here. - */ - WARN_ON(req->sk); - - if (!between48(seq, dccp_rsk(req)->dreq_iss, - dccp_rsk(req)->dreq_gss)) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - reqsk_put(req); - goto out; - } - /* - * Still in RESPOND, just remove it silently. - * There is no good way to pass the error to the newly - * created socket, and POSIX does not want network - * errors returned from accept(). - */ - inet_csk_reqsk_queue_drop(sk, req); - reqsk_put(req); - goto out; - case DCCP_REQUESTING: case DCCP_RESPOND: if (!sock_owned_by_user(sk)) { -- cgit v1.3-6-gb490 From de91b25c8011089f5dd99b9d24743db1f550ca4b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 00:50:20 +1100 Subject: rhashtable: Eliminate unnecessary branch in rht_key_hashfn When rht_key_hashfn is called from rhashtable itself and params is equal to ht->p, there is no point in checking params.key_len and falling back to ht->p.key_len. For some reason gcc couldn't figure out that params is the same as ht->p. So let's help it by only checking params.key_len when it's a constant. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 89d102270570..3851952781d7 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -199,8 +199,12 @@ static inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { - return rht_bucket_index(tbl, params.hashfn(key, params.key_len ?: - ht->p.key_len, + /* params must be equal to ht->p if it isn't constant. */ + unsigned key_len = __builtin_constant_p(params.key_len) ? + (params.key_len ?: ht->p.key_len) : + params.key_len; + + return rht_bucket_index(tbl, params.hashfn(key, key_len, tbl->hash_rnd)); } -- cgit v1.3-6-gb490 From 31ccde2dacea8375c3a7d6fffbf0060ee0d40214 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 00:50:21 +1100 Subject: rhashtable: Allow hashfn to be unset Since every current rhashtable user uses jhash as their hash function, the fact that jhash is an inline function causes each user to generate a copy of its code. This function provides a solution to this problem by allowing hashfn to be unset. In which case rhashtable will automatically set it to jhash. Furthermore, if the key length is a multiple of 4, we will switch over to jhash2. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 33 +++++++++++++++++++++++++++------ lib/rhashtable.c | 17 ++++++++++++++++- 2 files changed, 43 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 3851952781d7..bc2488b98321 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -103,7 +104,7 @@ struct rhashtable; * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) - * @hashfn: Function to hash key + * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object * @obj_cmpfn: Function to compare key with object */ @@ -125,6 +126,7 @@ struct rhashtable_params { * struct rhashtable - Hash table handle * @tbl: Bucket table * @nelems: Number of elements in table + * @key_len: Key length for hashfn * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping @@ -134,6 +136,7 @@ struct rhashtable { struct bucket_table __rcu *tbl; atomic_t nelems; bool being_destroyed; + unsigned int key_len; struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; @@ -199,13 +202,31 @@ static inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { + unsigned hash; + /* params must be equal to ht->p if it isn't constant. */ - unsigned key_len = __builtin_constant_p(params.key_len) ? - (params.key_len ?: ht->p.key_len) : - params.key_len; + if (!__builtin_constant_p(params.key_len)) + hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd); + else if (params.key_len) { + unsigned key_len = params.key_len; + + if (params.hashfn) + hash = params.hashfn(key, key_len, tbl->hash_rnd); + else if (key_len & (sizeof(u32) - 1)) + hash = jhash(key, key_len, tbl->hash_rnd); + else + hash = jhash2(key, key_len / sizeof(u32), + tbl->hash_rnd); + } else { + unsigned key_len = ht->p.key_len; + + if (params.hashfn) + hash = params.hashfn(key, key_len, tbl->hash_rnd); + else + hash = jhash(key, key_len, tbl->hash_rnd); + } - return rht_bucket_index(tbl, params.hashfn(key, key_len, - tbl->hash_rnd)); + return rht_bucket_index(tbl, hash); } static inline unsigned int rht_head_hashfn( diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 618a3f00d712..798f01d64ab0 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -532,6 +532,11 @@ static size_t rounded_hashtable_size(const struct rhashtable_params *params) (unsigned long)params->min_size); } +static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) +{ + return jhash2(key, length, seed); +} + /** * rhashtable_init - initialize a new hash table * @ht: hash table to be initialized @@ -583,7 +588,7 @@ int rhashtable_init(struct rhashtable *ht, size = HASH_DEFAULT_SIZE; - if ((!(params->key_len && params->hashfn) && !params->obj_hashfn) || + if ((!params->key_len && !params->obj_hashfn) || (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; @@ -610,6 +615,16 @@ int rhashtable_init(struct rhashtable *ht, else ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; + ht->key_len = ht->p.key_len; + if (!params->hashfn) { + ht->p.hashfn = jhash; + + if (!(ht->key_len & (sizeof(u32) - 1))) { + ht->key_len /= sizeof(u32); + ht->p.hashfn = rhashtable_jhash2; + } + } + tbl = bucket_table_alloc(ht, size); if (tbl == NULL) return -ENOMEM; -- cgit v1.3-6-gb490 From b824478b2145be78ac19e1cf44e2b9036c7a9608 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 00:50:26 +1100 Subject: rhashtable: Add multiple rehash support This patch adds the missing bits to allow multiple rehashes. The read-side as well as remove already handle this correctly. So it's only the rehasher and insertion that need modification to handle this. Note that this patch doesn't actually enable it so for now rehashing is still only performed by the worker thread. This patch also disables the explicit expand/shrink interface because the table is meant to expand and shrink automatically, and continuing to export these interfaces unnecessarily complicates the life of the rehasher since the rehash process is now composed of two parts. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 26 +++++++------- lib/rhashtable.c | 87 ++++++++++++++++++++++++++++++++++++++-------- lib/test_rhashtable.c | 24 ------------- 3 files changed, 86 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index bc2488b98321..e8ffcdb5e239 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -308,9 +308,6 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, struct bucket_table *old_tbl); -int rhashtable_expand(struct rhashtable *ht); -int rhashtable_shrink(struct rhashtable *ht); - int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); @@ -541,17 +538,22 @@ static inline int __rhashtable_insert_fast( rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); - hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - - spin_lock_bh(lock); - /* Because we have already taken the bucket lock in tbl, - * if we find that future_tbl is not yet visible then - * that guarantees all other insertions of the same entry - * will also grab the bucket lock in tbl because until - * the rehash completes ht->tbl won't be changed. + /* All insertions must grab the oldest table containing + * the hashed bucket that is yet to be rehashed. */ + for (;;) { + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); + + if (tbl->rehash <= hash) + break; + + spin_unlock_bh(lock); + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + } + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(new_tbl)) { err = rhashtable_insert_slow(ht, key, obj, new_tbl); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 9623be345d9c..5e04403e25f5 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -136,11 +136,24 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, return tbl; } +static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, + struct bucket_table *tbl) +{ + struct bucket_table *new_tbl; + + do { + new_tbl = tbl; + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + } while (tbl); + + return new_tbl; +} + static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); - struct bucket_table *new_tbl = - rht_dereference(old_tbl->future_tbl, ht) ?: old_tbl; + struct bucket_table *new_tbl = rhashtable_last_table(ht, + rht_dereference_rcu(old_tbl->future_tbl, ht)); struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash]; int err = -ENOENT; struct rhash_head *head, *next, *entry; @@ -196,12 +209,18 @@ static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash) spin_unlock_bh(old_bucket_lock); } -static void rhashtable_rehash(struct rhashtable *ht, - struct bucket_table *new_tbl) +static int rhashtable_rehash_attach(struct rhashtable *ht, + struct bucket_table *old_tbl, + struct bucket_table *new_tbl) { - struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); - struct rhashtable_walker *walker; - unsigned old_hash; + /* Protect future_tbl using the first bucket lock. */ + spin_lock_bh(old_tbl->locks); + + /* Did somebody beat us to it? */ + if (rcu_access_pointer(old_tbl->future_tbl)) { + spin_unlock_bh(old_tbl->locks); + return -EEXIST; + } /* Make insertions go into the new, empty table right away. Deletions * and lookups will be attempted in both tables until we synchronize. @@ -211,6 +230,22 @@ static void rhashtable_rehash(struct rhashtable *ht, /* Ensure the new table is visible to readers. */ smp_wmb(); + spin_unlock_bh(old_tbl->locks); + + return 0; +} + +static int rhashtable_rehash_table(struct rhashtable *ht) +{ + struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *new_tbl; + struct rhashtable_walker *walker; + unsigned old_hash; + + new_tbl = rht_dereference(old_tbl->future_tbl, ht); + if (!new_tbl) + return 0; + for (old_hash = 0; old_hash < old_tbl->size; old_hash++) rhashtable_rehash_chain(ht, old_hash); @@ -225,6 +260,8 @@ static void rhashtable_rehash(struct rhashtable *ht, * remain. */ call_rcu(&old_tbl->rcu, bucket_table_free_rcu); + + return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } /** @@ -242,20 +279,25 @@ static void rhashtable_rehash(struct rhashtable *ht, * It is valid to have concurrent insertions and deletions protected by per * bucket locks or concurrent RCU protected lookups and traversals. */ -int rhashtable_expand(struct rhashtable *ht) +static int rhashtable_expand(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); + int err; ASSERT_RHT_MUTEX(ht); + old_tbl = rhashtable_last_table(ht, old_tbl); + new_tbl = bucket_table_alloc(ht, old_tbl->size * 2); if (new_tbl == NULL) return -ENOMEM; - rhashtable_rehash(ht, new_tbl); - return 0; + err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); + if (err) + bucket_table_free(new_tbl); + + return err; } -EXPORT_SYMBOL_GPL(rhashtable_expand); /** * rhashtable_shrink - Shrink hash table while allowing concurrent lookups @@ -273,10 +315,11 @@ EXPORT_SYMBOL_GPL(rhashtable_expand); * It is valid to have concurrent insertions and deletions protected by per * bucket locks or concurrent RCU protected lookups and traversals. */ -int rhashtable_shrink(struct rhashtable *ht) +static int rhashtable_shrink(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); unsigned size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2); + int err; ASSERT_RHT_MUTEX(ht); @@ -286,19 +329,25 @@ int rhashtable_shrink(struct rhashtable *ht) if (old_tbl->size <= size) return 0; + if (rht_dereference(old_tbl->future_tbl, ht)) + return -EEXIST; + new_tbl = bucket_table_alloc(ht, size); if (new_tbl == NULL) return -ENOMEM; - rhashtable_rehash(ht, new_tbl); - return 0; + err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); + if (err) + bucket_table_free(new_tbl); + + return err; } -EXPORT_SYMBOL_GPL(rhashtable_shrink); static void rht_deferred_worker(struct work_struct *work) { struct rhashtable *ht; struct bucket_table *tbl; + int err = 0; ht = container_of(work, struct rhashtable, run_work); mutex_lock(&ht->mutex); @@ -306,13 +355,20 @@ static void rht_deferred_worker(struct work_struct *work) goto unlock; tbl = rht_dereference(ht->tbl, ht); + tbl = rhashtable_last_table(ht, tbl); if (rht_grow_above_75(ht, tbl)) rhashtable_expand(ht); else if (rht_shrink_below_30(ht, tbl)) rhashtable_shrink(ht); + + err = rhashtable_rehash_table(ht); + unlock: mutex_unlock(&ht->mutex); + + if (err) + schedule_work(&ht->run_work); } int rhashtable_insert_slow(struct rhashtable *ht, const void *key, @@ -323,6 +379,7 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key, unsigned hash; int err = -EEXIST; + tbl = rhashtable_last_table(ht, tbl); hash = head_hashfn(ht, tbl, obj); spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index a2ba6adb60a2..a42a0d44e818 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -155,30 +155,6 @@ static int __init test_rhashtable(struct rhashtable *ht) test_rht_lookup(ht); rcu_read_unlock(); - for (i = 0; i < TEST_NEXPANDS; i++) { - pr_info(" Table expansion iteration %u...\n", i); - mutex_lock(&ht->mutex); - rhashtable_expand(ht); - mutex_unlock(&ht->mutex); - - rcu_read_lock(); - pr_info(" Verifying lookups...\n"); - test_rht_lookup(ht); - rcu_read_unlock(); - } - - for (i = 0; i < TEST_NEXPANDS; i++) { - pr_info(" Table shrinkage iteration %u...\n", i); - mutex_lock(&ht->mutex); - rhashtable_shrink(ht); - mutex_unlock(&ht->mutex); - - rcu_read_lock(); - pr_info(" Verifying lookups...\n"); - test_rht_lookup(ht); - rcu_read_unlock(); - } - rcu_read_lock(); test_bucket_stats(ht, true); rcu_read_unlock(); -- cgit v1.3-6-gb490 From ccd57b1bd32460d27bbb9c599e795628a3c66983 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 00:50:28 +1100 Subject: rhashtable: Add immediate rehash during insertion This patch reintroduces immediate rehash during insertion. If we find during insertion that the table is full or the chain length exceeds a set limit (currently 16 but may be disabled with insecure_elasticity) then we will force an immediate rehash. The rehash will contain an expansion if the table utilisation exceeds 75%. If this rehash fails then the insertion will fail. Otherwise the insertion will be reattempted in the new hash table. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 42 ++++++++++++++++++++++++++++---- lib/rhashtable.c | 60 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 96 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e8ffcdb5e239..f9ecf32bce55 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -103,6 +103,7 @@ struct rhashtable; * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker + * @insecure_elasticity: Set to true to disable chain length checks * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object @@ -116,6 +117,7 @@ struct rhashtable_params { unsigned int max_size; unsigned int min_size; u32 nulls_base; + bool insecure_elasticity; size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; @@ -127,6 +129,7 @@ struct rhashtable_params { * @tbl: Bucket table * @nelems: Number of elements in table * @key_len: Key length for hashfn + * @elasticity: Maximum chain length before rehash * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping @@ -137,6 +140,7 @@ struct rhashtable { atomic_t nelems; bool being_destroyed; unsigned int key_len; + unsigned int elasticity; struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; @@ -266,6 +270,17 @@ static inline bool rht_shrink_below_30(const struct rhashtable *ht, tbl->size > ht->p.min_size; } +/** + * rht_grow_above_100 - returns true if nelems > table-size + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_100(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + return atomic_read(&ht->nelems) > tbl->size; +} + /* The bucket lock is selected based on the hash and protects mutations * on a group of hash buckets. * @@ -307,6 +322,7 @@ int rhashtable_init(struct rhashtable *ht, int rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, struct bucket_table *old_tbl); +int rhashtable_insert_rehash(struct rhashtable *ht); int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); @@ -529,12 +545,14 @@ static inline int __rhashtable_insert_fast( .ht = ht, .key = key, }; - int err = -EEXIST; struct bucket_table *tbl, *new_tbl; struct rhash_head *head; spinlock_t *lock; + unsigned elasticity; unsigned hash; + int err; +restart: rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); @@ -557,20 +575,34 @@ static inline int __rhashtable_insert_fast( new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(new_tbl)) { err = rhashtable_insert_slow(ht, key, obj, new_tbl); + if (err == -EAGAIN) + goto slow_path; goto out; } - if (!key) - goto skip_lookup; + if (unlikely(rht_grow_above_100(ht, tbl))) { +slow_path: + spin_unlock_bh(lock); + rcu_read_unlock(); + err = rhashtable_insert_rehash(ht); + if (err) + return err; + + goto restart; + } + err = -EEXIST; + elasticity = ht->elasticity; rht_for_each(head, tbl, hash) { - if (unlikely(!(params.obj_cmpfn ? + if (key && + unlikely(!(params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : rhashtable_compare(&arg, rht_obj(ht, head))))) goto out; + if (!--elasticity) + goto slow_path; } -skip_lookup: err = 0; head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 220a11a13d40..7686c1e9934a 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -375,21 +375,76 @@ unlock: schedule_work(&ht->run_work); } +static bool rhashtable_check_elasticity(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned hash) +{ + unsigned elasticity = ht->elasticity; + struct rhash_head *head; + + rht_for_each(head, tbl, hash) + if (!--elasticity) + return true; + + return false; +} + +int rhashtable_insert_rehash(struct rhashtable *ht) +{ + struct bucket_table *old_tbl; + struct bucket_table *new_tbl; + struct bucket_table *tbl; + unsigned int size; + int err; + + old_tbl = rht_dereference_rcu(ht->tbl, ht); + tbl = rhashtable_last_table(ht, old_tbl); + + size = tbl->size; + + if (rht_grow_above_75(ht, tbl)) + size *= 2; + /* More than two rehashes (not resizes) detected. */ + else if (WARN_ON(old_tbl != tbl && old_tbl->size == size)) + return -EBUSY; + + new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC); + if (new_tbl == NULL) + return -ENOMEM; + + err = rhashtable_rehash_attach(ht, tbl, new_tbl); + if (err) { + bucket_table_free(new_tbl); + if (err == -EEXIST) + err = 0; + } else + schedule_work(&ht->run_work); + + return err; +} +EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); + int rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, struct bucket_table *tbl) { struct rhash_head *head; unsigned hash; - int err = -EEXIST; + int err; tbl = rhashtable_last_table(ht, tbl); hash = head_hashfn(ht, tbl, obj); spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); + err = -EEXIST; if (key && rhashtable_lookup_fast(ht, key, ht->p)) goto exit; + err = -EAGAIN; + if (rhashtable_check_elasticity(ht, tbl, hash) || + rht_grow_above_100(ht, tbl)) + goto exit; + err = 0; head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); @@ -678,6 +733,9 @@ int rhashtable_init(struct rhashtable *ht, ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); + if (!params->insecure_elasticity) + ht->elasticity = 16; + if (params->locks_mul) ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); else -- cgit v1.3-6-gb490 From 3d1bec99320d4e96897805440f8cf4f68eff226b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 23 Mar 2015 23:36:00 +0100 Subject: ipv6: introduce secret_stable to ipv6_devconf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch implements the procfs logic for the stable_address knob: The secret is formatted as an ipv6 address and will be stored per interface and per namespace. We track initialized flag and return EIO errors until the secret is set. We don't inherit the secret to newly created namespaces. Cc: Erik Kline Cc: Fernando Gont Cc: Lorenzo Colitti Cc: YOSHIFUJI Hideaki/吉藤英明 Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 +++ include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 4d5169f5d7d1..82806c60aa42 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -53,6 +53,10 @@ struct ipv6_devconf { __s32 ndisc_notify; __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; + struct ipv6_stable_secret { + bool initialized; + struct in6_addr secret; + } stable_secret; void *sysctl; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 437a6a4b125a..5efa54ae567c 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -170,6 +170,7 @@ enum { DEVCONF_ACCEPT_RA_FROM_LOCAL, DEVCONF_USE_OPTIMISTIC, DEVCONF_ACCEPT_RA_MTU, + DEVCONF_STABLE_SECRET, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 158378e73f0a..5b967c8a617a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -102,6 +103,9 @@ #define INFINITY_LIFE_TIME 0xFFFFFFFF +#define IPV6_MAX_STRLEN \ + sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255") + static inline u32 cstamp_delta(unsigned long cstamp) { return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; @@ -202,6 +206,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_dad = 1, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, + .stable_secret = { + .initialized = false, + } }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -240,6 +247,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_dad = 1, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, + .stable_secret = { + .initialized = false, + }, }; /* Check if a valid qdisc is available */ @@ -4430,6 +4440,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; + /* we omit DEVCONF_STABLE_SECRET for now */ } static inline size_t inet6_ifla6_size(void) @@ -5074,6 +5085,53 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, return ret; } +static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int err; + struct in6_addr addr; + char str[IPV6_MAX_STRLEN]; + struct ctl_table lctl = *ctl; + struct ipv6_stable_secret *secret = ctl->data; + + lctl.maxlen = IPV6_MAX_STRLEN; + lctl.data = str; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (!write && !secret->initialized) { + err = -EIO; + goto out; + } + + if (!write) { + err = snprintf(str, sizeof(str), "%pI6", + &secret->secret); + if (err >= sizeof(str)) { + err = -EIO; + goto out; + } + } + + err = proc_dostring(&lctl, write, buffer, lenp, ppos); + if (err || !write) + goto out; + + if (in6_pton(str, -1, addr.in6_u.u6_addr8, -1, NULL) != 1) { + err = -EIO; + goto out; + } + + secret->initialized = true; + secret->secret = addr; + +out: + rtnl_unlock(); + + return err; +} static struct addrconf_sysctl_table { @@ -5346,6 +5404,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "stable_secret", + .data = &ipv6_devconf.stable_secret, + .maxlen = IPV6_MAX_STRLEN, + .mode = 0600, + .proc_handler = addrconf_sysctl_stable_secret, + }, { /* sentinel */ } @@ -5442,6 +5507,9 @@ static int __net_init addrconf_init_net(struct net *net) dflt->autoconf = ipv6_defaults.autoconf; dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; + dflt->stable_secret.initialized = false; + all->stable_secret.initialized = false; + net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = dflt; -- cgit v1.3-6-gb490 From ba7c95ea3870fe7b847466d39a049ab6f156aa2c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 09:53:17 +1100 Subject: rhashtable: Fix sleeping inside RCU critical section in walk_stop The commit 963ecbd41a1026d99ec7537c050867428c397b89 ("rhashtable: Fix use-after-free in rhashtable_walk_stop") fixed a real bug but created another one because we may end up sleeping inside an RCU critical section. This patch fixes it properly by replacing the mutex with a spin lock that specifically protects the walker lists. Reported-by: Sasha Levin Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 ++ lib/rhashtable.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f9ecf32bce55..d7be9cb0e91f 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -133,6 +133,7 @@ struct rhashtable_params { * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping + * @lock: Spin lock to protect walker list * @being_destroyed: True if table is set up for destruction */ struct rhashtable { @@ -144,6 +145,7 @@ struct rhashtable { struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; + spinlock_t lock; }; /** diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 7686c1e9934a..e96ad1a52c90 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -256,8 +256,10 @@ static int rhashtable_rehash_table(struct rhashtable *ht) /* Publish the new table pointer. */ rcu_assign_pointer(ht->tbl, new_tbl); + spin_lock(&ht->lock); list_for_each_entry(walker, &old_tbl->walkers, list) walker->tbl = NULL; + spin_unlock(&ht->lock); /* Wait for readers. All new readers will see the new * table, and thus no references to the old table will @@ -635,12 +637,12 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) ht = iter->ht; - mutex_lock(&ht->mutex); + spin_lock(&ht->lock); if (tbl->rehash < tbl->size) list_add(&iter->walker->list, &tbl->walkers); else iter->walker->tbl = NULL; - mutex_unlock(&ht->mutex); + spin_unlock(&ht->lock); iter->p = NULL; @@ -723,6 +725,7 @@ int rhashtable_init(struct rhashtable *ht, memset(ht, 0, sizeof(*ht)); mutex_init(&ht->mutex); + spin_lock_init(&ht->lock); memcpy(&ht->p, params, sizeof(*params)); if (params->min_size) -- cgit v1.3-6-gb490 From 0117ec1970c5fa9c566045e7df8db76acc8f150e Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 23 Mar 2015 18:40:02 +0100 Subject: net: remove never used forwarding_accel_ops pointer from net_device Cc: John Fastabend Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5ae69e7df867..08c4ab37189f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1342,7 +1342,6 @@ enum netdev_priv_flags { * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @ethtool_ops: Management operations - * @fwd_ops: Management operations * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * @@ -1551,7 +1550,6 @@ struct net_device { #endif const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; - const struct forwarding_accel_ops *fwd_ops; #ifdef CONFIG_NET_SWITCHDEV const struct swdev_ops *swdev_ops; #endif -- cgit v1.3-6-gb490 From 27cd5452476978283decb19e429e81fc6c71e74b Mon Sep 17 00:00:00 2001 From: Michal Sekletar Date: Tue, 24 Mar 2015 14:48:41 +0100 Subject: filter: introduce SKF_AD_VLAN_TPID BPF extension If vlan offloading takes place then vlan header is removed from frame and its contents, both vlan_tci and vlan_proto, is available to user space via TPACKET interface. However, only vlan_tci can be used in BPF filters. This commit introduces a new BPF extension. It makes possible to load the value of vlan_proto (vlan TPID) to register A. Support for classic BPF and eBPF is being added, analogous to skb->protocol. Cc: Daniel Borkmann Cc: Alexei Starovoitov Cc: Jiri Pirko Signed-off-by: Michal Sekletar Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 3 ++- include/linux/filter.h | 1 + include/uapi/linux/bpf.h | 1 + include/uapi/linux/filter.h | 3 ++- net/core/filter.c | 17 +++++++++++++++++ tools/net/bpf_exp.l | 2 ++ tools/net/bpf_exp.y | 11 ++++++++++- 7 files changed, 35 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 9930ecfbb465..135581f015e1 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -280,7 +280,8 @@ Possible BPF extensions are shown in the following table: rxhash skb->hash cpu raw_smp_processor_id() vlan_tci skb_vlan_tag_get(skb) - vlan_pr skb_vlan_tag_present(skb) + vlan_avail skb_vlan_tag_present(skb) + vlan_tpid skb->vlan_proto rand prandom_u32() These extensions can also be prefixed with '#'. diff --git a/include/linux/filter.h b/include/linux/filter.h index 9ee8c67ea249..fa11b3a367be 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -454,6 +454,7 @@ static inline u16 bpf_anc_helper(const struct sock_filter *ftest) BPF_ANCILLARY(VLAN_TAG_PRESENT); BPF_ANCILLARY(PAY_OFFSET); BPF_ANCILLARY(RANDOM); + BPF_ANCILLARY(VLAN_TPID); } /* Fallthrough. */ default: diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3dd314a45d0d..27dc4ec58840 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -182,6 +182,7 @@ struct __sk_buff { __u32 protocol; __u32 vlan_present; __u32 vlan_tci; + __u32 vlan_proto; }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 47785d5ecf17..34c7936ca114 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -77,7 +77,8 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_VLAN_TAG_PRESENT 48 #define SKF_AD_PAY_OFFSET 52 #define SKF_AD_RANDOM 56 -#define SKF_AD_MAX 60 +#define SKF_AD_VLAN_TPID 60 +#define SKF_AD_MAX 64 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index 084eacc4d1d4..32f43c59908c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -272,6 +272,16 @@ static bool convert_bpf_extensions(struct sock_filter *fp, insn += cnt - 1; break; + case SKF_AD_OFF + SKF_AD_VLAN_TPID: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); + + /* A = *(u16 *) (CTX + offsetof(vlan_proto)) */ + *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, vlan_proto)); + /* A = ntohs(A) [emitting a nop or swap16] */ + *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16); + break; + case SKF_AD_OFF + SKF_AD_PAY_OFFSET: case SKF_AD_OFF + SKF_AD_NLATTR: case SKF_AD_OFF + SKF_AD_NLATTR_NEST: @@ -1226,6 +1236,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, offsetof(struct sk_buff, protocol)); break; + case offsetof(struct __sk_buff, vlan_proto): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, vlan_proto)); + break; + case offsetof(struct __sk_buff, mark): return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l index 833a96611da6..c83af3fb77de 100644 --- a/tools/net/bpf_exp.l +++ b/tools/net/bpf_exp.l @@ -92,6 +92,8 @@ extern void yyerror(const char *str); "#"?("cpu") { return K_CPU; } "#"?("vlan_tci") { return K_VLANT; } "#"?("vlan_pr") { return K_VLANP; } +"#"?("vlan_avail") { return K_VLANP; } +"#"?("vlan_tpid") { return K_VLANTPID; } "#"?("rand") { return K_RAND; } ":" { return ':'; } diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y index e6306c51c26f..f8332749b44c 100644 --- a/tools/net/bpf_exp.y +++ b/tools/net/bpf_exp.y @@ -56,7 +56,7 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type); %token OP_LDXI %token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE -%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND +%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_VLANTPID K_POFF K_RAND %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%' @@ -167,6 +167,9 @@ ldb | OP_LDB K_RAND { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_RANDOM); } + | OP_LDB K_VLANTPID { + bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_VLAN_TPID); } ; ldh @@ -218,6 +221,9 @@ ldh | OP_LDH K_RAND { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_RANDOM); } + | OP_LDH K_VLANTPID { + bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_VLAN_TPID); } ; ldi @@ -274,6 +280,9 @@ ld | OP_LD K_RAND { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_RANDOM); } + | OP_LD K_VLANTPID { + bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_VLAN_TPID); } | OP_LD 'M' '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); } | OP_LD '[' 'x' '+' number ']' { -- cgit v1.3-6-gb490 From 58be8a583d8d316448bafa5926414cfb83c02dec Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 14:18:16 +0100 Subject: rhashtable: Extend RCU read lock into rhashtable_insert_rehash() rhashtable_insert_rehash() requires RCU locks to be held in order to access ht->tbl and traverse to the last table. Fixes: ccd57b1bd324 ("rhashtable: Add immediate rehash during insertion") Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index d7be9cb0e91f..5976ab59b88f 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -585,8 +585,8 @@ restart: if (unlikely(rht_grow_above_100(ht, tbl))) { slow_path: spin_unlock_bh(lock); - rcu_read_unlock(); err = rhashtable_insert_rehash(ht); + rcu_read_unlock(); if (err) return err; -- cgit v1.3-6-gb490 From 299e5c32a37a6bca8175db177117467bd1ce970a Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 14:18:17 +0100 Subject: rhashtable: Use 'unsigned int' consistently Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 14 +++++++------- lib/rhashtable.c | 18 ++++++++++-------- 2 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 5976ab59b88f..f89cda067cb9 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -208,13 +208,13 @@ static inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { - unsigned hash; + unsigned int hash; /* params must be equal to ht->p if it isn't constant. */ if (!__builtin_constant_p(params.key_len)) hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd); else if (params.key_len) { - unsigned key_len = params.key_len; + unsigned int key_len = params.key_len; if (params.hashfn) hash = params.hashfn(key, key_len, tbl->hash_rnd); @@ -224,7 +224,7 @@ static inline unsigned int rht_key_hashfn( hash = jhash2(key, key_len / sizeof(u32), tbl->hash_rnd); } else { - unsigned key_len = ht->p.key_len; + unsigned int key_len = ht->p.key_len; if (params.hashfn) hash = params.hashfn(key, key_len, tbl->hash_rnd); @@ -512,7 +512,7 @@ static inline void *rhashtable_lookup_fast( }; const struct bucket_table *tbl; struct rhash_head *he; - unsigned hash; + unsigned int hash; rcu_read_lock(); @@ -550,8 +550,8 @@ static inline int __rhashtable_insert_fast( struct bucket_table *tbl, *new_tbl; struct rhash_head *head; spinlock_t *lock; - unsigned elasticity; - unsigned hash; + unsigned int elasticity; + unsigned int hash; int err; restart: @@ -718,7 +718,7 @@ static inline int __rhashtable_remove_fast( struct rhash_head __rcu **pprev; struct rhash_head *he; spinlock_t * lock; - unsigned hash; + unsigned int hash; int err = -ENOENT; hash = rht_head_hashfn(ht, tbl, obj, params); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 8514f7c5f029..50abe4fec4b8 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -153,7 +153,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, return new_tbl; } -static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash) +static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *new_tbl = rhashtable_last_table(ht, @@ -162,7 +162,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash) int err = -ENOENT; struct rhash_head *head, *next, *entry; spinlock_t *new_bucket_lock; - unsigned new_hash; + unsigned int new_hash; rht_for_each(entry, old_tbl, old_hash) { err = 0; @@ -199,7 +199,8 @@ out: return err; } -static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash) +static void rhashtable_rehash_chain(struct rhashtable *ht, + unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); spinlock_t *old_bucket_lock; @@ -244,7 +245,7 @@ static int rhashtable_rehash_table(struct rhashtable *ht) struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *new_tbl; struct rhashtable_walker *walker; - unsigned old_hash; + unsigned int old_hash; new_tbl = rht_dereference(old_tbl->future_tbl, ht); if (!new_tbl) @@ -324,11 +325,12 @@ static int rhashtable_expand(struct rhashtable *ht) static int rhashtable_shrink(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); - unsigned size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2); + unsigned int size; int err; ASSERT_RHT_MUTEX(ht); + size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2); if (size < ht->p.min_size) size = ht->p.min_size; @@ -379,9 +381,9 @@ unlock: static bool rhashtable_check_elasticity(struct rhashtable *ht, struct bucket_table *tbl, - unsigned hash) + unsigned int hash) { - unsigned elasticity = ht->elasticity; + unsigned int elasticity = ht->elasticity; struct rhash_head *head; rht_for_each(head, tbl, hash) @@ -431,7 +433,7 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct bucket_table *tbl) { struct rhash_head *head; - unsigned hash; + unsigned int hash; int err; tbl = rhashtable_last_table(ht, tbl); -- cgit v1.3-6-gb490 From ac833bddb591b9c7a0609f440f196375be184044 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 14:18:18 +0100 Subject: rhashtable: Mark internal/private inline functions as such Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f89cda067cb9..0e1f975ad101 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -539,6 +539,7 @@ restart: return NULL; } +/* Internal function, please use rhashtable_insert_fast() instead */ static inline int __rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) @@ -711,6 +712,7 @@ static inline int rhashtable_lookup_insert_key( return __rhashtable_insert_fast(ht, key, obj, params); } +/* Internal function, please use rhashtable_remove_fast() instead */ static inline int __rhashtable_remove_fast( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj, const struct rhashtable_params params) -- cgit v1.3-6-gb490 From b5e2c150ac914f28a28833b57397bec0b0a2bd5f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 20:42:19 +0000 Subject: rhashtable: Disable automatic shrinking by default Introduce a new bool automatic_shrinking to require the user to explicitly opt-in to automatic shrinking of tables. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 7 +++++-- lib/rhashtable.c | 2 +- net/netfilter/nft_hash.c | 1 + net/netlink/af_netlink.c | 1 + net/tipc/socket.c | 1 + 5 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 0e1f975ad101..ae26c494e230 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -2,7 +2,7 @@ * Resizable, Scalable, Concurrent Hash Table * * Copyright (c) 2015 Herbert Xu - * Copyright (c) 2014 Thomas Graf + * Copyright (c) 2014-2015 Thomas Graf * Copyright (c) 2008-2014 Patrick McHardy * * Code partially derived from nft_hash @@ -104,6 +104,7 @@ struct rhashtable; * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker * @insecure_elasticity: Set to true to disable chain length checks + * @automatic_shrinking: Enable automatic shrinking of tables * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object @@ -118,6 +119,7 @@ struct rhashtable_params { unsigned int min_size; u32 nulls_base; bool insecure_elasticity; + bool automatic_shrinking; size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; @@ -784,7 +786,8 @@ static inline int rhashtable_remove_fast( goto out; atomic_dec(&ht->nelems); - if (rht_shrink_below_30(ht, tbl)) + if (unlikely(ht->p.automatic_shrinking && + rht_shrink_below_30(ht, tbl))) schedule_work(&ht->run_work); out: diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 50abe4fec4b8..50374d181148 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -367,7 +367,7 @@ static void rht_deferred_worker(struct work_struct *work) if (rht_grow_above_75(ht, tbl)) rhashtable_expand(ht); - else if (rht_shrink_below_30(ht, tbl)) + else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl)) rhashtable_shrink(ht); err = rhashtable_rehash_table(ht); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index ad3966976cf5..8577a37af18b 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -172,6 +172,7 @@ static const struct rhashtable_params nft_hash_params = { .head_offset = offsetof(struct nft_hash_elem, node), .key_offset = offsetof(struct nft_hash_elem, key), .hashfn = jhash, + .automatic_shrinking = true, }; static int nft_hash_init(const struct nft_set *set, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index e2f7f28148e0..4caa809dbbe0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -3142,6 +3142,7 @@ static const struct rhashtable_params netlink_rhashtable_params = { .obj_hashfn = netlink_hash, .obj_cmpfn = netlink_compare, .max_size = 65536, + .automatic_shrinking = true, }; static int __init netlink_proto_init(void) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 094710519477..ee90d74d7516 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2297,6 +2297,7 @@ static const struct rhashtable_params tsk_rht_params = { .key_len = sizeof(u32), /* portid */ .max_size = 1048576, .min_size = 256, + .automatic_shrinking = true, }; int tipc_sk_rht_init(struct net *net) -- cgit v1.3-6-gb490 From 6b6f302ceda7a052dab545d6c69abf5f0d4a6cab Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 14:18:20 +0100 Subject: rhashtable: Add rhashtable_free_and_destroy() rhashtable_destroy() variant which stops rehashes, iterates over the table and calls a callback to release resources. Avoids need for nft_hash to embed rhashtable internals and allows to get rid of the being_destroyed flag. It also saves a 2nd mutex lock upon destruction. Also fixes an RCU lockdep splash on nft set destruction due to calling rht_for_each_entry_safe() without holding bucket locks. Open code this loop as we need know that no mutations may occur in parallel. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 5 +++-- lib/rhashtable.c | 49 ++++++++++++++++++++++++++++++++++++---------- net/netfilter/nft_hash.c | 25 +++++++---------------- 3 files changed, 49 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index ae26c494e230..99f2e49a8a07 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -136,12 +136,10 @@ struct rhashtable_params { * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect walker list - * @being_destroyed: True if table is set up for destruction */ struct rhashtable { struct bucket_table __rcu *tbl; atomic_t nelems; - bool being_destroyed; unsigned int key_len; unsigned int elasticity; struct rhashtable_params p; @@ -334,6 +332,9 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); void *rhashtable_walk_next(struct rhashtable_iter *iter); void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); +void rhashtable_free_and_destroy(struct rhashtable *ht, + void (*free_fn)(void *ptr, void *arg), + void *arg); void rhashtable_destroy(struct rhashtable *ht); #define rht_dereference(p, ht) \ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 50374d181148..4b7b7e672b93 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -359,8 +359,6 @@ static void rht_deferred_worker(struct work_struct *work) ht = container_of(work, struct rhashtable, run_work); mutex_lock(&ht->mutex); - if (ht->being_destroyed) - goto unlock; tbl = rht_dereference(ht->tbl, ht); tbl = rhashtable_last_table(ht, tbl); @@ -372,7 +370,6 @@ static void rht_deferred_worker(struct work_struct *work) err = rhashtable_rehash_table(ht); -unlock: mutex_unlock(&ht->mutex); if (err) @@ -783,21 +780,53 @@ int rhashtable_init(struct rhashtable *ht, EXPORT_SYMBOL_GPL(rhashtable_init); /** - * rhashtable_destroy - destroy hash table + * rhashtable_free_and_destroy - free elements and destroy hash table * @ht: the hash table to destroy + * @free_fn: callback to release resources of element + * @arg: pointer passed to free_fn * - * Frees the bucket array. This function is not rcu safe, therefore the caller - * has to make sure that no resizing may happen by unpublishing the hashtable - * and waiting for the quiescent cycle before releasing the bucket array. + * Stops an eventual async resize. If defined, invokes free_fn for each + * element to releasal resources. Please note that RCU protected + * readers may still be accessing the elements. Releasing of resources + * must occur in a compatible manner. Then frees the bucket array. + * + * This function will eventually sleep to wait for an async resize + * to complete. The caller is responsible that no further write operations + * occurs in parallel. */ -void rhashtable_destroy(struct rhashtable *ht) +void rhashtable_free_and_destroy(struct rhashtable *ht, + void (*free_fn)(void *ptr, void *arg), + void *arg) { - ht->being_destroyed = true; + const struct bucket_table *tbl; + unsigned int i; cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); - bucket_table_free(rht_dereference(ht->tbl, ht)); + tbl = rht_dereference(ht->tbl, ht); + if (free_fn) { + for (i = 0; i < tbl->size; i++) { + struct rhash_head *pos, *next; + + for (pos = rht_dereference(tbl->buckets[i], ht), + next = !rht_is_a_nulls(pos) ? + rht_dereference(pos->next, ht) : NULL; + !rht_is_a_nulls(pos); + pos = next, + next = !rht_is_a_nulls(pos) ? + rht_dereference(pos->next, ht) : NULL) + free_fn(rht_obj(ht, pos), arg); + } + } + + bucket_table_free(tbl); mutex_unlock(&ht->mutex); } +EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy); + +void rhashtable_destroy(struct rhashtable *ht) +{ + return rhashtable_free_and_destroy(ht, NULL, NULL); +} EXPORT_SYMBOL_GPL(rhashtable_destroy); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 8577a37af18b..f9ce2195fd63 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -188,26 +188,15 @@ static int nft_hash_init(const struct nft_set *set, return rhashtable_init(priv, ¶ms); } -static void nft_hash_destroy(const struct nft_set *set) +static void nft_free_element(void *ptr, void *arg) { - struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl; - struct nft_hash_elem *he; - struct rhash_head *pos, *next; - unsigned int i; - - /* Stop an eventual async resizing */ - priv->being_destroyed = true; - mutex_lock(&priv->mutex); - - tbl = rht_dereference(priv->tbl, priv); - for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_safe(he, pos, next, tbl, i, node) - nft_hash_elem_destroy(set, he); - } - mutex_unlock(&priv->mutex); + nft_hash_elem_destroy((const struct nft_set *)arg, ptr); +} - rhashtable_destroy(priv); +static void nft_hash_destroy(const struct nft_set *set) +{ + rhashtable_free_and_destroy(nft_set_priv(set), nft_free_element, + (void *)set); } static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, -- cgit v1.3-6-gb490 From 49f7b33e63fec9d16e7ee62ba8f8ab4159cbdc26 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 13:07:45 +0000 Subject: rhashtable: provide len to obj_hashfn nftables sets will be converted to use so called setextensions, moving the key to a non-fixed position. To hash it, the obj_hashfn must be used, however it so far doesn't receive the length parameter. Pass the key length to obj_hashfn() and convert existing users. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/rhashtable.h | 6 ++++-- lib/rhashtable.c | 2 +- net/netlink/af_netlink.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 99f2e49a8a07..e23d242d1230 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -88,7 +88,7 @@ struct rhashtable_compare_arg { }; typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); -typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed); +typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, const void *obj); @@ -242,7 +242,9 @@ static inline unsigned int rht_head_hashfn( const char *ptr = rht_obj(ht, he); return likely(params.obj_hashfn) ? - rht_bucket_index(tbl, params.obj_hashfn(ptr, tbl->hash_rnd)) : + rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?: + ht->p.key_len, + tbl->hash_rnd)) : rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); } diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 4b7b7e672b93..4898442b837f 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -691,7 +691,7 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) * struct rhash_head node; * }; * - * u32 my_hash_fn(const void *data, u32 seed) + * u32 my_hash_fn(const void *data, u32 len, u32 seed) * { * struct test_obj *obj = data; * diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4caa809dbbe0..19909d0786a2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -3127,7 +3127,7 @@ static struct pernet_operations __net_initdata netlink_net_ops = { .exit = netlink_net_exit, }; -static inline u32 netlink_hash(const void *data, u32 seed) +static inline u32 netlink_hash(const void *data, u32 len, u32 seed) { const struct netlink_sock *nlk = data; struct netlink_compare_arg arg; -- cgit v1.3-6-gb490 From 074c238177a75f5e79af3b2cb6a84e54823ef950 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 25 Mar 2015 15:55:42 -0700 Subject: mm: numa: slow PTE scan rate if migration failures occur Dave Chinner reported the following on https://lkml.org/lkml/2015/3/1/226 Across the board the 4.0-rc1 numbers are much slower, and the degradation is far worse when using the large memory footprint configs. Perf points straight at the cause - this is from 4.0-rc1 on the "-o bhash=101073" config: - 56.07% 56.07% [kernel] [k] default_send_IPI_mask_sequence_phys - default_send_IPI_mask_sequence_phys - 99.99% physflat_send_IPI_mask - 99.37% native_send_call_func_ipi smp_call_function_many - native_flush_tlb_others - 99.85% flush_tlb_page ptep_clear_flush try_to_unmap_one rmap_walk try_to_unmap migrate_pages migrate_misplaced_page - handle_mm_fault - 99.73% __do_page_fault trace_do_page_fault do_async_page_fault + async_page_fault 0.63% native_send_call_func_single_ipi generic_exec_single smp_call_function_single This is showing excessive migration activity even though excessive migrations are meant to get throttled. Normally, the scan rate is tuned on a per-task basis depending on the locality of faults. However, if migrations fail for any reason then the PTE scanner may scan faster if the faults continue to be remote. This means there is higher system CPU overhead and fault trapping at exactly the time we know that migrations cannot happen. This patch tracks when migration failures occur and slows the PTE scanner. Signed-off-by: Mel Gorman Reported-by: Dave Chinner Tested-by: Dave Chinner Cc: Ingo Molnar Cc: Aneesh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 +++++---- kernel/sched/fair.c | 8 ++++++-- mm/huge_memory.c | 3 ++- mm/memory.c | 3 ++- 4 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d77432e14ff..a419b65770d6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1625,11 +1625,11 @@ struct task_struct { /* * numa_faults_locality tracks if faults recorded during the last - * scan window were remote/local. The task scan period is adapted - * based on the locality of the faults with different weights - * depending on whether they were shared or private faults + * scan window were remote/local or failed to migrate. The task scan + * period is adapted based on the locality of the faults with different + * weights depending on whether they were shared or private faults */ - unsigned long numa_faults_locality[2]; + unsigned long numa_faults_locality[3]; unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ @@ -1719,6 +1719,7 @@ struct task_struct { #define TNF_NO_GROUP 0x02 #define TNF_SHARED 0x04 #define TNF_FAULT_LOCAL 0x08 +#define TNF_MIGRATE_FAIL 0x10 #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7ce18f3c097a..bcfe32088b37 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1609,9 +1609,11 @@ static void update_task_scan_period(struct task_struct *p, /* * If there were no record hinting faults then either the task is * completely idle or all activity is areas that are not of interest - * to automatic numa balancing. Scan slower + * to automatic numa balancing. Related to that, if there were failed + * migration then it implies we are migrating too quickly or the local + * node is overloaded. In either case, scan slower */ - if (local + shared == 0) { + if (local + shared == 0 || p->numa_faults_locality[2]) { p->numa_scan_period = min(p->numa_scan_period_max, p->numa_scan_period << 1); @@ -2080,6 +2082,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) if (migrated) p->numa_pages_migrated += pages; + if (flags & TNF_MIGRATE_FAIL) + p->numa_faults_locality[2] += pages; p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages; p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0a42d1521aa4..51b3e7c64622 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1350,7 +1350,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (migrated) { flags |= TNF_MIGRATED; page_nid = target_nid; - } + } else + flags |= TNF_MIGRATE_FAIL; goto out; clear_pmdnuma: diff --git a/mm/memory.c b/mm/memory.c index d20e12da3a3c..97839f5c8c30 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3103,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (migrated) { page_nid = target_nid; flags |= TNF_MIGRATED; - } + } else + flags |= TNF_MIGRATE_FAIL; out: if (page_nid != -1) -- cgit v1.3-6-gb490 From 1e9e39f4a29857a396ac7b669d109f697f66695e Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 26 Feb 2015 19:34:37 +0000 Subject: usbnet: Fix tx_packets stat for FLAG_MULTI_FRAME drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the usbnet core does not update the tx_packets statistic for drivers with FLAG_MULTI_PACKET and there is no hook in the TX completion path where they could do this. cdc_ncm and dependent drivers are bumping tx_packets stat on the transmit path while asix and sr9800 aren't updating it at all. Add a packet count in struct skb_data so these drivers can fill it in, initialise it to 1 for other drivers, and add the packet count to the tx_packets statistic on completion. Signed-off-by: Ben Hutchings Tested-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/asix_common.c | 2 ++ drivers/net/usb/cdc_ncm.c | 3 ++- drivers/net/usb/sr9800.c | 1 + drivers/net/usb/usbnet.c | 5 +++-- include/linux/usb/usbnet.h | 12 ++++++++++++ 5 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 5c55f11572ba..724a9b50df7a 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -188,6 +188,8 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes)); skb_put(skb, sizeof(padbytes)); } + + usbnet_set_skb_tx_stats(skb, 1); return skb; } diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 80a844e0ae03..70cbea551139 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1172,7 +1172,6 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* return skb */ ctx->tx_curr_skb = NULL; - dev->net->stats.tx_packets += ctx->tx_curr_frame_num; /* keep private stats: framing overhead and number of NTBs */ ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload; @@ -1184,6 +1183,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) */ dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload; + usbnet_set_skb_tx_stats(skb_out, n); + return skb_out; exit_no_skb: diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index b94a0fbb8b3b..7650cdc8fe6b 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -144,6 +144,7 @@ static struct sk_buff *sr_tx_fixup(struct usbnet *dev, struct sk_buff *skb, skb_put(skb, sizeof(padbytes)); } + usbnet_set_skb_tx_stats(skb, 1); return skb; } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 449835f4331e..0f3ff285f6a1 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1188,8 +1188,7 @@ static void tx_complete (struct urb *urb) struct usbnet *dev = entry->dev; if (urb->status == 0) { - if (!(dev->driver_info->flags & FLAG_MULTI_PACKET)) - dev->net->stats.tx_packets++; + dev->net->stats.tx_packets += entry->packets; dev->net->stats.tx_bytes += entry->length; } else { dev->net->stats.tx_errors++; @@ -1348,6 +1347,8 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, urb->transfer_flags |= URB_ZERO_PACKET; } entry->length = urb->transfer_buffer_length = length; + if (!(info->flags & FLAG_MULTI_PACKET)) + usbnet_set_skb_tx_stats(skb, 1); spin_lock_irqsave(&dev->txq.lock, flags); retval = usb_autopm_get_interface_async(dev->intf); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index d9a4905e01d0..ff3fb2bd0e90 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -228,8 +228,20 @@ struct skb_data { /* skb->cb is one of these */ struct usbnet *dev; enum skb_state state; size_t length; + unsigned long packets; }; +/* Drivers that set FLAG_MULTI_PACKET must call this in their + * tx_fixup method before returning an skb. + */ +static inline void +usbnet_set_skb_tx_stats(struct sk_buff *skb, unsigned long packets) +{ + struct skb_data *entry = (struct skb_data *) skb->cb; + + entry->packets = packets; +} + extern int usbnet_open(struct net_device *net); extern int usbnet_stop(struct net_device *net); extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb, -- cgit v1.3-6-gb490 From 7a1e890e2168e33fb62d84528e996b8b4b478fea Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 25 Mar 2015 21:41:33 +0100 Subject: usbnet: Fix tx_bytes statistic running backward in cdc_ncm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cdc_ncm disagrees with usbnet about how much framing overhead should be counted in the tx_bytes statistics, and tries 'fix' this by decrementing tx_bytes on the transmit path. But statistics must never be decremented except due to roll-over; this will thoroughly confuse user-space. Also, tx_bytes is only incremented by usbnet in the completion path. Fix this by requiring drivers that set FLAG_MULTI_FRAME to set a tx_bytes delta along with the tx_packets count. Fixes: beeecd42c3b4 ("net: cdc_ncm/cdc_mbim: adding NCM protocol statistics") Signed-off-by: Ben Hutchings Signed-off-by: Bjørn Mork --- drivers/net/usb/asix_common.c | 2 +- drivers/net/usb/cdc_ncm.c | 7 +++---- drivers/net/usb/sr9800.c | 2 +- drivers/net/usb/usbnet.c | 16 +++++++++++++--- include/linux/usb/usbnet.h | 6 ++++-- 5 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 724a9b50df7a..75d6f26729a3 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -189,7 +189,7 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, skb_put(skb, sizeof(padbytes)); } - usbnet_set_skb_tx_stats(skb, 1); + usbnet_set_skb_tx_stats(skb, 1, 0); return skb; } diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 70cbea551139..c3e4da9e79ca 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1177,13 +1177,12 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload; ctx->tx_ntbs++; - /* usbnet has already counted all the framing overhead. + /* usbnet will count all the framing overhead by default. * Adjust the stats so that the tx_bytes counter show real * payload data instead. */ - dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload; - - usbnet_set_skb_tx_stats(skb_out, n); + usbnet_set_skb_tx_stats(skb_out, n, + ctx->tx_curr_frame_payload - skb_out->len); return skb_out; diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index 7650cdc8fe6b..953de13267df 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -144,7 +144,7 @@ static struct sk_buff *sr_tx_fixup(struct usbnet *dev, struct sk_buff *skb, skb_put(skb, sizeof(padbytes)); } - usbnet_set_skb_tx_stats(skb, 1); + usbnet_set_skb_tx_stats(skb, 1, 0); return skb; } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 0f3ff285f6a1..777757ae1973 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1346,9 +1346,19 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, } else urb->transfer_flags |= URB_ZERO_PACKET; } - entry->length = urb->transfer_buffer_length = length; - if (!(info->flags & FLAG_MULTI_PACKET)) - usbnet_set_skb_tx_stats(skb, 1); + urb->transfer_buffer_length = length; + + if (info->flags & FLAG_MULTI_PACKET) { + /* Driver has set number of packets and a length delta. + * Calculate the complete length and ensure that it's + * positive. + */ + entry->length += length; + if (WARN_ON_ONCE(entry->length <= 0)) + entry->length = length; + } else { + usbnet_set_skb_tx_stats(skb, 1, length); + } spin_lock_irqsave(&dev->txq.lock, flags); retval = usb_autopm_get_interface_async(dev->intf); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index ff3fb2bd0e90..6e0ce8c7b8cb 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -227,7 +227,7 @@ struct skb_data { /* skb->cb is one of these */ struct urb *urb; struct usbnet *dev; enum skb_state state; - size_t length; + long length; unsigned long packets; }; @@ -235,11 +235,13 @@ struct skb_data { /* skb->cb is one of these */ * tx_fixup method before returning an skb. */ static inline void -usbnet_set_skb_tx_stats(struct sk_buff *skb, unsigned long packets) +usbnet_set_skb_tx_stats(struct sk_buff *skb, + unsigned long packets, long bytes_delta) { struct skb_data *entry = (struct skb_data *) skb->cb; entry->packets = packets; + entry->length = bytes_delta; } extern int usbnet_open(struct net_device *net); -- cgit v1.3-6-gb490 From 4ad3e3634a6cbe916722c7113c5b488d52c7a3dc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 27 Mar 2015 14:15:04 +0000 Subject: irqchip: gicv3-its: Fix PROP/PEND and BASE/CBASE confusion The ITS driver sometime mixes up the use of GICR_PROPBASE bitfields for the GICR_PENDBASE register, and GITS_BASER for GICR_CBASE. This does not lead to any observable bug because similar bits are at the same location, but this just make the code even harder to understand... This patch provides the required #defines and fixes the mixup. Signed-off-by: Marc Zyngier Link: https://lkml.kernel.org/r/1427465705-17126-4-git-send-email-marc.zyngier@arm.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-gic-v3-its.c | 6 +++--- include/linux/irqchip/arm-gic-v3.h | 13 +++++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index fa0c43660c8b..56353f6b5952 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -986,8 +986,8 @@ static void its_cpu_init_lpis(void) /* set PENDBASE */ val = (page_to_phys(pend_page) | - GICR_PROPBASER_InnerShareable | - GICR_PROPBASER_WaWb); + GICR_PENDBASER_InnerShareable | + GICR_PENDBASER_WaWb); writeq_relaxed(val, rbase + GICR_PENDBASER); @@ -1425,7 +1425,7 @@ static int its_probe(struct device_node *node, struct irq_domain *parent) writeq_relaxed(0, its->base + GITS_CWRITER); writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR); - if ((tmp ^ baser) & GITS_BASER_SHAREABILITY_MASK) { + if ((tmp ^ baser) & GITS_CBASER_SHAREABILITY_MASK) { pr_info("ITS: using cache flushing for cmd queue\n"); its->flags |= ITS_FLAGS_CMDQ_NEEDS_FLUSHING; } diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 781974afff9f..826a4bd63d4a 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -128,6 +128,19 @@ #define GICR_PROPBASER_RaWaWb (7U << 7) #define GICR_PROPBASER_IDBITS_MASK (0x1f) +#define GICR_PENDBASER_NonShareable (0U << 10) +#define GICR_PENDBASER_InnerShareable (1U << 10) +#define GICR_PENDBASER_OuterShareable (2U << 10) +#define GICR_PENDBASER_SHAREABILITY_MASK (3UL << 10) +#define GICR_PENDBASER_nCnB (0U << 7) +#define GICR_PENDBASER_nC (1U << 7) +#define GICR_PENDBASER_RaWt (2U << 7) +#define GICR_PENDBASER_RaWb (3U << 7) +#define GICR_PENDBASER_WaWt (4U << 7) +#define GICR_PENDBASER_WaWb (5U << 7) +#define GICR_PENDBASER_RaWaWt (6U << 7) +#define GICR_PENDBASER_RaWaWb (7U << 7) + /* * Re-Distributor registers, offsets from SGI_base */ -- cgit v1.3-6-gb490 From 241a386c7dbb8b0db400a1f92f2ebe3b10eb661d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 27 Mar 2015 14:15:05 +0000 Subject: irqchip: gicv3-its: Use non-cacheable accesses when no shareability If the ITS or the redistributors report their shareability as zero, then it is important to make sure they will no generate any cacheable traffic, as this is unlikely to produce the expected result. Signed-off-by: Marc Zyngier Link: https://lkml.kernel.org/r/1427465705-17126-5-git-send-email-marc.zyngier@arm.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-gic-v3-its.c | 47 ++++++++++++++++++++++++++++++++++---- include/linux/irqchip/arm-gic-v3.h | 4 ++++ 2 files changed, 47 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 56353f6b5952..9687f8afebff 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -802,6 +802,7 @@ static int its_alloc_tables(struct its_node *its) int i; int psz = SZ_64K; u64 shr = GITS_BASER_InnerShareable; + u64 cache = GITS_BASER_WaWb; for (i = 0; i < GITS_BASER_NR_REGS; i++) { u64 val = readq_relaxed(its->base + GITS_BASER + i * 8); @@ -848,7 +849,7 @@ retry_baser: val = (virt_to_phys(base) | (type << GITS_BASER_TYPE_SHIFT) | ((entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | - GITS_BASER_WaWb | + cache | shr | GITS_BASER_VALID); @@ -874,9 +875,12 @@ retry_baser: * Shareability didn't stick. Just use * whatever the read reported, which is likely * to be the only thing this redistributor - * supports. + * supports. If that's zero, make it + * non-cacheable as well. */ shr = tmp & GITS_BASER_SHAREABILITY_MASK; + if (!shr) + cache = GITS_BASER_nC; goto retry_baser; } @@ -980,6 +984,17 @@ static void its_cpu_init_lpis(void) tmp = readq_relaxed(rbase + GICR_PROPBASER); if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) { + if (!(tmp & GICR_PROPBASER_SHAREABILITY_MASK)) { + /* + * The HW reports non-shareable, we must + * remove the cacheability attributes as + * well. + */ + val &= ~(GICR_PROPBASER_SHAREABILITY_MASK | + GICR_PROPBASER_CACHEABILITY_MASK); + val |= GICR_PROPBASER_nC; + writeq_relaxed(val, rbase + GICR_PROPBASER); + } pr_info_once("GIC: using cache flushing for LPI property table\n"); gic_rdists->flags |= RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING; } @@ -990,6 +1005,18 @@ static void its_cpu_init_lpis(void) GICR_PENDBASER_WaWb); writeq_relaxed(val, rbase + GICR_PENDBASER); + tmp = readq_relaxed(rbase + GICR_PENDBASER); + + if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) { + /* + * The HW reports non-shareable, we must remove the + * cacheability attributes as well. + */ + val &= ~(GICR_PENDBASER_SHAREABILITY_MASK | + GICR_PENDBASER_CACHEABILITY_MASK); + val |= GICR_PENDBASER_nC; + writeq_relaxed(val, rbase + GICR_PENDBASER); + } /* Enable LPIs */ val = readl_relaxed(rbase + GICR_CTLR); @@ -1422,14 +1449,26 @@ static int its_probe(struct device_node *node, struct irq_domain *parent) writeq_relaxed(baser, its->base + GITS_CBASER); tmp = readq_relaxed(its->base + GITS_CBASER); - writeq_relaxed(0, its->base + GITS_CWRITER); - writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR); if ((tmp ^ baser) & GITS_CBASER_SHAREABILITY_MASK) { + if (!(tmp & GITS_CBASER_SHAREABILITY_MASK)) { + /* + * The HW reports non-shareable, we must + * remove the cacheability attributes as + * well. + */ + baser &= ~(GITS_CBASER_SHAREABILITY_MASK | + GITS_CBASER_CACHEABILITY_MASK); + baser |= GITS_CBASER_nC; + writeq_relaxed(baser, its->base + GITS_CBASER); + } pr_info("ITS: using cache flushing for cmd queue\n"); its->flags |= ITS_FLAGS_CMDQ_NEEDS_FLUSHING; } + writeq_relaxed(0, its->base + GITS_CWRITER); + writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR); + if (of_property_read_bool(its->msi_chip.of_node, "msi-controller")) { its->domain = irq_domain_add_tree(NULL, &its_domain_ops, its); if (!its->domain) { diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 826a4bd63d4a..ffbc034c8810 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -126,6 +126,7 @@ #define GICR_PROPBASER_WaWb (5U << 7) #define GICR_PROPBASER_RaWaWt (6U << 7) #define GICR_PROPBASER_RaWaWb (7U << 7) +#define GICR_PROPBASER_CACHEABILITY_MASK (7U << 7) #define GICR_PROPBASER_IDBITS_MASK (0x1f) #define GICR_PENDBASER_NonShareable (0U << 10) @@ -140,6 +141,7 @@ #define GICR_PENDBASER_WaWb (5U << 7) #define GICR_PENDBASER_RaWaWt (6U << 7) #define GICR_PENDBASER_RaWaWb (7U << 7) +#define GICR_PENDBASER_CACHEABILITY_MASK (7U << 7) /* * Re-Distributor registers, offsets from SGI_base @@ -195,6 +197,7 @@ #define GITS_CBASER_WaWb (5UL << 59) #define GITS_CBASER_RaWaWt (6UL << 59) #define GITS_CBASER_RaWaWb (7UL << 59) +#define GITS_CBASER_CACHEABILITY_MASK (7UL << 59) #define GITS_CBASER_NonShareable (0UL << 10) #define GITS_CBASER_InnerShareable (1UL << 10) #define GITS_CBASER_OuterShareable (2UL << 10) @@ -211,6 +214,7 @@ #define GITS_BASER_WaWb (5UL << 59) #define GITS_BASER_RaWaWt (6UL << 59) #define GITS_BASER_RaWaWb (7UL << 59) +#define GITS_BASER_CACHEABILITY_MASK (7UL << 59) #define GITS_BASER_TYPE_SHIFT (56) #define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) #define GITS_BASER_ENTRY_SIZE_SHIFT (48) -- cgit v1.3-6-gb490 From 608cd71a9c7c9db76e78a792c5a4101e12fea43f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 26 Mar 2015 19:53:57 -0700 Subject: tc: bpf: generalize pedit action existing TC action 'pedit' can munge any bits of the packet. Generalize it for use in bpf programs attached as cls_bpf and act_bpf via bpf_skb_store_bytes() helper function. Signed-off-by: Alexei Starovoitov Reviewed-by: Jiri Pirko Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/verifier.c | 2 ++ net/core/filter.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 73 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 280a315de8d6..d5cda067115a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -59,6 +59,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ + ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 27dc4ec58840..74aab6e0d964 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -168,6 +168,7 @@ enum bpf_func_id { BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ + BPF_FUNC_skb_store_bytes, /* int skb_store_bytes(skb, offset, from, len) */ __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0e714f799ec0..630a7bac1e51 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -773,6 +773,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, expected_type = CONST_IMM; } else if (arg_type == ARG_CONST_MAP_PTR) { expected_type = CONST_PTR_TO_MAP; + } else if (arg_type == ARG_PTR_TO_CTX) { + expected_type = PTR_TO_CTX; } else { verbose("unsupported arg_type %d\n", arg_type); return -EFAULT; diff --git a/net/core/filter.c b/net/core/filter.c index 32f43c59908c..444a07e4f68d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1175,6 +1175,56 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return 0; } +static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + unsigned int offset = (unsigned int) r2; + void *from = (void *) (long) r3; + unsigned int len = (unsigned int) r4; + char buf[16]; + void *ptr; + + /* bpf verifier guarantees that: + * 'from' pointer points to bpf program stack + * 'len' bytes of it were initialized + * 'len' > 0 + * 'skb' is a valid pointer to 'struct sk_buff' + * + * so check for invalid 'offset' and too large 'len' + */ + if (offset > 0xffff || len > sizeof(buf)) + return -EFAULT; + + if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len)) + return -EFAULT; + + ptr = skb_header_pointer(skb, offset, len, buf); + if (unlikely(!ptr)) + return -EFAULT; + + skb_postpull_rcsum(skb, ptr, len); + + memcpy(ptr, from, len); + + if (ptr == buf) + /* skb_store_bits cannot return -EFAULT here */ + skb_store_bits(skb, offset, ptr, len); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); + return 0; +} + +const struct bpf_func_proto bpf_skb_store_bytes_proto = { + .func = bpf_skb_store_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_STACK, + .arg4_type = ARG_CONST_STACK_SIZE, +}; + static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { @@ -1194,6 +1244,17 @@ sk_filter_func_proto(enum bpf_func_id func_id) } } +static const struct bpf_func_proto * +tc_cls_act_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; + default: + return sk_filter_func_proto(func_id); + } +} + static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type) { @@ -1270,18 +1331,24 @@ static const struct bpf_verifier_ops sk_filter_ops = { .convert_ctx_access = sk_filter_convert_ctx_access, }; +static const struct bpf_verifier_ops tc_cls_act_ops = { + .get_func_proto = tc_cls_act_func_proto, + .is_valid_access = sk_filter_is_valid_access, + .convert_ctx_access = sk_filter_convert_ctx_access, +}; + static struct bpf_prog_type_list sk_filter_type __read_mostly = { .ops = &sk_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, }; static struct bpf_prog_type_list sched_cls_type __read_mostly = { - .ops = &sk_filter_ops, + .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_CLS, }; static struct bpf_prog_type_list sched_act_type __read_mostly = { - .ops = &sk_filter_ops, + .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_ACT, }; -- cgit v1.3-6-gb490 From f5a7fb88e1f82542ca14ba93a1d4fa35471c60ca Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:11 +0900 Subject: vlan: Introduce helper functions to check if skb is tagged Separate the two checks for single vlan and multiple vlans in netif_skb_features(). This allows us to move the check for multiple vlans to another function later. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 24 ++++++++---------------- 2 files changed, 53 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b11b28a30b9e..4265d440ec4d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -561,4 +561,49 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, skb->protocol = htons(ETH_P_802_2); } +/** + * skb_vlan_tagged - check if skb is vlan tagged. + * @skb: skbuff to query + * + * Returns true if the skb is tagged, regardless of whether it is hardware + * accelerated or not. + */ +static inline bool skb_vlan_tagged(const struct sk_buff *skb) +{ + if (!skb_vlan_tag_present(skb) && + likely(skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD))) + return false; + + return true; +} + +/** + * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers. + * @skb: skbuff to query + * + * Returns true if the skb is tagged with multiple vlan headers, regardless + * of whether it is hardware accelerated or not. + */ +static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) +{ + __be16 protocol = skb->protocol; + + if (!skb_vlan_tag_present(skb)) { + struct vlan_ethhdr *veh; + + if (likely(protocol != htons(ETH_P_8021Q) && + protocol != htons(ETH_P_8021AD))) + return false; + + veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } + + if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) + return false; + + return true; +} + #endif /* !(_LINUX_IF_VLAN_H_) */ diff --git a/net/core/dev.c b/net/core/dev.c index a0408d497dae..04bffcd4a48d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2567,7 +2567,6 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) struct net_device *dev = skb->dev; netdev_features_t features = dev->features; u16 gso_segs = skb_shinfo(skb)->gso_segs; - __be16 protocol = skb->protocol; if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs) features &= ~NETIF_F_GSO_MASK; @@ -2579,22 +2578,15 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (skb->encapsulation) features &= dev->hw_enc_features; - if (!skb_vlan_tag_present(skb)) { - if (unlikely(protocol == htons(ETH_P_8021Q) || - protocol == htons(ETH_P_8021AD))) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - } else { - goto finalize; - } - } - - features = netdev_intersect_features(features, - dev->vlan_features | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); + if (skb_vlan_tagged(skb)) + features = netdev_intersect_features(features, + dev->vlan_features | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); + else + goto finalize; - if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) + if (skb_vlan_tagged_multi(skb)) features = netdev_intersect_features(features, NETIF_F_SG | NETIF_F_HIGHDMA | -- cgit v1.3-6-gb490 From 8cb65d00086bfba22bac87ff18b751432fc74003 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:12 +0900 Subject: net: Move check for multiple vlans to drivers To allow drivers to handle the features check for multiple tags, move the check to ndo_features_check(). As no drivers currently handle multiple tagged TSO, introduce dflt_features_check() and call it if the driver does not have ndo_features_check(). Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 1 + include/linux/if_vlan.h | 22 ++++++++++++++++++++++ net/core/dev.c | 21 +++++++++------------ 5 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 9677431c582a..039b0c1f480e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12557,6 +12557,7 @@ static netdev_features_t bnx2x_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { + features = vlan_features_check(skb, features); return vxlan_features_check(skb, features); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index a8339e98ad24..ebc93a101c93 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2373,6 +2373,7 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { + features = vlan_features_check(skb, features); return vxlan_features_check(skb, features); } #endif diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index a430a34a4434..367f3976df56 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -507,6 +507,7 @@ static netdev_features_t qlcnic_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { + features = vlan_features_check(skb, features); return vxlan_features_check(skb, features); } #endif diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 4265d440ec4d..920e4457ce6e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -606,4 +606,26 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) return true; } +/** + * vlan_features_check - drop unsafe features for skb with multiple tags. + * @skb: skbuff to query + * @features: features to be checked + * + * Returns features without unsafe ones if the skb has multiple tags. + */ +static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, + netdev_features_t features) +{ + if (skb_vlan_tagged_multi(skb)) + features = netdev_intersect_features(features, + NETIF_F_SG | + NETIF_F_HIGHDMA | + NETIF_F_FRAGLIST | + NETIF_F_GEN_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); + + return features; +} + #endif /* !(_LINUX_IF_VLAN_H_) */ diff --git a/net/core/dev.c b/net/core/dev.c index 04bffcd4a48d..cb46badbef5a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2562,6 +2562,13 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, return features; } +static netdev_features_t dflt_features_check(const struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + return vlan_features_check(skb, features); +} + netdev_features_t netif_skb_features(struct sk_buff *skb) { struct net_device *dev = skb->dev; @@ -2583,22 +2590,12 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); - else - goto finalize; - if (skb_vlan_tagged_multi(skb)) - features = netdev_intersect_features(features, - NETIF_F_SG | - NETIF_F_HIGHDMA | - NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); - -finalize: if (dev->netdev_ops->ndo_features_check) features &= dev->netdev_ops->ndo_features_check(skb, dev, features); + else + features &= dflt_features_check(skb, dev, features); return harmonize_features(skb, features); } -- cgit v1.3-6-gb490 From e38f30256b36700aa63aa709dc091bf6eb69c257 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:13 +0900 Subject: net: Introduce passthru_features_check As there are a number of (especially virtual) devices that don't need the multiple vlan check, introduce passthru_features_check() for convenience. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/core/dev.c | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 08c4ab37189f..967bb4c8caf1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3657,6 +3657,9 @@ void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); +netdev_features_t passthru_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features); netdev_features_t netif_skb_features(struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) diff --git a/net/core/dev.c b/net/core/dev.c index cb46badbef5a..3a06003ecafd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2562,6 +2562,14 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, return features; } +netdev_features_t passthru_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + return features; +} +EXPORT_SYMBOL(passthru_features_check); + static netdev_features_t dflt_features_check(const struct sk_buff *skb, struct net_device *dev, netdev_features_t features) -- cgit v1.3-6-gb490 From e9637415a92cf25ad800b7fdeddcd30cce7b44ab Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 30 Mar 2015 13:39:09 -0400 Subject: block: fix blk_stack_limits() regression due to lcm() change Linux 3.19 commit 69c953c ("lib/lcm.c: lcm(n,0)=lcm(0,n) is 0, not n") caused blk_stack_limits() to not properly stack queue_limits for stacked devices (e.g. DM). Fix this regression by establishing lcm_not_zero() and switching blk_stack_limits() over to using it. DM uses blk_set_stacking_limits() to establish the initial top-level queue_limits that are then built up based on underlying devices' limits using blk_stack_limits(). In the case of optimal_io_size (io_opt) blk_set_stacking_limits() establishes a default value of 0. With commit 69c953c, lcm(0, n) is no longer n, which compromises proper stacking of the underlying devices' io_opt. Test: $ modprobe scsi_debug dev_size_mb=10 num_tgts=1 opt_blks=1536 $ cat /sys/block/sde/queue/optimal_io_size 786432 $ dmsetup create node --table "0 100 linear /dev/sde 0" Before this fix: $ cat /sys/block/dm-5/queue/optimal_io_size 0 After this fix: $ cat /sys/block/dm-5/queue/optimal_io_size 786432 Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 3.19+ Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 6 +++--- include/linux/lcm.h | 1 + lib/lcm.c | 11 +++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/block/blk-settings.c b/block/blk-settings.c index 6ed2cbe5e8c9..12600bfffca9 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -585,7 +585,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->physical_block_size); t->io_min = max(t->io_min, b->io_min); - t->io_opt = lcm(t->io_opt, b->io_opt); + t->io_opt = lcm_not_zero(t->io_opt, b->io_opt); t->cluster &= b->cluster; t->discard_zeroes_data &= b->discard_zeroes_data; @@ -616,7 +616,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->raid_partial_stripes_expensive); /* Find lowest common alignment_offset */ - t->alignment_offset = lcm(t->alignment_offset, alignment) + t->alignment_offset = lcm_not_zero(t->alignment_offset, alignment) % max(t->physical_block_size, t->io_min); /* Verify that new alignment_offset is on a logical block boundary */ @@ -643,7 +643,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->max_discard_sectors); t->discard_granularity = max(t->discard_granularity, b->discard_granularity); - t->discard_alignment = lcm(t->discard_alignment, alignment) % + t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) % t->discard_granularity; } diff --git a/include/linux/lcm.h b/include/linux/lcm.h index 7bf01d779b45..1ce79a7f1daa 100644 --- a/include/linux/lcm.h +++ b/include/linux/lcm.h @@ -4,5 +4,6 @@ #include unsigned long lcm(unsigned long a, unsigned long b) __attribute_const__; +unsigned long lcm_not_zero(unsigned long a, unsigned long b) __attribute_const__; #endif /* _LCM_H */ diff --git a/lib/lcm.c b/lib/lcm.c index e97dbd51e756..03d7fcb420b5 100644 --- a/lib/lcm.c +++ b/lib/lcm.c @@ -12,3 +12,14 @@ unsigned long lcm(unsigned long a, unsigned long b) return 0; } EXPORT_SYMBOL_GPL(lcm); + +unsigned long lcm_not_zero(unsigned long a, unsigned long b) +{ + unsigned long l = lcm(a, b); + + if (l) + return l; + + return (b ? : a); +} +EXPORT_SYMBOL_GPL(lcm_not_zero); -- cgit v1.3-6-gb490 From 92f1719407b90475b3be0b7b9c983dec2ff8351e Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 29 Mar 2015 23:11:51 +0200 Subject: ptp: introduce get/set time methods with explicit 64 bit seconds. Converting the PHC drivers over to the new methods is one step along the way to making them ready for 2038. Once all the drivers are up to date, then the old methods will be removed. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 0d8ff3fb84ba..7d6f8e66396f 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -64,12 +64,18 @@ struct ptp_clock_request { * @adjtime: Shifts the time of the hardware clock. * parameter delta: Desired change in nanoseconds. * - * @gettime: Reads the current time from the hardware clock. + * @gettime: Reads the current time from the hardware clock. (deprecated) * parameter ts: Holds the result. * - * @settime: Set the current time on the hardware clock. + * @settime: Set the current time on the hardware clock. (deprecated) * parameter ts: Time value to set. * + * @gettime64: Reads the current time from the hardware clock. + * parameter ts: Holds the result. + * + * @settime64: Set the current time on the hardware clock. + * parameter ts: Time value to set. + * * @enable: Request driver to enable or disable an ancillary feature. * parameter request: Desired resource to enable or disable. * parameter on: Caller passes one to enable or zero to disable. @@ -106,6 +112,8 @@ struct ptp_clock_info { int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts); int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts); + int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); + int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); int (*enable)(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on); int (*verify)(struct ptp_clock_info *ptp, unsigned int pin, -- cgit v1.3-6-gb490 From ed7c6317bc599502e1fdc7f5f95cb9a5550360a4 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 29 Mar 2015 23:12:13 +0200 Subject: ptp: remove 32 bit get/set methods. All of the PHC drivers have been converted to the new methods. This patch converts the three remaining callers within the core code and removes the older methods for good. As a result, the core PHC code is ready for the year 2038. However, some of the PHC drivers are not quite ready yet. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/ptp_chardev.c | 8 +------- drivers/ptp/ptp_clock.c | 15 ++++----------- include/linux/ptp_clock_kernel.h | 8 -------- 3 files changed, 5 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 95bcf1525a84..da7bae991552 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -125,7 +125,6 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) struct ptp_clock_info *ops = ptp->info; struct ptp_clock_time *pct; struct timespec64 ts; - struct timespec t2; int enable, err = 0; unsigned int i, pin_index; @@ -202,12 +201,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) pct->sec = ts.tv_sec; pct->nsec = ts.tv_nsec; pct++; - if (ptp->info->gettime64) { - ptp->info->gettime64(ptp->info, &ts); - } else { - ptp->info->gettime(ptp->info, &t2); - ts = timespec_to_timespec64(t2); - } + ptp->info->gettime64(ptp->info, &ts); pct->sec = ts.tv_sec; pct->nsec = ts.tv_nsec; pct++; diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index df50d5eeae6f..2e481b9e8ea5 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -109,9 +109,7 @@ static int ptp_clock_settime(struct posix_clock *pc, const struct timespec *tp) struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); struct timespec64 ts = timespec_to_timespec64(*tp); - return ptp->info->settime64 ? - ptp->info->settime64(ptp->info, &ts) : - ptp->info->settime(ptp->info, tp); + return ptp->info->settime64(ptp->info, &ts); } static int ptp_clock_gettime(struct posix_clock *pc, struct timespec *tp) @@ -120,14 +118,9 @@ static int ptp_clock_gettime(struct posix_clock *pc, struct timespec *tp) struct timespec64 ts; int err; - if (ptp->info->gettime64) { - err = ptp->info->gettime64(ptp->info, &ts); - if (!err) - *tp = timespec64_to_timespec(ts); - } else { - err = ptp->info->gettime(ptp->info, tp); - } - + err = ptp->info->gettime64(ptp->info, &ts); + if (!err) + *tp = timespec64_to_timespec(ts); return err; } diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 7d6f8e66396f..b8b73066d137 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -64,12 +64,6 @@ struct ptp_clock_request { * @adjtime: Shifts the time of the hardware clock. * parameter delta: Desired change in nanoseconds. * - * @gettime: Reads the current time from the hardware clock. (deprecated) - * parameter ts: Holds the result. - * - * @settime: Set the current time on the hardware clock. (deprecated) - * parameter ts: Time value to set. - * * @gettime64: Reads the current time from the hardware clock. * parameter ts: Holds the result. * @@ -110,8 +104,6 @@ struct ptp_clock_info { struct ptp_pin_desc *pin_config; int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); - int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts); - int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); int (*enable)(struct ptp_clock_info *ptp, -- cgit v1.3-6-gb490 From 930345ea630405aa6e6f42efcb149c3f360a6b67 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Sun, 29 Mar 2015 16:59:25 +0200 Subject: netlink: implement nla_put_in_addr and nla_put_in6_addr IP addresses are often stored in netlink attributes. Add generic functions to do that. For nla_put_in_addr, it would be nicer to pass struct in_addr but this is not used universally throughout the kernel, in way too many places __be32 is used to store IPv4 address. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 22 +++++++++---------- include/linux/netfilter/ipset/ip_set.h | 5 ++--- include/net/netlink.h | 29 ++++++++++++++++++++++++++ net/ipv4/devinet.c | 6 +++--- net/ipv4/fib_rules.c | 4 ++-- net/ipv4/fib_semantics.c | 8 +++---- net/ipv4/ip_gre.c | 4 ++-- net/ipv4/ip_vti.c | 4 ++-- net/ipv4/ipip.c | 4 ++-- net/ipv4/ipmr.c | 4 ++-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 ++-- net/ipv4/route.c | 8 +++---- net/ipv4/tcp_metrics.c | 16 +++++++------- net/ipv6/addrconf.c | 10 ++++----- net/ipv6/addrlabel.c | 2 +- net/ipv6/fib6_rules.c | 6 ++---- net/ipv6/ip6_gre.c | 4 ++-- net/ipv6/ip6_tunnel.c | 6 ++---- net/ipv6/ip6_vti.c | 6 ++---- net/ipv6/ip6mr.c | 4 ++-- net/ipv6/ndisc.c | 3 +-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 ++---- net/ipv6/route.c | 14 ++++++------- net/ipv6/sit.c | 12 +++++------ net/l2tp/l2tp_netlink.c | 14 +++++++------ net/netlabel/netlabel_mgmt.c | 20 +++++++----------- net/netlabel/netlabel_unlabeled.c | 28 +++++++++++-------------- net/openvswitch/flow_netlink.c | 6 ++++-- net/wireless/nl80211.c | 4 ++-- 29 files changed, 139 insertions(+), 124 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 1c80b67c688d..86f085f95408 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -187,9 +187,9 @@ static int vxlan_nla_put_addr(struct sk_buff *skb, int attr, const union vxlan_addr *ip) { if (ip->sa.sa_family == AF_INET6) - return nla_put(skb, attr, sizeof(struct in6_addr), &ip->sin6.sin6_addr); + return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr); else - return nla_put_be32(skb, attr, ip->sin.sin_addr.s_addr); + return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr); } #else /* !CONFIG_IPV6 */ @@ -226,7 +226,7 @@ static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla) static int vxlan_nla_put_addr(struct sk_buff *skb, int attr, const union vxlan_addr *ip) { - return nla_put_be32(skb, attr, ip->sin.sin_addr.s_addr); + return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr); } #endif @@ -2807,13 +2807,13 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) if (!vxlan_addr_any(&dst->remote_ip)) { if (dst->remote_ip.sa.sa_family == AF_INET) { - if (nla_put_be32(skb, IFLA_VXLAN_GROUP, - dst->remote_ip.sin.sin_addr.s_addr)) + if (nla_put_in_addr(skb, IFLA_VXLAN_GROUP, + dst->remote_ip.sin.sin_addr.s_addr)) goto nla_put_failure; #if IS_ENABLED(CONFIG_IPV6) } else { - if (nla_put(skb, IFLA_VXLAN_GROUP6, sizeof(struct in6_addr), - &dst->remote_ip.sin6.sin6_addr)) + if (nla_put_in6_addr(skb, IFLA_VXLAN_GROUP6, + &dst->remote_ip.sin6.sin6_addr)) goto nla_put_failure; #endif } @@ -2824,13 +2824,13 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) if (!vxlan_addr_any(&vxlan->saddr)) { if (vxlan->saddr.sa.sa_family == AF_INET) { - if (nla_put_be32(skb, IFLA_VXLAN_LOCAL, - vxlan->saddr.sin.sin_addr.s_addr)) + if (nla_put_in_addr(skb, IFLA_VXLAN_LOCAL, + vxlan->saddr.sin.sin_addr.s_addr)) goto nla_put_failure; #if IS_ENABLED(CONFIG_IPV6) } else { - if (nla_put(skb, IFLA_VXLAN_LOCAL6, sizeof(struct in6_addr), - &vxlan->saddr.sin6.sin6_addr)) + if (nla_put_in6_addr(skb, IFLA_VXLAN_LOCAL6, + &vxlan->saddr.sin6.sin6_addr)) goto nla_put_failure; #endif } diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index f1606fa6132d..34b172301558 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -483,7 +483,7 @@ static inline int nla_put_ipaddr4(struct sk_buff *skb, int type, __be32 ipaddr) if (!__nested) return -EMSGSIZE; - ret = nla_put_net32(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr); + ret = nla_put_in_addr(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr); if (!ret) ipset_nest_end(skb, __nested); return ret; @@ -497,8 +497,7 @@ static inline int nla_put_ipaddr6(struct sk_buff *skb, int type, if (!__nested) return -EMSGSIZE; - ret = nla_put(skb, IPSET_ATTR_IPADDR_IPV6, - sizeof(struct in6_addr), ipaddrptr); + ret = nla_put_in6_addr(skb, IPSET_ATTR_IPADDR_IPV6, ipaddrptr); if (!ret) ipset_nest_end(skb, __nested); return ret; diff --git a/include/net/netlink.h b/include/net/netlink.h index e010ee8da41d..17fc76e5b05e 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -4,6 +4,7 @@ #include #include #include +#include /* ======================================================================== * Netlink Messages and Attributes Interface (As Seen On TV) @@ -105,6 +106,8 @@ * nla_put_string(skb, type, str) add string attribute to skb * nla_put_flag(skb, type) add flag attribute to skb * nla_put_msecs(skb, type, jiffies) add msecs attribute to skb + * nla_put_in_addr(skb, type, addr) add IPv4 address attribute to skb + * nla_put_in6_addr(skb, type, addr) add IPv6 address attribute to skb * * Nested Attributes Construction: * nla_nest_start(skb, type) start a nested attribute @@ -956,6 +959,32 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, return nla_put(skb, attrtype, sizeof(u64), &tmp); } +/** + * nla_put_in_addr - Add an IPv4 address netlink attribute to a socket + * buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @addr: IPv4 address + */ +static inline int nla_put_in_addr(struct sk_buff *skb, int attrtype, + __be32 addr) +{ + return nla_put_be32(skb, attrtype, addr); +} + +/** + * nla_put_in6_addr - Add an IPv6 address netlink attribute to a socket + * buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @addr: IPv6 address + */ +static inline int nla_put_in6_addr(struct sk_buff *skb, int attrtype, + const struct in6_addr *addr) +{ + return nla_put(skb, attrtype, sizeof(*addr), addr); +} + /** * nla_get_u32 - return payload of u32 attribute * @nla: u32 netlink attribute diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 975ee5e30c64..66cd85973056 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1541,11 +1541,11 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, valid = INFINITY_LIFE_TIME; } if ((ifa->ifa_address && - nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) || + nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) || (ifa->ifa_local && - nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) || + nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) || (ifa->ifa_broadcast && - nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || + nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || (ifa->ifa_label[0] && nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index e9bc5e42cf43..edfea0deec43 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -279,9 +279,9 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->tos = rule4->tos; if ((rule4->dst_len && - nla_put_be32(skb, FRA_DST, rule4->dst)) || + nla_put_in_addr(skb, FRA_DST, rule4->dst)) || (rule4->src_len && - nla_put_be32(skb, FRA_SRC, rule4->src))) + nla_put_in_addr(skb, FRA_SRC, rule4->src))) goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID if (rule4->tclassid && diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 66c1e4fbf884..453b24e5322c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1015,7 +1015,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, rtm->rtm_protocol = fi->fib_protocol; if (rtm->rtm_dst_len && - nla_put_be32(skb, RTA_DST, dst)) + nla_put_in_addr(skb, RTA_DST, dst)) goto nla_put_failure; if (fi->fib_priority && nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) @@ -1024,11 +1024,11 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; if (fi->fib_prefsrc && - nla_put_be32(skb, RTA_PREFSRC, fi->fib_prefsrc)) + nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (fi->fib_nhs == 1) { if (fi->fib_nh->nh_gw && - nla_put_be32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) + nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) goto nla_put_failure; if (fi->fib_nh->nh_oif && nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) @@ -1058,7 +1058,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, rtnh->rtnh_ifindex = nh->nh_oif; if (nh->nh_gw && - nla_put_be32(skb, RTA_GATEWAY, nh->nh_gw)) + nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw)) goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID if (nh->nh_tclassid && diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 6207275fc749..2e878df46075 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -776,8 +776,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || - nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || - nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) || + nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || + nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) || nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) || nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) || nla_put_u8(skb, IFLA_GRE_PMTUDISC, diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 94efe148181c..f189f2a8aaa5 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -505,8 +505,8 @@ static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u32(skb, IFLA_VTI_LINK, p->link); nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key); nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key); - nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr); - nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr); + nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr); + nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr); return 0; } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 915d215a7d14..17df8d38bbbd 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -450,8 +450,8 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || - nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || - nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c688cd1b2110..b4a545d24adb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2281,8 +2281,8 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; - if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) || - nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp)) + if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || + nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) goto nla_put_failure; err = __ipmr_fill_mroute(mrt, skb, c, rtm); /* do not break the dump if cache is unresolved */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5c61328b7704..b36ebfc6b812 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -322,8 +322,8 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) static int ipv4_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - if (nla_put_be32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || - nla_put_be32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) + if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || + nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) goto nla_put_failure; return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index be8703d02ef0..1f147204f1f3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2319,11 +2319,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (IPCB(skb)->flags & IPSKB_DOREDIRECT) r->rtm_flags |= RTCF_DOREDIRECT; - if (nla_put_be32(skb, RTA_DST, dst)) + if (nla_put_in_addr(skb, RTA_DST, dst)) goto nla_put_failure; if (src) { r->rtm_src_len = 32; - if (nla_put_be32(skb, RTA_SRC, src)) + if (nla_put_in_addr(skb, RTA_SRC, src)) goto nla_put_failure; } if (rt->dst.dev && @@ -2336,11 +2336,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, #endif if (!rt_is_input_route(rt) && fl4->saddr != src) { - if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) + if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } if (rt->rt_uses_gateway && - nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) + nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway)) goto nla_put_failure; expires = rt->dst.expires; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index f62c2c68ced0..32e36ea6bc0f 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -786,19 +786,19 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, switch (tm->tcpm_daddr.family) { case AF_INET: - if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, - tm->tcpm_daddr.addr.a4) < 0) + if (nla_put_in_addr(msg, TCP_METRICS_ATTR_ADDR_IPV4, + tm->tcpm_daddr.addr.a4) < 0) goto nla_put_failure; - if (nla_put_be32(msg, TCP_METRICS_ATTR_SADDR_IPV4, - tm->tcpm_saddr.addr.a4) < 0) + if (nla_put_in_addr(msg, TCP_METRICS_ATTR_SADDR_IPV4, + tm->tcpm_saddr.addr.a4) < 0) goto nla_put_failure; break; case AF_INET6: - if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, - tm->tcpm_daddr.addr.a6) < 0) + if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_ADDR_IPV6, + &tm->tcpm_daddr.addr.in6) < 0) goto nla_put_failure; - if (nla_put(msg, TCP_METRICS_ATTR_SADDR_IPV6, 16, - tm->tcpm_saddr.addr.a6) < 0) + if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_SADDR_IPV6, + &tm->tcpm_saddr.addr.in6) < 0) goto nla_put_failure; break; default: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e86f7434e3c3..5c9e94cb1b2c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4237,11 +4237,11 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, } if (!ipv6_addr_any(&ifa->peer_addr)) { - if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 || - nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0) + if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 || + nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->peer_addr) < 0) goto error; } else - if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0) + if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0) goto error; if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) @@ -4273,7 +4273,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, return -EMSGSIZE; put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || + if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 || put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); @@ -4299,7 +4299,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, return -EMSGSIZE; put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || + if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 || put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 3cc50e2d3bf5..882124ebb438 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -477,7 +477,7 @@ static int ip6addrlbl_fill(struct sk_buff *skb, ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); - if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 || + if (nla_put_in6_addr(skb, IFAL_ADDRESS, &p->prefix) < 0 || nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 273eb26cd6d4..d313bfd88512 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -250,11 +250,9 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->tos = rule6->tclass; if ((rule6->dst.plen && - nla_put(skb, FRA_DST, sizeof(struct in6_addr), - &rule6->dst.addr)) || + nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) || (rule6->src.plen && - nla_put(skb, FRA_SRC, sizeof(struct in6_addr), - &rule6->src.addr))) + nla_put_in6_addr(skb, FRA_SRC, &rule6->src.addr))) goto nla_put_failure; return 0; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 67e014d88e55..f61f7ad2d045 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1622,8 +1622,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || - nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->laddr) || - nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->raddr) || + nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || + nla_put_in6_addr(skb, IFLA_GRE_REMOTE, &p->raddr) || nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/ nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0c68012b6d6e..80543d13ea7c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1739,10 +1739,8 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) struct __ip6_tnl_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || - nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr), - &parm->laddr) || - nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr), - &parm->raddr) || + nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) || + nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 1ec5b4a530d0..87a262b0f07b 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -983,10 +983,8 @@ static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev) struct __ip6_tnl_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) || - nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr), - &parm->laddr) || - nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr), - &parm->raddr) || + nla_put_in6_addr(skb, IFLA_VTI_LOCAL, &parm->laddr) || + nla_put_in6_addr(skb, IFLA_VTI_REMOTE, &parm->raddr) || nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) || nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key)) goto nla_put_failure; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index ff883c9d0e3c..caf6b99374e6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2378,8 +2378,8 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; - if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) || - nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp)) + if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || + nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) goto nla_put_failure; err = __ip6mr_fill_mroute(mrt, skb, c, rtm); /* do not break the dump if cache is unresolved */ diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 482dfb9f0f7e..c283827d60e2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1049,8 +1049,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); - if (nla_put(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr), - &ipv6_hdr(ra)->saddr)) + if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr)) goto nla_put_failure; nlmsg_end(skb, nlh); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index b68d0e59c1f8..78284a697439 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -290,10 +290,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) static int ipv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - if (nla_put(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, - &tuple->src.u3.ip6) || - nla_put(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, - &tuple->dst.u3.ip6)) + if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) || + nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6)) goto nla_put_failure; return 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fe742fa0f7ff..385e9bd4f218 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2669,19 +2669,19 @@ static int rt6_fill_node(struct net *net, rtm->rtm_flags |= RTM_F_CLONED; if (dst) { - if (nla_put(skb, RTA_DST, 16, dst)) + if (nla_put_in6_addr(skb, RTA_DST, dst)) goto nla_put_failure; rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) - if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr)) + if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr)) goto nla_put_failure; #ifdef CONFIG_IPV6_SUBTREES if (src) { - if (nla_put(skb, RTA_SRC, 16, src)) + if (nla_put_in6_addr(skb, RTA_SRC, src)) goto nla_put_failure; rtm->rtm_src_len = 128; } else if (rtm->rtm_src_len && - nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr)) + nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr)) goto nla_put_failure; #endif if (iif) { @@ -2705,14 +2705,14 @@ static int rt6_fill_node(struct net *net, } else if (dst) { struct in6_addr saddr_buf; if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && - nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) + nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } if (rt->rt6i_prefsrc.plen) { struct in6_addr saddr_buf; saddr_buf = rt->rt6i_prefsrc.addr; - if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) + if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } @@ -2720,7 +2720,7 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; if (rt->rt6i_flags & RTF_GATEWAY) { - if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0) + if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) goto nla_put_failure; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 92692a7e8a2b..0e2bb538a556 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1683,8 +1683,8 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || - nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || - nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, @@ -1694,10 +1694,10 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; #ifdef CONFIG_IPV6_SIT_6RD - if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr), - &tunnel->ip6rd.prefix) || - nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, - tunnel->ip6rd.relay_prefix) || + if (nla_put_in6_addr(skb, IFLA_IPTUN_6RD_PREFIX, + &tunnel->ip6rd.prefix) || + nla_put_in_addr(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, + tunnel->ip6rd.relay_prefix) || nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN, tunnel->ip6rd.prefixlen) || nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN, diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index b4e923f77954..a4f78d36bace 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -376,15 +376,17 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla case L2TP_ENCAPTYPE_IP: #if IS_ENABLED(CONFIG_IPV6) if (np) { - if (nla_put(skb, L2TP_ATTR_IP6_SADDR, sizeof(np->saddr), - &np->saddr) || - nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(sk->sk_v6_daddr), - &sk->sk_v6_daddr)) + if (nla_put_in6_addr(skb, L2TP_ATTR_IP6_SADDR, + &np->saddr) || + nla_put_in6_addr(skb, L2TP_ATTR_IP6_DADDR, + &sk->sk_v6_daddr)) goto nla_put_failure; } else #endif - if (nla_put_be32(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr) || - nla_put_be32(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr)) + if (nla_put_in_addr(skb, L2TP_ATTR_IP_SADDR, + inet->inet_saddr) || + nla_put_in_addr(skb, L2TP_ATTR_IP_DADDR, + inet->inet_daddr)) goto nla_put_failure; break; } diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 70440748fe5c..13f777f20995 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -293,15 +293,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return -ENOMEM; addr_struct.s_addr = iter4->addr; - ret_val = nla_put(skb, NLBL_MGMT_A_IPV4ADDR, - sizeof(struct in_addr), - &addr_struct); + ret_val = nla_put_in_addr(skb, NLBL_MGMT_A_IPV4ADDR, + addr_struct.s_addr); if (ret_val != 0) return ret_val; addr_struct.s_addr = iter4->mask; - ret_val = nla_put(skb, NLBL_MGMT_A_IPV4MASK, - sizeof(struct in_addr), - &addr_struct); + ret_val = nla_put_in_addr(skb, NLBL_MGMT_A_IPV4MASK, + addr_struct.s_addr); if (ret_val != 0) return ret_val; map4 = netlbl_domhsh_addr4_entry(iter4); @@ -328,14 +326,12 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, if (nla_b == NULL) return -ENOMEM; - ret_val = nla_put(skb, NLBL_MGMT_A_IPV6ADDR, - sizeof(struct in6_addr), - &iter6->addr); + ret_val = nla_put_in6_addr(skb, NLBL_MGMT_A_IPV6ADDR, + &iter6->addr); if (ret_val != 0) return ret_val; - ret_val = nla_put(skb, NLBL_MGMT_A_IPV6MASK, - sizeof(struct in6_addr), - &iter6->mask); + ret_val = nla_put_in6_addr(skb, NLBL_MGMT_A_IPV6MASK, + &iter6->mask); if (ret_val != 0) return ret_val; map6 = netlbl_domhsh_addr6_entry(iter6); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index aec7994f78cf..b0380927f05f 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1117,34 +1117,30 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, struct in_addr addr_struct; addr_struct.s_addr = addr4->list.addr; - ret_val = nla_put(cb_arg->skb, - NLBL_UNLABEL_A_IPV4ADDR, - sizeof(struct in_addr), - &addr_struct); + ret_val = nla_put_in_addr(cb_arg->skb, + NLBL_UNLABEL_A_IPV4ADDR, + addr_struct.s_addr); if (ret_val != 0) goto list_cb_failure; addr_struct.s_addr = addr4->list.mask; - ret_val = nla_put(cb_arg->skb, - NLBL_UNLABEL_A_IPV4MASK, - sizeof(struct in_addr), - &addr_struct); + ret_val = nla_put_in_addr(cb_arg->skb, + NLBL_UNLABEL_A_IPV4MASK, + addr_struct.s_addr); if (ret_val != 0) goto list_cb_failure; secid = addr4->secid; } else { - ret_val = nla_put(cb_arg->skb, - NLBL_UNLABEL_A_IPV6ADDR, - sizeof(struct in6_addr), - &addr6->list.addr); + ret_val = nla_put_in6_addr(cb_arg->skb, + NLBL_UNLABEL_A_IPV6ADDR, + &addr6->list.addr); if (ret_val != 0) goto list_cb_failure; - ret_val = nla_put(cb_arg->skb, - NLBL_UNLABEL_A_IPV6MASK, - sizeof(struct in6_addr), - &addr6->list.mask); + ret_val = nla_put_in6_addr(cb_arg->skb, + NLBL_UNLABEL_A_IPV6MASK, + &addr6->list.mask); if (ret_val != 0) goto list_cb_failure; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 22b18c145c92..c0c5b5519f45 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -648,10 +648,12 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) return -EMSGSIZE; if (output->ipv4_src && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) + nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, + output->ipv4_src)) return -EMSGSIZE; if (output->ipv4_dst && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) + nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, + output->ipv4_dst)) return -EMSGSIZE; if (output->ipv4_tos && nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d6ba4a6bbff6..6a4a4d7db1fc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -8761,8 +8761,8 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg, if (!nl_tcp) return -ENOBUFS; - if (nla_put_be32(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) || - nla_put_be32(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) || + if (nla_put_in_addr(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) || + nla_put_in_addr(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) || nla_put(msg, NL80211_WOWLAN_TCP_DST_MAC, ETH_ALEN, tcp->dst_mac) || nla_put_u16(msg, NL80211_WOWLAN_TCP_SRC_PORT, tcp->src_port) || nla_put_u16(msg, NL80211_WOWLAN_TCP_DST_PORT, tcp->dst_port) || -- cgit v1.3-6-gb490 From f9c72d10d6fbf949558cd088389a42213ed7b12d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 31 Mar 2015 12:03:28 -0400 Subject: sunrpc: make debugfs file creation failure non-fatal We currently have a problem that SELinux policy is being enforced when creating debugfs files. If a debugfs file is created as a side effect of doing some syscall, then that creation can fail if the SELinux policy for that process prevents it. This seems wrong. We don't do that for files under /proc, for instance, so Bruce has proposed a patch to fix that. While discussing that patch however, Greg K.H. stated: "No kernel code should care / fail if a debugfs function fails, so please fix up the sunrpc code first." This patch converts all of the sunrpc debugfs setup code to be void return functins, and the callers to not look for errors from those functions. This should allow rpc_clnt and rpc_xprt creation to work, even if the kernel fails to create debugfs files for some reason. Symptoms were failing krb5 mounts on systems using gss-proxy and selinux. Fixes: 388f0c776781 "sunrpc: add a debugfs rpc_xprt directory..." Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Acked-by: Greg Kroah-Hartman Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/debug.h | 18 +++++++-------- net/sunrpc/clnt.c | 4 +--- net/sunrpc/debugfs.c | 52 ++++++++++++++++++++++++-------------------- net/sunrpc/sunrpc_syms.c | 7 +----- net/sunrpc/xprt.c | 7 +----- 5 files changed, 41 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index c57d8ea0716c..59a7889e15db 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -60,17 +60,17 @@ struct rpc_xprt; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) void rpc_register_sysctl(void); void rpc_unregister_sysctl(void); -int sunrpc_debugfs_init(void); +void sunrpc_debugfs_init(void); void sunrpc_debugfs_exit(void); -int rpc_clnt_debugfs_register(struct rpc_clnt *); +void rpc_clnt_debugfs_register(struct rpc_clnt *); void rpc_clnt_debugfs_unregister(struct rpc_clnt *); -int rpc_xprt_debugfs_register(struct rpc_xprt *); +void rpc_xprt_debugfs_register(struct rpc_xprt *); void rpc_xprt_debugfs_unregister(struct rpc_xprt *); #else -static inline int +static inline void sunrpc_debugfs_init(void) { - return 0; + return; } static inline void @@ -79,10 +79,10 @@ sunrpc_debugfs_exit(void) return; } -static inline int +static inline void rpc_clnt_debugfs_register(struct rpc_clnt *clnt) { - return 0; + return; } static inline void @@ -91,10 +91,10 @@ rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt) return; } -static inline int +static inline void rpc_xprt_debugfs_register(struct rpc_xprt *xprt) { - return 0; + return; } static inline void diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 612aa73bbc60..e6ce1517367f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -303,9 +303,7 @@ static int rpc_client_register(struct rpc_clnt *clnt, struct super_block *pipefs_sb; int err; - err = rpc_clnt_debugfs_register(clnt); - if (err) - return err; + rpc_clnt_debugfs_register(clnt); pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index e811f390f9f6..82962f7e6e88 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -129,48 +129,52 @@ static const struct file_operations tasks_fops = { .release = tasks_release, }; -int +void rpc_clnt_debugfs_register(struct rpc_clnt *clnt) { - int len, err; + int len; char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */ + struct rpc_xprt *xprt; /* Already registered? */ - if (clnt->cl_debugfs) - return 0; + if (clnt->cl_debugfs || !rpc_clnt_dir) + return; len = snprintf(name, sizeof(name), "%x", clnt->cl_clid); if (len >= sizeof(name)) - return -EINVAL; + return; /* make the per-client dir */ clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir); if (!clnt->cl_debugfs) - return -ENOMEM; + return; /* make tasks file */ - err = -ENOMEM; if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs, clnt, &tasks_fops)) goto out_err; - err = -EINVAL; rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); + /* no "debugfs" dentry? Don't bother with the symlink. */ + if (!xprt->debugfs) { + rcu_read_unlock(); + return; + } len = snprintf(name, sizeof(name), "../../rpc_xprt/%s", - rcu_dereference(clnt->cl_xprt)->debugfs->d_name.name); + xprt->debugfs->d_name.name); rcu_read_unlock(); + if (len >= sizeof(name)) goto out_err; - err = -ENOMEM; if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name)) goto out_err; - return 0; + return; out_err: debugfs_remove_recursive(clnt->cl_debugfs); clnt->cl_debugfs = NULL; - return err; } void @@ -226,33 +230,33 @@ static const struct file_operations xprt_info_fops = { .release = xprt_info_release, }; -int +void rpc_xprt_debugfs_register(struct rpc_xprt *xprt) { int len, id; static atomic_t cur_id; char name[9]; /* 8 hex digits + NULL term */ + if (!rpc_xprt_dir) + return; + id = (unsigned int)atomic_inc_return(&cur_id); len = snprintf(name, sizeof(name), "%x", id); if (len >= sizeof(name)) - return -EINVAL; + return; /* make the per-client dir */ xprt->debugfs = debugfs_create_dir(name, rpc_xprt_dir); if (!xprt->debugfs) - return -ENOMEM; + return; /* make tasks file */ if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs, xprt, &xprt_info_fops)) { debugfs_remove_recursive(xprt->debugfs); xprt->debugfs = NULL; - return -ENOMEM; } - - return 0; } void @@ -266,14 +270,17 @@ void __exit sunrpc_debugfs_exit(void) { debugfs_remove_recursive(topdir); + topdir = NULL; + rpc_clnt_dir = NULL; + rpc_xprt_dir = NULL; } -int __init +void __init sunrpc_debugfs_init(void) { topdir = debugfs_create_dir("sunrpc", NULL); if (!topdir) - goto out; + return; rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); if (!rpc_clnt_dir) @@ -283,10 +290,9 @@ sunrpc_debugfs_init(void) if (!rpc_xprt_dir) goto out_remove; - return 0; + return; out_remove: debugfs_remove_recursive(topdir); topdir = NULL; -out: - return -ENOMEM; + rpc_clnt_dir = NULL; } diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index e37fbed87956..ee5d3d253102 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -98,10 +98,7 @@ init_sunrpc(void) if (err) goto out4; - err = sunrpc_debugfs_init(); - if (err) - goto out5; - + sunrpc_debugfs_init(); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_register_sysctl(); #endif @@ -109,8 +106,6 @@ init_sunrpc(void) init_socket_xprt(); /* clnt sock transport */ return 0; -out5: - unregister_rpc_pipefs(); out4: unregister_pernet_subsys(&sunrpc_net_ops); out3: diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e3015aede0d9..9949722d99ce 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1331,7 +1331,6 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) */ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) { - int err; struct rpc_xprt *xprt; struct xprt_class *t; @@ -1372,11 +1371,7 @@ found: return ERR_PTR(-ENOMEM); } - err = rpc_xprt_debugfs_register(xprt); - if (err) { - xprt_destroy(xprt); - return ERR_PTR(err); - } + rpc_xprt_debugfs_register(xprt); dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); -- cgit v1.3-6-gb490 From ffa88f37ffeaac398be68f9678b0e6046a5ba7f6 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 30 Mar 2015 17:45:22 +0300 Subject: net/mlx4_en: Move statistics bitmap setting to the Ethernet driver The statistics bitmap belongs to the Ethernet driver, move it there. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 22 +++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 ++ drivers/net/ethernet/mellanox/mlx4/port.c | 21 --------------------- include/linux/mlx4/device.h | 1 - 4 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index ebc93a101c93..9556230465f0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -49,6 +49,11 @@ #include "mlx4_en.h" #include "en_port.h" +#define MLX4_STATS_TRAFFIC_COUNTERS_MASK 0xfULL +#define MLX4_STATS_TRAFFIC_DROPS_MASK 0xc0ULL +#define MLX4_STATS_ERROR_COUNTERS_MASK 0x1ffc30ULL +#define MLX4_STATS_PORT_COUNTERS_MASK 0x7fe00000ULL + int mlx4_en_setup_tc(struct net_device *dev, u8 up) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -2648,6 +2653,21 @@ int mlx4_en_netdev_event(struct notifier_block *this, return NOTIFY_DONE; } +void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap) +{ + if (!mlx4_is_mfunc(dev)) { + *stats_bitmap = 0; + return; + } + + *stats_bitmap = (MLX4_STATS_TRAFFIC_COUNTERS_MASK | + MLX4_STATS_TRAFFIC_DROPS_MASK | + MLX4_STATS_PORT_COUNTERS_MASK); + + if (mlx4_is_master(dev)) + *stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK; +} + int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof) { @@ -2881,7 +2901,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); - mlx4_set_stats_bitmap(mdev->dev, &priv->stats_bitmap); + mlx4_en_set_stats_bitmap(mdev->dev, &priv->stats_bitmap); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 6b379a2df4a0..af3d9b79277c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -730,6 +730,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, int mlx4_en_start_port(struct net_device *dev); void mlx4_en_stop_port(struct net_device *dev, int detach); +void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap); + void mlx4_en_free_resources(struct mlx4_en_priv *priv); int mlx4_en_alloc_resources(struct mlx4_en_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 3f6f77112274..b97f173ab062 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -45,11 +45,6 @@ #define MLX4_VLAN_VALID (1u << 31) #define MLX4_VLAN_MASK 0xfff -#define MLX4_STATS_TRAFFIC_COUNTERS_MASK 0xfULL -#define MLX4_STATS_TRAFFIC_DROPS_MASK 0xc0ULL -#define MLX4_STATS_ERROR_COUNTERS_MASK 0x1ffc30ULL -#define MLX4_STATS_PORT_COUNTERS_MASK 0x7fe00000ULL - void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) { int i; @@ -1185,22 +1180,6 @@ int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, vhcr->in_modifier, outbox); } -void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap) -{ - if (!mlx4_is_mfunc(dev)) { - *stats_bitmap = 0; - return; - } - - *stats_bitmap = (MLX4_STATS_TRAFFIC_COUNTERS_MASK | - MLX4_STATS_TRAFFIC_DROPS_MASK | - MLX4_STATS_PORT_COUNTERS_MASK); - - if (mlx4_is_master(dev)) - *stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK; -} -EXPORT_SYMBOL(mlx4_set_stats_bitmap); - int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 4550c67b92e4..49abbe28e230 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1300,7 +1300,6 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac); void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac); int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port); int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac); -void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap); int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, -- cgit v1.3-6-gb490 From 0b131561a7d639abb0a194d2d8fae839ce3b99e9 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 30 Mar 2015 17:45:25 +0300 Subject: net/mlx4_en: Add Flow control statistics display via ethtool Flow control per priority and Global pause counters are now visible via ethtool. The counters shows statistics regarding pauses in the device. Signed-off-by: Matan Barak Signed-off-by: Shani Michaeli Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c | 4 ++ drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 74 +++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 57 ++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/en_port.c | 50 +++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 10 +++- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 13 ++++- drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h | 52 ++++++++++++++++- include/linux/mlx4/device.h | 3 +- 8 files changed, 255 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index cde14fa2f742..8e3260c0eaa5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -226,6 +226,10 @@ static int mlx4_en_dcbnl_ieee_setpfc(struct net_device *dev, prof->rx_ppp); if (err) en_err(priv, "Failed setting pause params\n"); + else + mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap, + prof->rx_ppp, prof->rx_pause, + prof->tx_ppp, prof->tx_pause); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 3e7ed39e8e76..4fc767461b4f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -119,6 +119,48 @@ static const char main_strings[][ETH_GSTRING_LEN] = { "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed", "rx_csum_good", "rx_csum_none", "rx_csum_complete", "tx_chksum_offload", + /* priority flow control statistics rx */ + "rx_pause_prio_0", "rx_pause_duration_prio_0", + "rx_pause_transition_prio_0", + "rx_pause_prio_1", "rx_pause_duration_prio_1", + "rx_pause_transition_prio_1", + "rx_pause_prio_2", "rx_pause_duration_prio_2", + "rx_pause_transition_prio_2", + "rx_pause_prio_3", "rx_pause_duration_prio_3", + "rx_pause_transition_prio_3", + "rx_pause_prio_4", "rx_pause_duration_prio_4", + "rx_pause_transition_prio_4", + "rx_pause_prio_5", "rx_pause_duration_prio_5", + "rx_pause_transition_prio_5", + "rx_pause_prio_6", "rx_pause_duration_prio_6", + "rx_pause_transition_prio_6", + "rx_pause_prio_7", "rx_pause_duration_prio_7", + "rx_pause_transition_prio_7", + + /* flow control statistics rx */ + "rx_pause", "rx_pause_duration", "rx_pause_transition", + + /* priority flow control statistics tx */ + "tx_pause_prio_0", "tx_pause_duration_prio_0", + "tx_pause_transition_prio_0", + "tx_pause_prio_1", "tx_pause_duration_prio_1", + "tx_pause_transition_prio_1", + "tx_pause_prio_2", "tx_pause_duration_prio_2", + "tx_pause_transition_prio_2", + "tx_pause_prio_3", "tx_pause_duration_prio_3", + "tx_pause_transition_prio_3", + "tx_pause_prio_4", "tx_pause_duration_prio_4", + "tx_pause_transition_prio_4", + "tx_pause_prio_5", "tx_pause_duration_prio_5", + "tx_pause_transition_prio_5", + "tx_pause_prio_6", "tx_pause_duration_prio_6", + "tx_pause_transition_prio_6", + "tx_pause_prio_7", "tx_pause_duration_prio_7", + "tx_pause_transition_prio_7", + + /* flow control statistics tx */ + "tx_pause", "tx_pause_duration", "tx_pause_transition", + /* packet statistics */ "broadcast", "rx_prio_0", "rx_prio_1", "rx_prio_2", "rx_prio_3", "rx_prio_4", "rx_prio_5", "rx_prio_6", "rx_prio_7", "tx_prio_0", @@ -304,6 +346,26 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, if (bitmap_iterator_test(&it)) data[index++] = ((unsigned long *)&priv->port_stats)[i]; + for (i = 0; i < NUM_FLOW_PRIORITY_STATS_RX; + i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = + ((u64 *)&priv->rx_priority_flowstats)[i]; + + for (i = 0; i < NUM_FLOW_STATS_RX; i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = ((u64 *)&priv->rx_flowstats)[i]; + + for (i = 0; i < NUM_FLOW_PRIORITY_STATS_TX; + i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = + ((u64 *)&priv->tx_priority_flowstats)[i]; + + for (i = 0; i < NUM_FLOW_STATS_TX; i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = ((u64 *)&priv->tx_flowstats)[i]; + for (i = 0; i < NUM_PKT_STATS; i++, bitmap_iterator_inc(&it)) if (bitmap_iterator_test(&it)) data[index++] = ((unsigned long *)&priv->pkstats)[i]; @@ -364,6 +426,12 @@ static void mlx4_en_get_strings(struct net_device *dev, strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[strings]); + for (i = 0; i < NUM_FLOW_STATS; i++, strings++, + bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[strings]); + for (i = 0; i < NUM_PKT_STATS; i++, strings++, bitmap_iterator_inc(&it)) if (bitmap_iterator_test(&it)) @@ -910,6 +978,12 @@ static int mlx4_en_set_pauseparam(struct net_device *dev, priv->prof->rx_ppp); if (err) en_err(priv, "Failed setting pause params\n"); + else + mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap, + priv->prof->rx_ppp, + priv->prof->rx_pause, + priv->prof->tx_ppp, + priv->prof->tx_pause); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 4542bab9494b..354e254b53cf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1888,6 +1888,12 @@ static void mlx4_en_clear_stats(struct net_device *dev) memset(&priv->pstats, 0, sizeof(priv->pstats)); memset(&priv->pkstats, 0, sizeof(priv->pkstats)); memset(&priv->port_stats, 0, sizeof(priv->port_stats)); + memset(&priv->rx_flowstats, 0, sizeof(priv->rx_flowstats)); + memset(&priv->tx_flowstats, 0, sizeof(priv->tx_flowstats)); + memset(&priv->rx_priority_flowstats, 0, + sizeof(priv->rx_priority_flowstats)); + memset(&priv->tx_priority_flowstats, 0, + sizeof(priv->tx_priority_flowstats)); for (i = 0; i < priv->tx_ring_num; i++) { priv->tx_ring[i]->bytes = 0; @@ -2648,8 +2654,46 @@ int mlx4_en_netdev_event(struct notifier_block *this, return NOTIFY_DONE; } +void mlx4_en_update_pfc_stats_bitmap(struct mlx4_dev *dev, + struct mlx4_en_stats_bitmap *stats_bitmap, + u8 rx_ppp, u8 rx_pause, + u8 tx_ppp, u8 tx_pause) +{ + int last_i = NUM_MAIN_STATS + NUM_PORT_STATS; + + if (!mlx4_is_slave(dev) && + (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN)) { + mutex_lock(&stats_bitmap->mutex); + bitmap_clear(stats_bitmap->bitmap, last_i, NUM_FLOW_STATS); + + if (rx_ppp) + bitmap_set(stats_bitmap->bitmap, last_i, + NUM_FLOW_PRIORITY_STATS_RX); + last_i += NUM_FLOW_PRIORITY_STATS_RX; + + if (rx_pause && !(rx_ppp)) + bitmap_set(stats_bitmap->bitmap, last_i, + NUM_FLOW_STATS_RX); + last_i += NUM_FLOW_STATS_RX; + + if (tx_ppp) + bitmap_set(stats_bitmap->bitmap, last_i, + NUM_FLOW_PRIORITY_STATS_TX); + last_i += NUM_FLOW_PRIORITY_STATS_TX; + + if (tx_pause && !(tx_ppp)) + bitmap_set(stats_bitmap->bitmap, last_i, + NUM_FLOW_STATS_TX); + last_i += NUM_FLOW_STATS_TX; + + mutex_unlock(&stats_bitmap->mutex); + } +} + void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, - struct mlx4_en_stats_bitmap *stats_bitmap) + struct mlx4_en_stats_bitmap *stats_bitmap, + u8 rx_ppp, u8 rx_pause, + u8 tx_ppp, u8 tx_pause) { int last_i = 0; @@ -2677,6 +2721,11 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, bitmap_set(stats_bitmap->bitmap, last_i, NUM_PORT_STATS); last_i += NUM_PORT_STATS; + mlx4_en_update_pfc_stats_bitmap(dev, stats_bitmap, + rx_ppp, rx_pause, + tx_ppp, tx_pause); + last_i += NUM_FLOW_STATS; + if (!mlx4_is_slave(dev)) bitmap_set(stats_bitmap->bitmap, last_i, NUM_PKT_STATS); } @@ -2914,7 +2963,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); - mlx4_en_set_stats_bitmap(mdev->dev, &priv->stats_bitmap); + mlx4_en_set_stats_bitmap(mdev->dev, &priv->stats_bitmap, + mdev->profile.prof[priv->port].rx_ppp, + mdev->profile.prof[priv->port].rx_pause, + mdev->profile.prof[priv->port].tx_ppp, + mdev->profile.prof[priv->port].tx_pause); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 6cb80072af6c..821ae1278dc9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -131,6 +131,7 @@ out: int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) { struct mlx4_en_stat_out_mbox *mlx4_en_stats; + struct mlx4_en_stat_out_flow_control_mbox *flowstats; struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]); struct net_device_stats *stats = &priv->stats; struct mlx4_cmd_mailbox *mailbox; @@ -239,6 +240,55 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) priv->pkstats.tx_prio[7] = be64_to_cpu(mlx4_en_stats->TTOT_prio_7); spin_unlock_bh(&priv->stats_lock); + /* 0xffs indicates invalid value */ + memset(mailbox->buf, 0xff, sizeof(*flowstats) * MLX4_NUM_PRIORITIES); + + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) { + memset(mailbox->buf, 0, + sizeof(*flowstats) * MLX4_NUM_PRIORITIES); + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, + in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL, + 0, MLX4_CMD_DUMP_ETH_STATS, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + if (err) + goto out; + } + + flowstats = mailbox->buf; + + spin_lock_bh(&priv->stats_lock); + + for (i = 0; i < MLX4_NUM_PRIORITIES; i++) { + priv->rx_priority_flowstats[i].rx_pause = + be64_to_cpu(flowstats[i].rx_pause); + priv->rx_priority_flowstats[i].rx_pause_duration = + be64_to_cpu(flowstats[i].rx_pause_duration); + priv->rx_priority_flowstats[i].rx_pause_transition = + be64_to_cpu(flowstats[i].rx_pause_transition); + priv->tx_priority_flowstats[i].tx_pause = + be64_to_cpu(flowstats[i].tx_pause); + priv->tx_priority_flowstats[i].tx_pause_duration = + be64_to_cpu(flowstats[i].tx_pause_duration); + priv->tx_priority_flowstats[i].tx_pause_transition = + be64_to_cpu(flowstats[i].tx_pause_transition); + } + + /* if pfc is not in use, all priorities counters have the same value */ + priv->rx_flowstats.rx_pause = + be64_to_cpu(flowstats[0].rx_pause); + priv->rx_flowstats.rx_pause_duration = + be64_to_cpu(flowstats[0].rx_pause_duration); + priv->rx_flowstats.rx_pause_transition = + be64_to_cpu(flowstats[0].rx_pause_transition); + priv->tx_flowstats.tx_pause = + be64_to_cpu(flowstats[0].tx_pause); + priv->tx_flowstats.tx_pause_duration = + be64_to_cpu(flowstats[0].tx_pause_duration); + priv->tx_flowstats.tx_pause_transition = + be64_to_cpu(flowstats[0].tx_pause_transition); + + spin_unlock_bh(&priv->stats_lock); + out: mlx4_free_cmd_mailbox(mdev->dev, mailbox); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 4a471f5d1b56..209a6171e59b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -145,7 +145,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [20] = "Recoverable error events support", [21] = "Port Remap support", [22] = "QCN support", - [23] = "QP rate limiting support" + [23] = "QP rate limiting support", + [24] = "Ethernet Flow control statistics support" }; int i; @@ -672,6 +673,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66 #define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67 #define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET 0x68 +#define QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET 0x70 #define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET 0x70 #define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 @@ -773,6 +775,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); dev_cap->num_ports = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET); + MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET); + if (field & 0x10) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN; dev_cap->max_msg_sz = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); if (field & 0x80) @@ -1088,6 +1093,7 @@ out: return err; } +#define DEV_CAP_EXT_2_FLAG_PFC_COUNTERS (1 << 28) #define DEV_CAP_EXT_2_FLAG_VLAN_CONTROL (1 << 26) #define DEV_CAP_EXT_2_FLAG_80_VFS (1 << 21) #define DEV_CAP_EXT_2_FLAG_FSM (1 << 20) @@ -1177,7 +1183,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, /* turn off host side virt features (VST, FSM, etc) for guests */ MLX4_GET(field32, outbox->buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); field32 &= ~(DEV_CAP_EXT_2_FLAG_VLAN_CONTROL | DEV_CAP_EXT_2_FLAG_80_VFS | - DEV_CAP_EXT_2_FLAG_FSM); + DEV_CAP_EXT_2_FLAG_FSM | DEV_CAP_EXT_2_FLAG_PFC_COUNTERS); MLX4_PUT(outbox->buf, field32, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); /* turn off QCN for guests */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d5d971a408f2..67eeea244eff 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -565,6 +565,10 @@ struct mlx4_en_priv { #endif struct mlx4_en_perf_stats pstats; struct mlx4_en_pkt_stats pkstats; + struct mlx4_en_flow_stats_rx rx_priority_flowstats[MLX4_NUM_PRIORITIES]; + struct mlx4_en_flow_stats_tx tx_priority_flowstats[MLX4_NUM_PRIORITIES]; + struct mlx4_en_flow_stats_rx rx_flowstats; + struct mlx4_en_flow_stats_tx tx_flowstats; struct mlx4_en_port_stats port_stats; struct mlx4_en_stats_bitmap stats_bitmap; struct list_head mc_list; @@ -736,7 +740,9 @@ int mlx4_en_start_port(struct net_device *dev); void mlx4_en_stop_port(struct net_device *dev, int detach); void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, - struct mlx4_en_stats_bitmap *stats_bitmap); + struct mlx4_en_stats_bitmap *stats_bitmap, + u8 rx_ppp, u8 rx_pause, + u8 tx_ppp, u8 tx_pause); void mlx4_en_free_resources(struct mlx4_en_priv *priv); int mlx4_en_alloc_resources(struct mlx4_en_priv *priv); @@ -823,7 +829,10 @@ void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev); int mlx4_en_reset_config(struct net_device *dev, struct hwtstamp_config ts_config, netdev_features_t new_features); - +void mlx4_en_update_pfc_stats_bitmap(struct mlx4_dev *dev, + struct mlx4_en_stats_bitmap *stats_bitmap, + u8 rx_ppp, u8 rx_pause, + u8 tx_ppp, u8 tx_pause); int mlx4_en_netdev_event(struct notifier_block *this, unsigned long event, void *ptr); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h index d7183e30b40b..e193680fb527 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h @@ -42,8 +42,58 @@ struct mlx4_en_perf_stats { }; #define NUM_MAIN_STATS 21 + +#define MLX4_NUM_PRIORITIES 8 + +struct mlx4_en_flow_stats_rx { + u64 rx_pause; + u64 rx_pause_duration; + u64 rx_pause_transition; +#define NUM_FLOW_STATS_RX 3 +#define NUM_FLOW_PRIORITY_STATS_RX (NUM_FLOW_STATS_RX * \ + MLX4_NUM_PRIORITIES) +}; + +struct mlx4_en_flow_stats_tx { + u64 tx_pause; + u64 tx_pause_duration; + u64 tx_pause_transition; +#define NUM_FLOW_STATS_TX 3 +#define NUM_FLOW_PRIORITY_STATS_TX (NUM_FLOW_STATS_TX * \ + MLX4_NUM_PRIORITIES) +}; + +#define NUM_FLOW_STATS (NUM_FLOW_STATS_RX + NUM_FLOW_STATS_TX + \ + NUM_FLOW_PRIORITY_STATS_TX + \ + NUM_FLOW_PRIORITY_STATS_RX) + +struct mlx4_en_stat_out_flow_control_mbox { + /* Total number of PAUSE frames received from the far-end port */ + __be64 rx_pause; + /* Total number of microseconds that far-end port requested to pause + * transmission of packets + */ + __be64 rx_pause_duration; + /* Number of received transmission from XOFF state to XON state */ + __be64 rx_pause_transition; + /* Total number of PAUSE frames sent from the far-end port */ + __be64 tx_pause; + /* Total time in microseconds that transmission of packets has been + * paused + */ + __be64 tx_pause_duration; + /* Number of transmitter transitions from XOFF state to XON state */ + __be64 tx_pause_transition; + /* Reserverd */ + __be64 reserved[2]; +}; + +enum { + MLX4_DUMP_ETH_STATS_FLOW_CONTROL = 1 << 12 +}; + #define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \ - NUM_PERF_STATS) + NUM_FLOW_STATS + NUM_PERF_STATS) #define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \ sizeof(((struct net_device_stats *)0)->n)) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 49abbe28e230..ab7ebec943b8 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -205,7 +205,8 @@ enum { MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21, MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22, - MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23 + MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23, + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24 }; enum { -- cgit v1.3-6-gb490 From a54acb3a6f853e8394c4cb7b6a4d93c88f13eefd Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 2 Apr 2015 17:07:00 +0200 Subject: dev: introduce dev_get_iflink() The goal of this patch is to prepare the removal of the iflink field. It introduces a new ndo function, which will be implemented by virtual interfaces. There is no functional change into this patch. All readers of iflink field now call dev_get_iflink(). Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_core.c | 2 +- include/linux/netdevice.h | 4 ++++ net/batman-adv/hard-interface.c | 5 +++-- net/bridge/br_netlink.c | 4 ++-- net/core/dev.c | 21 +++++++++++++++++++-- net/core/link_watch.c | 4 ++-- net/core/net-sysfs.c | 10 +++++++++- net/core/rtnetlink.c | 8 ++++---- net/ipv4/ipmr.c | 2 +- net/ipv6/addrconf.c | 4 ++-- net/ipv6/ip6mr.c | 2 +- 11 files changed, 48 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 2a175006028b..131bde98188d 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -330,7 +330,7 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb) struct rtable *rt; int err, ret = NET_XMIT_DROP; struct flowi4 fl4 = { - .flowi4_oif = dev->iflink, + .flowi4_oif = dev_get_iflink(dev), .flowi4_tos = RT_TOS(ip4h->tos), .flowi4_flags = FLOWI_FLAG_ANYSRC, .daddr = ip4h->daddr, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 967bb4c8caf1..788eb7a622ad 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1030,6 +1030,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int queue_index, u32 maxrate); * Called when a user wants to set a max-rate limitation of specific * TX queue. + * int (*ndo_get_iflink)(const struct net_device *dev); + * Called to get the iflink value of this device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1191,6 +1193,7 @@ struct net_device_ops { int (*ndo_set_tx_maxrate)(struct net_device *dev, int queue_index, u32 maxrate); + int (*ndo_get_iflink)(const struct net_device *dev); }; /** @@ -2149,6 +2152,7 @@ void __dev_remove_pack(struct packet_type *pt); void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); +int dev_get_iflink(const struct net_device *dev); struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index fbda6b54baff..baf1f9843f2c 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -83,11 +83,12 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) return true; /* no more parents..stop recursion */ - if (net_dev->iflink == 0 || net_dev->iflink == net_dev->ifindex) + if (dev_get_iflink(net_dev) == 0 || + dev_get_iflink(net_dev) == net_dev->ifindex) return false; /* recurse over the parent device */ - parent_dev = __dev_get_by_index(&init_net, net_dev->iflink); + parent_dev = __dev_get_by_index(&init_net, dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ if (WARN(!parent_dev, "Cannot find parent device")) return false; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e1115a224a95..0e4ddb81610d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -305,8 +305,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, nla_put_u8(skb, IFLA_OPERSTATE, operstate) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || - (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink))) + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; if (event == RTM_NEWLINK && port) { diff --git a/net/core/dev.c b/net/core/dev.c index 65492b0354c0..77172d085760 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -659,6 +659,23 @@ __setup("netdev=", netdev_boot_setup); *******************************************************************************/ +/** + * dev_get_iflink - get 'iflink' value of a interface + * @dev: targeted interface + * + * Indicates the ifindex the interface is linked to. + * Physical interfaces have the same 'ifindex' and 'iflink' values. + */ + +int dev_get_iflink(const struct net_device *dev) +{ + if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) + return dev->netdev_ops->ndo_get_iflink(dev); + + return dev->iflink; +} +EXPORT_SYMBOL(dev_get_iflink); + /** * __dev_get_by_name - find a device by its name * @net: the applicable net namespace @@ -6345,7 +6362,7 @@ int register_netdevice(struct net_device *dev) else if (__dev_get_by_index(net, dev->ifindex)) goto err_uninit; - if (dev->iflink == -1) + if (dev_get_iflink(dev) == -1) dev->iflink = dev->ifindex; /* Transfer changeable features to wanted_features and enable @@ -7061,7 +7078,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* If there is an ifindex conflict assign a new one */ if (__dev_get_by_index(net, dev->ifindex)) { - int iflink = (dev->iflink == dev->ifindex); + int iflink = (dev_get_iflink(dev) == dev->ifindex); dev->ifindex = dev_new_index(net); if (iflink) dev->iflink = dev->ifindex; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 49a9e3e06c08..982861607f88 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -40,7 +40,7 @@ static DEFINE_SPINLOCK(lweventlist_lock); static unsigned char default_operstate(const struct net_device *dev) { if (!netif_carrier_ok(dev)) - return (dev->ifindex != dev->iflink ? + return (dev->ifindex != dev_get_iflink(dev) ? IF_OPER_LOWERLAYERDOWN : IF_OPER_DOWN); if (netif_dormant(dev)) @@ -89,7 +89,7 @@ static bool linkwatch_urgent_event(struct net_device *dev) if (!netif_running(dev)) return false; - if (dev->ifindex != dev->iflink) + if (dev->ifindex != dev_get_iflink(dev)) return true; if (dev->priv_flags & IFF_TEAM_PORT) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index cc5cf689809c..4238d6da5c60 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -109,11 +109,19 @@ NETDEVICE_SHOW_RO(dev_id, fmt_hex); NETDEVICE_SHOW_RO(dev_port, fmt_dec); NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec); NETDEVICE_SHOW_RO(addr_len, fmt_dec); -NETDEVICE_SHOW_RO(iflink, fmt_dec); NETDEVICE_SHOW_RO(ifindex, fmt_dec); NETDEVICE_SHOW_RO(type, fmt_dec); NETDEVICE_SHOW_RO(link_mode, fmt_dec); +static ssize_t iflink_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct net_device *ndev = to_net_dev(dev); + + return sprintf(buf, fmt_dec, dev_get_iflink(ndev)); +} +static DEVICE_ATTR_RO(iflink); + static ssize_t format_name_assign_type(const struct net_device *dev, char *buf) { return sprintf(buf, fmt_dec, dev->name_assign_type); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b96ac2109c82..ee0186cdd5cf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1055,8 +1055,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif - (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink)) || + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) || (upper_dev && nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || @@ -2863,8 +2863,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || - (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink))) + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; br_afspec = nla_nest_start(skb, IFLA_AF_SPEC); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b4a545d24adb..eec68b0c3bc8 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -801,7 +801,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, v->pkt_out = 0; v->link = dev->ifindex; if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER)) - v->link = dev->iflink; + v->link = dev_get_iflink(dev); /* And finish update writing critical data */ write_lock_bh(&mrt_lock); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5c9e94cb1b2c..37b70e82bff8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4858,8 +4858,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || nla_put_u32(skb, IFLA_MTU, dev->mtu) || - (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink))) + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; protoinfo = nla_nest_start(skb, IFLA_PROTINFO); if (!protoinfo) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index caf6b99374e6..18a5ab286420 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -992,7 +992,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, v->pkt_out = 0; v->link = dev->ifindex; if (v->flags & MIFF_REGISTER) - v->link = dev->iflink; + v->link = dev_get_iflink(dev); /* And finish update writing critical data */ write_lock_bh(&mrt_lock); -- cgit v1.3-6-gb490 From 7a66bbc96ce9ad8261fa5f7f6ae65370eb6866ee Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 2 Apr 2015 17:07:09 +0200 Subject: net: remove iflink field from struct net_device Now that all users of iflink have the ndo_get_iflink handler available, it's possible to remove this field. By default, dev_get_iflink() returns the ifindex of the interface. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- net/core/dev.c | 13 ++----------- 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 788eb7a622ad..846a1f5bc9db 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1538,7 +1538,7 @@ struct net_device { netdev_features_t mpls_features; int ifindex; - int iflink; + int group; struct net_device_stats stats; @@ -1741,7 +1741,6 @@ struct net_device { #endif struct phy_device *phydev; struct lock_class_key *qdisc_tx_busylock; - int group; struct pm_qos_request pm_qos_req; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/net/core/dev.c b/net/core/dev.c index 77172d085760..3be107e0bc93 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -672,7 +672,7 @@ int dev_get_iflink(const struct net_device *dev) if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); - return dev->iflink; + return dev->ifindex; } EXPORT_SYMBOL(dev_get_iflink); @@ -6331,8 +6331,6 @@ int register_netdevice(struct net_device *dev) spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); - dev->iflink = -1; - ret = dev_get_valid_name(net, dev, dev->name); if (ret < 0) goto out; @@ -6362,9 +6360,6 @@ int register_netdevice(struct net_device *dev) else if (__dev_get_by_index(net, dev->ifindex)) goto err_uninit; - if (dev_get_iflink(dev) == -1) - dev->iflink = dev->ifindex; - /* Transfer changeable features to wanted_features and enable * software offloads (GSO and GRO). */ @@ -7077,12 +7072,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_net_set(dev, net); /* If there is an ifindex conflict assign a new one */ - if (__dev_get_by_index(net, dev->ifindex)) { - int iflink = (dev_get_iflink(dev) == dev->ifindex); + if (__dev_get_by_index(net, dev->ifindex)) dev->ifindex = dev_new_index(net); - if (iflink) - dev->iflink = dev->ifindex; - } /* Send a netdev-add uevent to the new namespace */ kobject_uevent(&dev->dev.kobj, KOBJ_ADD); -- cgit v1.3-6-gb490 From 802f42a8d99254b8602bf65cc50e8333bf479f13 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:06 +0300 Subject: net/mlx4: Add RSS support for fragmented IP datagrams Enable RSS support for fragmented IP packets, when device supports it. Until now, fragmented IP packets were directed only to the default_qpn. Since IP fragments (datagram) have no upper protocols (L3 IP packets), hash is performed on 3-tuple - dst MAC, source IP and dest IP. The HW makes sure that this holds for the 1st fragment too, so all fragments go to the same QP. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 12 ++++++++++++ drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 2 ++ include/linux/mlx4/device.h | 1 + 4 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 209a6171e59b..412e019e8b2e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -105,6 +105,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) [41] = "Unicast VEP steering support", [42] = "Multicast VEP steering support", [48] = "Counters support", + [52] = "RSS IP fragments support", [53] = "Port ETS Scheduler support", [55] = "Port link type sensing support", [59] = "Port management change event support", @@ -1138,6 +1139,9 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, } for (; slave_port < dev->caps.num_ports; ++slave_port) flags &= ~(MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port); + + /* Not exposing RSS IP fragments to guests */ + flags &= ~MLX4_DEV_CAP_FLAG_RSS_IP_FRAG; MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VL_PORT_OFFSET); @@ -1701,6 +1705,10 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4); + /* Enable RSS spread to fragmented IP packets when supported */ + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_RSS_IP_FRAG) + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 13); + /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) { *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29); @@ -1889,6 +1897,10 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, else param->steering_mode = MLX4_STEERING_MODE_A0; } + + if (dword_field & (1 << 13)) + param->rss_ip_frags = 1; + /* steering attributes */ if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 863655bd3947..07cb7c2461ad 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -203,6 +203,7 @@ struct mlx4_init_hca_param { u64 dev_cap_enabled; u16 cqe_size; /* For use only when CQE stride feature enabled */ u16 eqe_size; /* For use only when EQE stride feature enabled */ + u8 rss_ip_frags; }; struct mlx4_init_ib_param { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 43aa76775b5f..ee0a67069b57 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -885,6 +885,8 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) mlx4_warn(dev, "Timestamping is not supported in slave mode\n"); slave_adjust_steering_mode(dev, &dev_cap, &hca_param); + mlx4_dbg(dev, "RSS support for IP fragments is %s\n", + hca_param.rss_ip_frags ? "on" : "off"); if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP && dev->caps.bf_reg_size) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ab7ebec943b8..41eaafdfb1cd 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -174,6 +174,7 @@ enum { MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41, MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42, MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, + MLX4_DEV_CAP_FLAG_RSS_IP_FRAG = 1LL << 52, MLX4_DEV_CAP_FLAG_SET_ETH_SCHED = 1LL << 53, MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55, MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59, -- cgit v1.3-6-gb490 From fccea6436a5bd16aed6f722efce13ec2c90427d7 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:08 +0300 Subject: net/mlx4: Make mlx4_is_eth visible inline funcion Currently implemented as static function in resource_tracker.c -- this change will allow other files in mlx4_core to use it as well. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 5 ----- include/linux/mlx4/device.h | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 11bcd27e218f..fd2520aa8a8c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -221,11 +221,6 @@ struct res_fs_rule { int qpn; }; -static int mlx4_is_eth(struct mlx4_dev *dev, int port) -{ - return dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB ? 0 : 1; -} - static void *res_tracker_lookup(struct rb_root *root, u64 res_id) { struct rb_node *node = root->rb_node; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 41eaafdfb1cd..0f7f13d6a03e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1002,6 +1002,11 @@ static inline int mlx4_is_slave(struct mlx4_dev *dev) return dev->flags & MLX4_FLAG_SLAVE; } +static inline int mlx4_is_eth(struct mlx4_dev *dev, int port) +{ + return dev->caps.port_type[port] == MLX4_PORT_TYPE_IB ? 0 : 1; +} + int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, struct mlx4_buf *buf, gfp_t gfp); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); -- cgit v1.3-6-gb490 From 12a889c057504fbf307dd237aedb87263ef2848a Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:10 +0300 Subject: net/mlx4: New file for QoS related firmware commands Create two new files fw_qos.h and fw_qos.c in mlx4_core module. It gathers all relevant QoS firmware related commands etc, thus improving encapsulation of the mlx4_core module. For now it contains the QoS existing commands: mlx4_SET_PORT_SCHEDULER and mlx4_SET_PORT_PRIO2TC. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/Makefile | 5 +- drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c | 1 + drivers/net/ethernet/mellanox/mlx4/fw_qos.c | 125 +++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/fw_qos.h | 75 +++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 15 --- drivers/net/ethernet/mellanox/mlx4/port.c | 71 -------------- include/linux/mlx4/device.h | 5 - 7 files changed, 204 insertions(+), 93 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx4/fw_qos.c create mode 100644 drivers/net/ethernet/mellanox/mlx4/fw_qos.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/Makefile b/drivers/net/ethernet/mellanox/mlx4/Makefile index 3e9c70f15b42..c82217e0d22d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Makefile +++ b/drivers/net/ethernet/mellanox/mlx4/Makefile @@ -1,7 +1,8 @@ obj-$(CONFIG_MLX4_CORE) += mlx4_core.o -mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \ - mr.o pd.o port.o profile.o qp.o reset.o sense.o srq.o resource_tracker.o +mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o fw_qos.o icm.o intf.o \ + main.o mcg.o mr.o pd.o port.o profile.o qp.o reset.o sense.o \ + srq.o resource_tracker.o obj-$(CONFIG_MLX4_EN) += mlx4_en.o diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index 8e3260c0eaa5..f01918c63f28 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -35,6 +35,7 @@ #include #include "mlx4_en.h" +#include "fw_qos.h" /* Definitions for QCN */ diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c new file mode 100644 index 000000000000..0f5af7cf4547 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. + * All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "fw_qos.h" + +struct mlx4_set_port_prio2tc_context { + u8 prio2tc[4]; +}; + +struct mlx4_port_scheduler_tc_cfg_be { + __be16 pg; + __be16 bw_precentage; + __be16 max_bw_units; /* 3-100Mbps, 4-1Gbps, other values - reserved */ + __be16 max_bw_value; +}; + +struct mlx4_set_port_scheduler_context { + struct mlx4_port_scheduler_tc_cfg_be tc[MLX4_NUM_TC]; +}; + +int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_prio2tc_context *context; + int err; + u32 in_mod; + int i; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + context = mailbox->buf; + + for (i = 0; i < MLX4_NUM_UP; i += 2) + context->prio2tc[i >> 1] = prio2tc[i] << 4 | prio2tc[i + 1]; + + in_mod = MLX4_SET_PORT_PRIO2TC << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_PRIO2TC); + +int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, + u8 *pg, u16 *ratelimit) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_scheduler_context *context; + int err; + u32 in_mod; + int i; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + context = mailbox->buf; + + for (i = 0; i < MLX4_NUM_TC; i++) { + struct mlx4_port_scheduler_tc_cfg_be *tc = &context->tc[i]; + u16 r; + + if (ratelimit && ratelimit[i]) { + if (ratelimit[i] <= MLX4_MAX_100M_UNITS_VAL) { + r = ratelimit[i]; + tc->max_bw_units = + htons(MLX4_RATELIMIT_100M_UNITS); + } else { + r = ratelimit[i] / 10; + tc->max_bw_units = + htons(MLX4_RATELIMIT_1G_UNITS); + } + tc->max_bw_value = htons(r); + } else { + tc->max_bw_value = htons(MLX4_RATELIMIT_DEFAULT); + tc->max_bw_units = htons(MLX4_RATELIMIT_1G_UNITS); + } + + tc->pg = htons(pg[i]); + tc->bw_precentage = htons(tc_tx_bw[i]); + } + + in_mod = MLX4_SET_PORT_SCHEDULER << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h new file mode 100644 index 000000000000..ab9be0fad9c8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. + * All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_FW_QOS_H +#define MLX4_FW_QOS_H + +#include +#include + +#define MLX4_NUM_UP 8 +#define MLX4_NUM_TC 8 + +/** + * mlx4_SET_PORT_PRIO2TC - This routine maps user priorities to traffic + * classes of a given port and device. + * + * @dev: mlx4_dev. + * @port: Physical port number. + * @prio2tc: Array of TC associated with each priorities. + * + * Returns 0 on success or a negative mlx4_core errno code. + **/ +int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); + +/** + * mlx4_SET_PORT_SCHEDULER - This routine configures the arbitration between + * traffic classes (ETS) and configured rate limit for traffic classes. + * tc_tx_bw, pg and ratelimit are arrays where each index represents a TC. + * The description for those parameters below refers to a single TC. + * + * @dev: mlx4_dev. + * @port: Physical port number. + * @tc_tx_bw: The percentage of the bandwidth allocated for traffic class + * within a TC group. The sum of the bw_percentage of all the traffic + * classes within a TC group must equal 100% for correct operation. + * @pg: The TC group the traffic class is associated with. + * @ratelimit: The maximal bandwidth allowed for the use by this traffic class. + * + * Returns 0 on success or a negative mlx4_core errno code. + **/ +int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, + u8 *pg, u16 *ratelimit); + +#endif /* MLX4_FW_QOS_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 0b16db015745..9e5236b1b5f8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -64,21 +64,6 @@ #define INIT_HCA_TPT_MW_ENABLE (1 << 7) -struct mlx4_set_port_prio2tc_context { - u8 prio2tc[4]; -}; - -struct mlx4_port_scheduler_tc_cfg_be { - __be16 pg; - __be16 bw_precentage; - __be16 max_bw_units; /* 3-100Mbps, 4-1Gbps, other values - reserved */ - __be16 max_bw_value; -}; - -struct mlx4_set_port_scheduler_context { - struct mlx4_port_scheduler_tc_cfg_be tc[MLX4_NUM_TC]; -}; - enum { MLX4_HCR_BASE = 0x80680, MLX4_HCR_SIZE = 0x0001c, diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index b97f173ab062..6a53d42db52f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -1016,77 +1016,6 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, } EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc); -int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc) -{ - struct mlx4_cmd_mailbox *mailbox; - struct mlx4_set_port_prio2tc_context *context; - int err; - u32 in_mod; - int i; - - mailbox = mlx4_alloc_cmd_mailbox(dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - context = mailbox->buf; - for (i = 0; i < MLX4_NUM_UP; i += 2) - context->prio2tc[i >> 1] = prio2tc[i] << 4 | prio2tc[i + 1]; - - in_mod = MLX4_SET_PORT_PRIO2TC << 8 | port; - err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); - - mlx4_free_cmd_mailbox(dev, mailbox); - return err; -} -EXPORT_SYMBOL(mlx4_SET_PORT_PRIO2TC); - -int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, - u8 *pg, u16 *ratelimit) -{ - struct mlx4_cmd_mailbox *mailbox; - struct mlx4_set_port_scheduler_context *context; - int err; - u32 in_mod; - int i; - - mailbox = mlx4_alloc_cmd_mailbox(dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - context = mailbox->buf; - - for (i = 0; i < MLX4_NUM_TC; i++) { - struct mlx4_port_scheduler_tc_cfg_be *tc = &context->tc[i]; - u16 r; - - if (ratelimit && ratelimit[i]) { - if (ratelimit[i] <= MLX4_MAX_100M_UNITS_VAL) { - r = ratelimit[i]; - tc->max_bw_units = - htons(MLX4_RATELIMIT_100M_UNITS); - } else { - r = ratelimit[i]/10; - tc->max_bw_units = - htons(MLX4_RATELIMIT_1G_UNITS); - } - tc->max_bw_value = htons(r); - } else { - tc->max_bw_value = htons(MLX4_RATELIMIT_DEFAULT); - tc->max_bw_units = htons(MLX4_RATELIMIT_1G_UNITS); - } - - tc->pg = htons(pg[i]); - tc->bw_precentage = htons(tc_tx_bw[i]); - } - - in_mod = MLX4_SET_PORT_SCHEDULER << 8 | port; - err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); - - mlx4_free_cmd_mailbox(dev, mailbox); - return err; -} -EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER); - enum { VXLAN_ENABLE_MODIFY = 1 << 7, VXLAN_STEERING_MODIFY = 1 << 6, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 0f7f13d6a03e..b676d0c0111a 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -49,8 +49,6 @@ #define MSIX_LEGACY_SZ 4 #define MIN_MSIX_P_PORT 5 -#define MLX4_NUM_UP 8 -#define MLX4_NUM_TC 8 #define MLX4_MAX_100M_UNITS_VAL 255 /* * work around: can't set values * greater then this value when @@ -1311,9 +1309,6 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); -int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); -int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, - u8 *pg, u16 *ratelimit); int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); -- cgit v1.3-6-gb490 From 7e95bb99a8f969d4534867452793e44cc4731ca9 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:11 +0300 Subject: net/mlx4: Add mlx4_ALLOCATE_VPP implementation Implements device ALLOCATE_VPP command, to be used for granular QoS configuration of VFs by the PF device. Defines and queries the amount of VPPs assigned to each port, and the amount of VPPs assigned to each priority of each port. Once the total VPPs are split between the priorities of a port, they may be assigned with a share of the BW or a rate limit. Split into two functions (get/set) whoch are supplied with mlx4_alloc_vpp_context and physical port number. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 ++++ drivers/net/ethernet/mellanox/mlx4/fw_qos.c | 75 +++++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/fw_qos.h | 28 +++++++++++ include/linux/mlx4/cmd.h | 1 + 4 files changed, 113 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 179ca2889434..79cc5ec57dca 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1454,6 +1454,15 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_CMD_EPERM_wrapper, }, + { + .opcode = MLX4_CMD_ALLOCATE_VPP, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper, + }, { .opcode = MLX4_CMD_CONF_SPECIAL_QP, .has_inbox = false, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c index 0f5af7cf4547..5ce3440b99c7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c @@ -34,6 +34,13 @@ #include #include "fw_qos.h" +#include "fw.h" + +enum { + /* allocate vpp opcode modifiers */ + MLX4_ALLOCATE_VPP_ALLOCATE = 0x0, + MLX4_ALLOCATE_VPP_QUERY = 0x1 +}; struct mlx4_set_port_prio2tc_context { u8 prio2tc[4]; @@ -50,6 +57,12 @@ struct mlx4_set_port_scheduler_context { struct mlx4_port_scheduler_tc_cfg_be tc[MLX4_NUM_TC]; }; +/* Granular Qos (per VF) section */ +struct mlx4_alloc_vpp_param { + __be32 availible_vpp; + __be32 vpp_p_up[MLX4_NUM_UP]; +}; + int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc) { struct mlx4_cmd_mailbox *mailbox; @@ -123,3 +136,65 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, return err; } EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER); + +int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, + u16 *availible_vpp, u8 *vpp_p_up) +{ + int i; + int err; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_alloc_vpp_param *out_param; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + out_param = mailbox->buf; + + err = mlx4_cmd_box(dev, 0, mailbox->dma, port, + MLX4_ALLOCATE_VPP_QUERY, + MLX4_CMD_ALLOCATE_VPP, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + goto out; + + /* Total number of supported VPPs */ + *availible_vpp = (u16)be32_to_cpu(out_param->availible_vpp); + + for (i = 0; i < MLX4_NUM_UP; i++) + vpp_p_up[i] = (u8)be32_to_cpu(out_param->vpp_p_up[i]); + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} +EXPORT_SYMBOL(mlx4_ALLOCATE_VPP_get); + +int mlx4_ALLOCATE_VPP_set(struct mlx4_dev *dev, u8 port, u8 *vpp_p_up) +{ + int i; + int err; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_alloc_vpp_param *in_param; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + in_param = mailbox->buf; + + for (i = 0; i < MLX4_NUM_UP; i++) + in_param->vpp_p_up[i] = cpu_to_be32(vpp_p_up[i]); + + err = mlx4_cmd(dev, mailbox->dma, port, + MLX4_ALLOCATE_VPP_ALLOCATE, + MLX4_CMD_ALLOCATE_VPP, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_ALLOCATE_VPP_set); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h index ab9be0fad9c8..be79951c44ef 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h @@ -71,5 +71,33 @@ int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); **/ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, u8 *pg, u16 *ratelimit); +/** + * mlx4_ALLOCATE_VPP_get - Query port VPP availible resources and allocation. + * Before distribution of VPPs to priorities, only availible_vpp is returned. + * After initialization it returns the distribution of VPPs among priorities. + * + * @dev: mlx4_dev. + * @port: Physical port number. + * @availible_vpp: Pointer to variable where number of availible VPPs is stored + * @vpp_p_up: Distribution of VPPs to priorities is stored in this array + * + * Returns 0 on success or a negative mlx4_core errno code. + **/ +int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, + u16 *availible_vpp, u8 *vpp_p_up); +/** + * mlx4_ALLOCATE_VPP_set - Distribution of VPPs among differnt priorities. + * The total number of VPPs assigned to all for a port must not exceed + * the value reported by availible_vpp in mlx4_ALLOCATE_VPP_get. + * VPP allocation is allowed only after the port type has been set, + * and while no QPs are open for this port. + * + * @dev: mlx4_dev. + * @port: Physical port number. + * @vpp_p_up: Allocation of VPPs to different priorities. + * + * Returns 0 on success or a negative mlx4_core errno code. + **/ +int mlx4_ALLOCATE_VPP_set(struct mlx4_dev *dev, u8 port, u8 *vpp_p_up); #endif /* MLX4_FW_QOS_H */ diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 7299e9548906..e0f88a098fb8 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -68,6 +68,7 @@ enum { MLX4_CMD_UNMAP_ICM_AUX = 0xffb, MLX4_CMD_SET_ICM_SIZE = 0xffd, MLX4_CMD_ACCESS_REG = 0x3b, + MLX4_CMD_ALLOCATE_VPP = 0x80, /*master notify fw on finish for slave's flr*/ MLX4_CMD_INFORM_FLR_DONE = 0x5b, -- cgit v1.3-6-gb490 From 1c29146d3821be984030b49cf9509a269edc3b8b Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:12 +0300 Subject: net/mlx4: Add mlx4_SET_VPORT_QOS implementation Add the SET_VPORT_QOS device command, which is ntended for virtual granular QoS configuration per VF in SRIOV mode. The SET_VPORT_QOS command sets and queries QoS parameters of a VPort. Each priority allowed for a VPort is assigned with a share of the BW, and a BW limitation. QoS parameters can be modified at any time, but must be initialized before any QP is associated with the VPort. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 +++ drivers/net/ethernet/mellanox/mlx4/fw_qos.c | 89 +++++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/fw_qos.h | 36 ++++++++++++ include/linux/mlx4/cmd.h | 1 + 4 files changed, 135 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 79cc5ec57dca..af95231ba3e2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1463,6 +1463,15 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_CMD_EPERM_wrapper, }, + { + .opcode = MLX4_CMD_SET_VPORT_QOS, + .has_inbox = false, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper, + }, { .opcode = MLX4_CMD_CONF_SPECIAL_QP, .has_inbox = false, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c index 5ce3440b99c7..8f2fde0487c4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c @@ -42,6 +42,12 @@ enum { MLX4_ALLOCATE_VPP_QUERY = 0x1 }; +enum { + /* set vport qos opcode modifiers */ + MLX4_SET_VPORT_QOS_SET = 0x0, + MLX4_SET_VPORT_QOS_QUERY = 0x1 +}; + struct mlx4_set_port_prio2tc_context { u8 prio2tc[4]; }; @@ -63,6 +69,19 @@ struct mlx4_alloc_vpp_param { __be32 vpp_p_up[MLX4_NUM_UP]; }; +struct mlx4_prio_qos_param { + __be32 bw_share; + __be32 max_avg_bw; + __be32 reserved; + __be32 enable; + __be32 reserved1[4]; +}; + +struct mlx4_set_vport_context { + __be32 reserved[8]; + struct mlx4_prio_qos_param qos_p_up[MLX4_NUM_UP]; +}; + int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc) { struct mlx4_cmd_mailbox *mailbox; @@ -198,3 +217,73 @@ int mlx4_ALLOCATE_VPP_set(struct mlx4_dev *dev, u8 port, u8 *vpp_p_up) return err; } EXPORT_SYMBOL(mlx4_ALLOCATE_VPP_set); + +int mlx4_SET_VPORT_QOS_get(struct mlx4_dev *dev, u8 port, u8 vport, + struct mlx4_vport_qos_param *out_param) +{ + int i; + int err; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_vport_context *ctx; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + ctx = mailbox->buf; + + err = mlx4_cmd_box(dev, 0, mailbox->dma, (vport << 8) | port, + MLX4_SET_VPORT_QOS_QUERY, + MLX4_CMD_SET_VPORT_QOS, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + goto out; + + for (i = 0; i < MLX4_NUM_UP; i++) { + out_param[i].bw_share = be32_to_cpu(ctx->qos_p_up[i].bw_share); + out_param[i].max_avg_bw = + be32_to_cpu(ctx->qos_p_up[i].max_avg_bw); + out_param[i].enable = + !!(be32_to_cpu(ctx->qos_p_up[i].enable) & 31); + } + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} +EXPORT_SYMBOL(mlx4_SET_VPORT_QOS_get); + +int mlx4_SET_VPORT_QOS_set(struct mlx4_dev *dev, u8 port, u8 vport, + struct mlx4_vport_qos_param *in_param) +{ + int i; + int err; + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_vport_context *ctx; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + ctx = mailbox->buf; + + for (i = 0; i < MLX4_NUM_UP; i++) { + ctx->qos_p_up[i].bw_share = cpu_to_be32(in_param[i].bw_share); + ctx->qos_p_up[i].max_avg_bw = + cpu_to_be32(in_param[i].max_avg_bw); + ctx->qos_p_up[i].enable = + cpu_to_be32(in_param[i].enable << 31); + } + + err = mlx4_cmd(dev, mailbox->dma, (vport << 8) | port, + MLX4_SET_VPORT_QOS_SET, + MLX4_CMD_SET_VPORT_QOS, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_VPORT_QOS_set); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h index be79951c44ef..b3fffafff062 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h @@ -41,6 +41,12 @@ #define MLX4_NUM_UP 8 #define MLX4_NUM_TC 8 +struct mlx4_vport_qos_param { + u32 bw_share; + u32 max_avg_bw; + u8 enable; +}; + /** * mlx4_SET_PORT_PRIO2TC - This routine maps user priorities to traffic * classes of a given port and device. @@ -100,4 +106,34 @@ int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, **/ int mlx4_ALLOCATE_VPP_set(struct mlx4_dev *dev, u8 port, u8 *vpp_p_up); +/** + * mlx4_SET_VPORT_QOS_get - Query QoS proporties of a Vport. + * Each priority allowed for the Vport is assigned with a share of the BW, + * and a BW limitation. This commands query the current QoS values. + * + * @dev: mlx4_dev. + * @port: Physical port number. + * @vport: Vport id. + * @out_param: Array of mlx4_vport_qos_param that will contain the values. + * + * Returns 0 on success or a negative mlx4_core errno code. + **/ +int mlx4_SET_VPORT_QOS_get(struct mlx4_dev *dev, u8 port, u8 vport, + struct mlx4_vport_qos_param *out_param); + +/** + * mlx4_SET_VPORT_QOS_set - Set QoS proporties of a Vport. + * QoS parameters can be modified at any time, but must be initialized + * before any QP is associated with the VPort. + * + * @dev: mlx4_dev. + * @port: Physical port number. + * @vport: Vport id. + * @out_param: Array of mlx4_vport_qos_param which holds the requested values. + * + * Returns 0 on success or a negative mlx4_core errno code. + **/ +int mlx4_SET_VPORT_QOS_set(struct mlx4_dev *dev, u8 port, u8 vport, + struct mlx4_vport_qos_param *in_param); + #endif /* MLX4_FW_QOS_H */ diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index e0f88a098fb8..88326ed0033a 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -69,6 +69,7 @@ enum { MLX4_CMD_SET_ICM_SIZE = 0xffd, MLX4_CMD_ACCESS_REG = 0x3b, MLX4_CMD_ALLOCATE_VPP = 0x80, + MLX4_CMD_SET_VPORT_QOS = 0x81, /*master notify fw on finish for slave's flr*/ MLX4_CMD_INFORM_FLR_DONE = 0x5b, -- cgit v1.3-6-gb490 From d019fcb2244519d453694bfce868f3e717bfcebb Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:13 +0300 Subject: net/mlx4: Query device for QoS per VF support Checks in QUERY_DEV_CAP if the granular QoS per VF feature is supported by the device. Disabled for guests. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 10 +++++++++- include/linux/mlx4/device.h | 3 ++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 412e019e8b2e..94e857e39063 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -147,7 +147,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [21] = "Port Remap support", [22] = "QCN support", [23] = "QP rate limiting support", - [24] = "Ethernet Flow control statistics support" + [24] = "Ethernet Flow control statistics support", + [25] = "Granular QoS per VF support", }; int i; @@ -871,6 +872,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET); dev_cap->max_rq_desc_sz = size; MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE); + if (field & (1 << 4)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QOS_VPP; if (field & (1 << 5)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL; if (field & (1 << 6)) @@ -1199,6 +1202,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field16 = 0; MLX4_PUT(outbox->buf, field16, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET); + /* turn off QoS per VF support for guests */ + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE); + field &= 0xef; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE); + return 0; } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b676d0c0111a..c37208f7869f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -205,7 +205,8 @@ enum { MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21, MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22, MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23, - MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24 + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24, + MLX4_DEV_CAP_FLAG2_QOS_VPP = 1LL << 25, }; enum { -- cgit v1.3-6-gb490 From 08068cd5683f11e4505aa9c8cc6ed5942f8ad299 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:15 +0300 Subject: net/mlx4: Added qos_vport QP configuration in VST mode Granular QoS per VF feature introduce a new QP field, qos_vport. PF administrator can connect VF QPs to a certain QoS Vport, to inherit its proporties. Connecting QPs to the default QoS Vport (defined as 0) is always allowed, even when there are no allocated VPPs. At this point, only the default vport is connected to QPs. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 ++++++++- drivers/net/ethernet/mellanox/mlx4/fw_qos.h | 3 +++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 ++ drivers/net/ethernet/mellanox/mlx4/qp.c | 5 +++++ drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 6 ++++++ include/linux/mlx4/qp.h | 8 ++++++-- 6 files changed, 30 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 7d3c64275b75..4c0c6c7e63e7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -48,6 +48,7 @@ #include "mlx4.h" #include "fw.h" +#include "fw_qos.h" #define CMD_POLL_TOKEN 0xffff #define INBOX_MASK 0xffffffffffffff00ULL @@ -1808,7 +1809,8 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, if (vp_oper->state.default_vlan == vp_admin->default_vlan && vp_oper->state.default_qos == vp_admin->default_qos && - vp_oper->state.link_state == vp_admin->link_state) + vp_oper->state.link_state == vp_admin->link_state && + vp_oper->state.qos_vport == vp_admin->qos_vport) return 0; if (!(priv->mfunc.master.slave_state[slave].active && @@ -1866,6 +1868,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, vp_oper->state.default_vlan = vp_admin->default_vlan; vp_oper->state.default_qos = vp_admin->default_qos; vp_oper->state.link_state = vp_admin->link_state; + vp_oper->state.qos_vport = vp_admin->qos_vport; if (vp_admin->link_state == IFLA_VF_LINK_STATE_DISABLE) work->flags |= MLX4_VF_IMMED_VLAN_FLAG_LINK_DISABLE; @@ -1874,6 +1877,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, work->port = port; work->slave = slave; work->qos = vp_oper->state.default_qos; + work->qos_vport = vp_oper->state.qos_vport; work->vlan_id = vp_oper->state.default_vlan; work->vlan_ix = vp_oper->vlan_idx; work->priv = priv; @@ -2339,6 +2343,9 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) INIT_LIST_HEAD(&s_state->mcast_filters[port]); admin_vport->default_vlan = MLX4_VGT; oper_vport->default_vlan = MLX4_VGT; + admin_vport->qos_vport = + MLX4_VPP_DEFAULT_VPORT; + oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT; vf_oper->vport[port].vlan_idx = NO_INDX; vf_oper->vport[port].mac_idx = NO_INDX; } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h index 4a5320f79094..ac1f331878e6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h @@ -44,6 +44,9 @@ /* Default supported priorities for VPP allocation */ #define MLX4_DEFAULT_QOS_PRIO (0) +/* Derived from FW feature definition, 0 is the default vport fo all QPs */ +#define MLX4_VPP_DEFAULT_VPORT (0) + struct mlx4_vport_qos_param { u32 bw_share; u32 max_avg_bw; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 1add698dba5b..32469c64d580 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -498,6 +498,7 @@ struct mlx4_vport_state { u32 tx_rate; bool spoofchk; u32 link_state; + u8 qos_vport; }; struct mlx4_vf_admin_state { @@ -636,6 +637,7 @@ struct mlx4_vf_immed_vlan_work { int orig_vlan_ix; u8 port; u8 qos; + u8 qos_vport; u16 vlan_id; u16 orig_vlan_id; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 69e4462e4ee4..b75214a80d0e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -447,6 +447,11 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, cmd->qp_context.rate_limit_params = cpu_to_be16((params->rate_unit << 14) | params->rate_val); } + if (attr & MLX4_UPDATE_QP_QOS_VPORT) { + qp_mask |= 1ULL << MLX4_UPD_QP_MASK_QOS_VPP; + cmd->qp_context.qos_vport = params->qos_vport; + } + cmd->primary_addr_path_mask = cpu_to_be64(pri_addr_path_mask); cmd->qp_mask = cpu_to_be64(qp_mask); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index fd2520aa8a8c..c7f28bf4b8e2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -765,6 +765,7 @@ static int update_vport_qp_param(struct mlx4_dev *dev, qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; qpc->pri_path.sched_queue &= 0xC7; qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3; + qpc->qos_vport = vp_oper->state.qos_vport; } if (vp_oper->state.spoofchk) { qpc->pri_path.feup |= MLX4_FSM_FORCE_ETH_SRC_MAC; @@ -4917,6 +4918,11 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) qp->sched_queue & 0xC7; upd_context->qp_context.pri_path.sched_queue |= ((work->qos & 0x7) << 3); + upd_context->qp_mask |= + cpu_to_be64(1ULL << + MLX4_UPD_QP_MASK_QOS_VPP); + upd_context->qp_context.qos_vport = + work->qos_vport; } err = mlx4_cmd(dev, mailbox->dma, diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 1023ebe035b7..6fed539e5456 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -209,7 +209,8 @@ struct mlx4_qp_context { __be16 sq_wqe_counter; u32 reserved3; __be16 rate_limit_params; - __be16 reserved4; + u8 reserved4; + u8 qos_vport; __be32 param3; __be32 nummmcpeers_basemkey; u8 log_page_size; @@ -231,6 +232,7 @@ struct mlx4_update_qp_context { enum { MLX4_UPD_QP_MASK_PM_STATE = 32, MLX4_UPD_QP_MASK_VSD = 33, + MLX4_UPD_QP_MASK_QOS_VPP = 34, MLX4_UPD_QP_MASK_RATE_LIMIT = 35, }; @@ -432,7 +434,8 @@ enum mlx4_update_qp_attr { MLX4_UPDATE_QP_SMAC = 1 << 0, MLX4_UPDATE_QP_VSD = 1 << 1, MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2, - MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 3) - 1 + MLX4_UPDATE_QP_QOS_VPORT = 1 << 3, + MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 4) - 1 }; enum mlx4_update_qp_params_flags { @@ -441,6 +444,7 @@ enum mlx4_update_qp_params_flags { struct mlx4_update_qp_params { u8 smac_index; + u8 qos_vport; u32 flags; u16 rate_unit; u16 rate_val; -- cgit v1.3-6-gb490 From cda373f4849d5dd6fedceb4aeba35682a0e1a833 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:16 +0300 Subject: net/mlx4_en: Enable TX rate limit per VF Support granular QoS per VF, by implementing the ndo_set_vf_rate. Enforce a rate limit per VF when called, and enabled only for VFs in VST mode with user priority supported by the device. We don't enforce VFs to be in VST mode at the moment of configuration, but rather save the given rate limit and enforce it when the VF is moved to VST with user priority which is supported (currently 0). VST<->VGT or VST qos value state changes are disallowed when a rate limit is configured. Minimum BW share is not supported yet. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 177 ++++++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 11 ++ include/linux/mlx4/cmd.h | 2 + 3 files changed, 189 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 4c0c6c7e63e7..bc6e4c9b6bb5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2781,6 +2781,103 @@ static int mlx4_slaves_closest_port(struct mlx4_dev *dev, int slave, int port) return port; } +static int mlx4_set_vport_qos(struct mlx4_priv *priv, int slave, int port, + int max_tx_rate) +{ + int i; + int err; + struct mlx4_qos_manager *port_qos; + struct mlx4_dev *dev = &priv->dev; + struct mlx4_vport_qos_param vpp_qos[MLX4_NUM_UP]; + + port_qos = &priv->mfunc.master.qos_ctl[port]; + memset(vpp_qos, 0, sizeof(struct mlx4_vport_qos_param) * MLX4_NUM_UP); + + if (slave > port_qos->num_of_qos_vfs) { + mlx4_info(dev, "No availible VPP resources for this VF\n"); + return -EINVAL; + } + + /* Query for default QoS values from Vport 0 is needed */ + err = mlx4_SET_VPORT_QOS_get(dev, port, 0, vpp_qos); + if (err) { + mlx4_info(dev, "Failed to query Vport 0 QoS values\n"); + return err; + } + + for (i = 0; i < MLX4_NUM_UP; i++) { + if (test_bit(i, port_qos->priority_bm) && max_tx_rate) { + vpp_qos[i].max_avg_bw = max_tx_rate; + vpp_qos[i].enable = 1; + } else { + /* if user supplied tx_rate == 0, meaning no rate limit + * configuration is required. so we are leaving the + * value of max_avg_bw as queried from Vport 0. + */ + vpp_qos[i].enable = 0; + } + } + + err = mlx4_SET_VPORT_QOS_set(dev, port, slave, vpp_qos); + if (err) { + mlx4_info(dev, "Failed to set Vport %d QoS values\n", slave); + return err; + } + + return 0; +} + +static bool mlx4_is_vf_vst_and_prio_qos(struct mlx4_dev *dev, int port, + struct mlx4_vport_state *vf_admin) +{ + struct mlx4_qos_manager *info; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (!mlx4_is_master(dev) || + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP)) + return false; + + info = &priv->mfunc.master.qos_ctl[port]; + + if (vf_admin->default_vlan != MLX4_VGT && + test_bit(vf_admin->default_qos, info->priority_bm)) + return true; + + return false; +} + +static bool mlx4_valid_vf_state_change(struct mlx4_dev *dev, int port, + struct mlx4_vport_state *vf_admin, + int vlan, int qos) +{ + struct mlx4_vport_state dummy_admin = {0}; + + if (!mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin) || + !vf_admin->tx_rate) + return true; + + dummy_admin.default_qos = qos; + dummy_admin.default_vlan = vlan; + + /* VF wants to move to other VST state which is valid with current + * rate limit. Either differnt default vlan in VST or other + * supported QoS priority. Otherwise we don't allow this change when + * the TX rate is still configured. + */ + if (mlx4_is_vf_vst_and_prio_qos(dev, port, &dummy_admin)) + return true; + + mlx4_info(dev, "Cannot change VF state to %s while rate is set\n", + (vlan == MLX4_VGT) ? "VGT" : "VST"); + + if (vlan != MLX4_VGT) + mlx4_info(dev, "VST priority %d not supported for QoS\n", qos); + + mlx4_info(dev, "Please set rate to 0 prior to this VF state change\n"); + + return false; +} + int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2824,12 +2921,22 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) port = mlx4_slaves_closest_port(dev, slave, port); vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + if (!mlx4_valid_vf_state_change(dev, port, vf_admin, vlan, qos)) + return -EPERM; + if ((0 == vlan) && (0 == qos)) vf_admin->default_vlan = MLX4_VGT; else vf_admin->default_vlan = vlan; vf_admin->default_qos = qos; + /* If rate was configured prior to VST, we saved the configured rate + * in vf_admin->rate and now, if priority supported we enforce the QoS + */ + if (mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin) && + vf_admin->tx_rate) + vf_admin->qos_vport = slave; + if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port)) mlx4_info(dev, "updating vf %d port %d config will take effect on next VF restart\n", @@ -2838,6 +2945,69 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) } EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan); +int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate, + int max_tx_rate) +{ + int err; + int slave; + struct mlx4_vport_state *vf_admin; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (!mlx4_is_master(dev) || + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP)) + return -EPROTONOSUPPORT; + + if (min_tx_rate) { + mlx4_info(dev, "Minimum BW share not supported\n"); + return -EPROTONOSUPPORT; + } + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + port = mlx4_slaves_closest_port(dev, slave, port); + vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + + err = mlx4_set_vport_qos(priv, slave, port, max_tx_rate); + if (err) { + mlx4_info(dev, "vf %d failed to set rate %d\n", vf, + max_tx_rate); + return err; + } + + vf_admin->tx_rate = max_tx_rate; + /* if VF is not in supported mode (VST with supported prio), + * we do not change vport configuration for its QPs, but save + * the rate, so it will be enforced when it moves to supported + * mode next time. + */ + if (!mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin)) { + mlx4_info(dev, + "rate set for VF %d when not in valid state\n", vf); + + if (vf_admin->default_vlan != MLX4_VGT) + mlx4_info(dev, "VST priority not supported by QoS\n"); + else + mlx4_info(dev, "VF in VGT mode (needed VST)\n"); + + mlx4_info(dev, + "rate %d take affect when VF moves to valid state\n", + max_tx_rate); + return 0; + } + + /* If user sets rate 0 assigning default vport for its QPs */ + vf_admin->qos_vport = max_tx_rate ? slave : MLX4_VPP_DEFAULT_VPORT; + + if (priv->mfunc.master.slave_state[slave].active && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) + mlx4_master_immediate_activate_vlan_qos(priv, slave, port); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_set_vf_rate); + /* mlx4_get_slave_default_vlan - * return true if VST ( default vlan) * if VST, will return vlan & qos (if not NULL) @@ -2911,7 +3081,12 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in ivf->vlan = s_info->default_vlan; ivf->qos = s_info->default_qos; - ivf->max_tx_rate = s_info->tx_rate; + + if (mlx4_is_vf_vst_and_prio_qos(dev, port, s_info)) + ivf->max_tx_rate = s_info->tx_rate; + else + ivf->max_tx_rate = 0; + ivf->min_tx_rate = 0; ivf->spoofchk = s_info->spoofchk; ivf->linkstate = s_info->link_state; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index d73ab1ba5a78..2a5df0aa2228 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2240,6 +2240,16 @@ static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos); } +static int mlx4_en_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_set_vf_rate(mdev->dev, en_priv->port, vf, min_tx_rate, + max_tx_rate); +} + static int mlx4_en_set_vf_spoofchk(struct net_device *dev, int vf, bool setting) { struct mlx4_en_priv *en_priv = netdev_priv(dev); @@ -2458,6 +2468,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid, .ndo_set_vf_mac = mlx4_en_set_vf_mac, .ndo_set_vf_vlan = mlx4_en_set_vf_vlan, + .ndo_set_vf_rate = mlx4_en_set_vf_rate, .ndo_set_vf_spoofchk = mlx4_en_set_vf_spoofchk, .ndo_set_vf_link_state = mlx4_en_set_vf_link_state, .ndo_get_vf_config = mlx4_en_get_vf_config, diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 88326ed0033a..a788839f54bb 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -296,6 +296,8 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); +int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate, + int max_tx_rate); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting); int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_info *ivf); int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state); -- cgit v1.3-6-gb490 From 3742cc65512cd4897a63dce94104f9a6e74997a0 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:17 +0300 Subject: net/mlx4: Warn users of depracated QoS Firmware A new capability bit was introduced in the past to to differ devices using the QoS ETS feature. The old was deprecated since then. If driver sees device which set only the old capabilty, it will print warning to user suggesting to upgrade the FW. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- drivers/net/ethernet/mellanox/mlx4/fw.c | 7 +++++-- drivers/net/ethernet/mellanox/mlx4/main.c | 8 ++++++++ include/linux/mlx4/device.h | 1 + 4 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 2a5df0aa2228..be9e07a72193 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2814,7 +2814,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->msg_enable = MLX4_EN_MSG_LEVEL; #ifdef CONFIG_MLX4_EN_DCB if (!mlx4_is_slave(priv->mdev->dev)) { - if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) { + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { dev->dcbnl_ops = &mlx4_en_dcbnl_ops; } else { en_info(priv, "enabling only PFC DCB ops\n"); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 94e857e39063..16bbd6cdff6c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -149,6 +149,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [23] = "QP rate limiting support", [24] = "Ethernet Flow control statistics support", [25] = "Granular QoS per VF support", + [26] = "Port ETS Scheduler support", }; int i; @@ -900,6 +901,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field, outbox, QUERY_DEV_CAP_VXLAN); if (field & 1<<3) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS; + if (field & (1 << 5)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG; MLX4_GET(dev_cap->max_icm_sz, outbox, QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET); if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS) @@ -1157,9 +1160,9 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); - /* For guests, disable vxlan tunneling */ + /* For guests, disable vxlan tunneling and QoS support */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VXLAN); - field &= 0xf7; + field &= 0xd7; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VXLAN); /* For guests, report Blueflame disabled */ diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index ee0a67069b57..3becad37aec8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -528,6 +528,14 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.alloc_res_qp_mask = (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) | MLX4_RESERVE_A0_QP; + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) && + dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) { + mlx4_warn(dev, "Old device ETS support detected\n"); + mlx4_warn(dev, "Consider upgrading device FW.\n"); + dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG; + } + } else { dev->caps.alloc_res_qp_mask = 0; } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c37208f7869f..15f46767342e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -207,6 +207,7 @@ enum { MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23, MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24, MLX4_DEV_CAP_FLAG2_QOS_VPP = 1LL << 25, + MLX4_DEV_CAP_FLAG2_ETS_CFG = 1LL << 26, }; enum { -- cgit v1.3-6-gb490 From a130b59057320192b4b00ed0ba4bc8a38f66f289 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:19 +0300 Subject: net/mlx4: Add SET_PORT opcode modifiers enumeration The calls to SET_PORT used hard-code numbers, when supplying command's opcode modifiers, fix that to use well defined constants. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 11 +++++----- drivers/net/ethernet/mellanox/mlx4/cmd.c | 3 ++- drivers/net/ethernet/mellanox/mlx4/port.c | 36 ++++++++++++++++++------------- include/linux/mlx4/cmd.h | 8 ++++++- 4 files changed, 36 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b972c0b41799..976bea794b5f 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -587,8 +587,9 @@ static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_vio ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); } - err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev->dev, mailbox); return err; @@ -1525,8 +1526,8 @@ static void update_gids_task(struct work_struct *work) memcpy(gids, gw->gids, sizeof gw->gids); err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, - 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, - MLX4_CMD_WRAPPED); + MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) pr_warn("set port command failed\n"); else @@ -1564,7 +1565,7 @@ static void reset_gids_task(struct work_struct *work) IB_LINK_LAYER_ETHERNET) { err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, - 1, MLX4_CMD_SET_PORT, + MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index bc6e4c9b6bb5..06993ea9e6ba 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -726,7 +726,8 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, * specific command/input_mod/opcode_mod/fw-status to be debug. */ if (op == MLX4_CMD_SET_PORT && in_modifier == 1 && - op_modifier == 0 && context->fw_status == CMD_STAT_BAD_SIZE) + op_modifier == MLX4_SET_PORT_IB_OPCODE && + context->fw_status == CMD_STAT_BAD_SIZE) mlx4_dbg(dev, "command 0x%x failed: fw status = 0x%x\n", op, context->fw_status); else diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 6a53d42db52f..ca4488ed561c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -123,8 +123,9 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port, in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port; - err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, mailbox); return err; @@ -337,8 +338,9 @@ static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port, memcpy(mailbox->buf, entries, MLX4_VLAN_TABLE_SIZE); in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port; - err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, mailbox); @@ -625,9 +627,9 @@ static int mlx4_reset_roce_port_gids(struct mlx4_dev *dev, int slave, MLX4_ROCE_GID_ENTRY_SIZE); err = mlx4_cmd(dev, mailbox->dma, - ((u32)port) | (MLX4_SET_PORT_GID_TABLE << 8), 1, - MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, - MLX4_CMD_NATIVE); + ((u32)port) | (MLX4_SET_PORT_GID_TABLE << 8), + MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); mutex_unlock(&(priv->port[port].gid_table.mutex)); return err; } @@ -941,8 +943,9 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz) (pkey_tbl_flag << MLX4_CHANGE_PORT_PKEY_TBL_SZ) | (dev->caps.port_ib_mtu[port] << MLX4_SET_PORT_MTU_CAP) | (vl_cap << MLX4_SET_PORT_VL_CAP)); - err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + err = mlx4_cmd(dev, mailbox->dma, port, + MLX4_SET_PORT_IB_OPCODE, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err != -ENOMEM) break; } @@ -971,8 +974,9 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, context->pfcrx = pfcrx; in_mod = MLX4_SET_PORT_GENERAL << 8 | port; - err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev, mailbox); return err; @@ -1008,8 +1012,9 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, context->vlan_miss = MLX4_VLAN_MISS_IDX; in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port; - err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev, mailbox); return err; @@ -1050,8 +1055,9 @@ int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable) context->steering = steering; in_mod = MLX4_SET_PORT_VXLAN << 8 | port; - err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, mailbox); return err; diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index a788839f54bb..62c4d4def474 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -188,7 +188,13 @@ enum { }; enum { - /* set port opcode modifiers */ + /* Set port opcode modifiers */ + MLX4_SET_PORT_IB_OPCODE = 0x0, + MLX4_SET_PORT_ETH_OPCODE = 0x1, +}; + +enum { + /* Set port Ethernet input modifiers */ MLX4_SET_PORT_GENERAL = 0x0, MLX4_SET_PORT_RQP_CALC = 0x1, MLX4_SET_PORT_MAC_TABLE = 0x2, -- cgit v1.3-6-gb490 From 51af33cfed248dc8f36fa82df06b85e10038a01e Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:20 +0300 Subject: net/mlx4_en: Add interface identify support Add support for the interface ethtool identify feature. Make the physical port LED to blink with green and yellow colors. The device handles the LED blink by itself (synchrous use of set_phys_id), by returning 0 to ETHTOOL_ID_ACTIVE command. Signed-off-by: Eyal Grossman Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 27 +++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 10 +++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + drivers/net/ethernet/mellanox/mlx4/port.c | 26 ++++++++++++++++++++++++ include/linux/mlx4/cmd.h | 1 + include/linux/mlx4/device.h | 2 ++ 6 files changed, 67 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index eba969b08dd1..3f44e2bbb982 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1939,6 +1939,32 @@ static int mlx4_en_get_module_eeprom(struct net_device *dev, return 0; } +static int mlx4_en_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + int err; + u16 beacon_duration; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_BEACON)) + return -EOPNOTSUPP; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + beacon_duration = PORT_BEACON_MAX_LIMIT; + break; + case ETHTOOL_ID_INACTIVE: + beacon_duration = 0; + break; + default: + return -EOPNOTSUPP; + } + + err = mlx4_SET_PORT_BEACON(mdev->dev, priv->port, beacon_duration); + return err; +} + const struct ethtool_ops mlx4_en_ethtool_ops = { .get_drvinfo = mlx4_en_get_drvinfo, .get_settings = mlx4_en_get_settings, @@ -1948,6 +1974,7 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .get_sset_count = mlx4_en_get_sset_count, .get_ethtool_stats = mlx4_en_get_ethtool_stats, .self_test = mlx4_en_self_test, + .set_phys_id = mlx4_en_set_phys_id, .get_wol = mlx4_en_get_wol, .set_wol = mlx4_en_set_wol, .get_msglevel = mlx4_en_get_msglevel, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index b93e23bef6c9..6317844b291f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -150,6 +150,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [24] = "Ethernet Flow control statistics support", [25] = "Granular QoS per VF support", [26] = "Port ETS Scheduler support", + [27] = "Port beacon support", }; int i; @@ -647,6 +648,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_RSS_OFFSET 0x2e #define QUERY_DEV_CAP_MAX_RDMA_OFFSET 0x2f #define QUERY_DEV_CAP_RSZ_SRQ_OFFSET 0x33 +#define QUERY_DEV_CAP_PORT_BEACON_OFFSET 0x34 #define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35 #define QUERY_DEV_CAP_MTU_WIDTH_OFFSET 0x36 #define QUERY_DEV_CAP_VL_PORT_OFFSET 0x37 @@ -786,6 +788,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; + MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_BEACON_OFFSET); + if (field & 0x80) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_BEACON; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB; @@ -1165,6 +1170,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field &= 0xd7; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VXLAN); + /* For guests, disable port BEACON */ + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_PORT_BEACON_OFFSET); + field &= 0x7f; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_PORT_BEACON_OFFSET); + /* For guests, report Blueflame disabled */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET); field &= 0x7f; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 67eeea244eff..9de30216b146 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -482,6 +482,7 @@ enum { MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP = (1 << 5), }; +#define PORT_BEACON_MAX_LIMIT (65535) #define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE) #define MLX4_EN_MAC_HASH_IDX 5 diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index ca4488ed561c..e8d716dfd5cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -835,6 +835,12 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, MLX4_CMD_NATIVE); } + /* Slaves are not allowed to SET_PORT beacon (LED) blink */ + if (op_mod == MLX4_SET_PORT_BEACON_OPCODE) { + mlx4_warn(dev, "denying SET_PORT Beacon slave:%d\n", slave); + return -EPERM; + } + /* For IB, we only consider: * - The capability mask, which is set to the aggregate of all * slave function capabilities @@ -1064,6 +1070,26 @@ int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable) } EXPORT_SYMBOL(mlx4_SET_PORT_VXLAN); +int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time) +{ + int err; + struct mlx4_cmd_mailbox *mailbox; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + *((__be32 *)mailbox->buf) = cpu_to_be32(time); + + err = mlx4_cmd(dev, mailbox->dma, port, MLX4_SET_PORT_BEACON_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_BEACON); + int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 62c4d4def474..f62e7cf227c6 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -191,6 +191,7 @@ enum { /* Set port opcode modifiers */ MLX4_SET_PORT_IB_OPCODE = 0x0, MLX4_SET_PORT_ETH_OPCODE = 0x1, + MLX4_SET_PORT_BEACON_OPCODE = 0x4, }; enum { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 15f46767342e..b761be7c88c8 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -208,6 +208,7 @@ enum { MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24, MLX4_DEV_CAP_FLAG2_QOS_VPP = 1LL << 25, MLX4_DEV_CAP_FLAG2_ETS_CFG = 1LL << 26, + MLX4_DEV_CAP_FLAG2_PORT_BEACON = 1LL << 27, }; enum { @@ -1311,6 +1312,7 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); +int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time); int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); -- cgit v1.3-6-gb490 From 78500b8c03236a18d454c9cc8a24cccca506b200 Mon Sep 17 00:00:00 2001 From: Muhammad Mahajna Date: Thu, 2 Apr 2015 16:31:22 +0300 Subject: net/mlx4_en: Add RX-ALL support Enabled when the device supports KEEP FCS and IGNORE FCS. When the flag is set, pass all received frames up the stack, even ones with invalid FCS, controlled by ethtool. Signed-off-by: Muhammad Mahajna Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 14 +++++++++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 8 ++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 21 ++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 6 +++-- drivers/net/ethernet/mellanox/mlx4/port.c | 34 ++++++++++++++++++++++++++ include/linux/mlx4/device.h | 3 +++ 6 files changed, 84 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 333af3405a80..0f1afc085d58 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2204,6 +2204,17 @@ static int mlx4_en_set_features(struct net_device *netdev, reset = true; } + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_RXALL)) { + u8 ignore_fcs_value = (features & NETIF_F_RXALL) ? 1 : 0; + + en_info(priv, "Turn %s RX-ALL\n", + ignore_fcs_value ? "ON" : "OFF"); + ret = mlx4_SET_PORT_fcs_check(priv->mdev->dev, + priv->port, ignore_fcs_value); + if (ret) + return ret; + } + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_HW_VLAN_CTAG_RX)) { en_info(priv, "Turn %s RX vlan strip offload\n", (features & NETIF_F_HW_VLAN_CTAG_RX) ? "ON" : "OFF"); @@ -2914,6 +2925,9 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) dev->hw_features |= NETIF_F_RXFCS; + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS) + dev->hw_features |= NETIF_F_RXALL; + if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED && mdev->dev->caps.dmfs_high_steer_mode != MLX4_STEERING_DMFS_A0_STATIC) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 6317844b291f..b9881fc1252f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -151,6 +151,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [25] = "Granular QoS per VF support", [26] = "Port ETS Scheduler support", [27] = "Port beacon support", + [28] = "RX-ALL support", }; int i; @@ -893,6 +894,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); if (field & 0x20) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV; + if (field & (1 << 2)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_IGNORE_FCS; MLX4_GET(dev_cap->reserved_lkey, outbox, QUERY_DEV_CAP_RSVD_LKEY_OFFSET); MLX4_GET(field32, outbox, QUERY_DEV_CAP_ETH_BACKPL_OFFSET); @@ -1220,6 +1223,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field &= 0xef; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE); + /* turn off ignore FCS feature for guests */ + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); + field &= 0xfb; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 3becad37aec8..acceb75e8c44 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -297,6 +297,25 @@ static int mlx4_dev_port(struct mlx4_dev *dev, int port, return err; } +static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev) +{ + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS)) + return; + + if (mlx4_is_mfunc(dev)) { + mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS"); + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS; + return; + } + + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) { + mlx4_dbg(dev, + "Keep FCS is not supported - Disabling Ignore FCS"); + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS; + return; + } +} + #define MLX4_A0_STEERING_TABLE_SIZE 256 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { @@ -540,6 +559,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.alloc_res_qp_mask = 0; } + mlx4_enable_ignore_fcs(dev); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 32469c64d580..f30eeb730a86 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -763,9 +763,11 @@ enum { struct mlx4_set_port_general_context { - u8 reserved[3]; + u16 reserved1; + u8 v_ignore_fcs; u8 flags; - u16 reserved2; + u8 ignore_fcs; + u8 reserved2; __be16 mtu; u8 pptx; u8 pfctx; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index e8d716dfd5cb..c2b21313dba7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -45,6 +45,14 @@ #define MLX4_VLAN_VALID (1u << 31) #define MLX4_VLAN_MASK 0xfff +#define MLX4_STATS_TRAFFIC_COUNTERS_MASK 0xfULL +#define MLX4_STATS_TRAFFIC_DROPS_MASK 0xc0ULL +#define MLX4_STATS_ERROR_COUNTERS_MASK 0x1ffc30ULL +#define MLX4_STATS_PORT_COUNTERS_MASK 0x1fe00000ULL + +#define MLX4_FLAG_V_IGNORE_FCS_MASK 0x2 +#define MLX4_IGNORE_FCS_MASK 0x1 + void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) { int i; @@ -1027,6 +1035,32 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, } EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc); +int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, u8 ignore_fcs_value) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_general_context *context; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + context->v_ignore_fcs |= MLX4_FLAG_V_IGNORE_FCS_MASK; + if (ignore_fcs_value) + context->ignore_fcs |= MLX4_IGNORE_FCS_MASK; + else + context->ignore_fcs &= ~MLX4_IGNORE_FCS_MASK; + + in_mod = MLX4_SET_PORT_GENERAL << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_fcs_check); + enum { VXLAN_ENABLE_MODIFY = 1 << 7, VXLAN_STEERING_MODIFY = 1 << 6, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b761be7c88c8..f9ce34bec45b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -209,6 +209,7 @@ enum { MLX4_DEV_CAP_FLAG2_QOS_VPP = 1LL << 25, MLX4_DEV_CAP_FLAG2_ETS_CFG = 1LL << 26, MLX4_DEV_CAP_FLAG2_PORT_BEACON = 1LL << 27, + MLX4_DEV_CAP_FLAG2_IGNORE_FCS = 1LL << 28, }; enum { @@ -1313,6 +1314,8 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time); +int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, + u8 ignore_fcs_value); int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); -- cgit v1.3-6-gb490 From 64599cca51de08cef94bc13a0f98351e5bb01f41 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 2 Apr 2015 17:07:25 +0300 Subject: net/mlx5_core: Use coherent memory for command interface page Use coherent memory for the commands descriptor page. Take measures to make sure the page is aligned to MLX5_ADAPTER_PAGE_SIZE as required by the hardware. Reported-by: Yevgeny Kliteynik Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 72 ++++++++++++++++++--------- include/linux/mlx5/driver.h | 3 ++ 2 files changed, 52 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a2853057c779..43a73d36b01d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1319,6 +1319,45 @@ ex_err: return err; } +static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) +{ + struct device *ddev = &dev->pdev->dev; + + cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, + &cmd->alloc_dma, GFP_KERNEL); + if (!cmd->cmd_alloc_buf) + return -ENOMEM; + + /* make sure it is aligned to 4K */ + if (!((uintptr_t)cmd->cmd_alloc_buf & (MLX5_ADAPTER_PAGE_SIZE - 1))) { + cmd->cmd_buf = cmd->cmd_alloc_buf; + cmd->dma = cmd->alloc_dma; + cmd->alloc_size = MLX5_ADAPTER_PAGE_SIZE; + return 0; + } + + dma_free_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf, + cmd->alloc_dma); + cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev, + 2 * MLX5_ADAPTER_PAGE_SIZE - 1, + &cmd->alloc_dma, GFP_KERNEL); + if (!cmd->cmd_alloc_buf) + return -ENOMEM; + + cmd->cmd_buf = PTR_ALIGN(cmd->cmd_alloc_buf, MLX5_ADAPTER_PAGE_SIZE); + cmd->dma = ALIGN(cmd->alloc_dma, MLX5_ADAPTER_PAGE_SIZE); + cmd->alloc_size = 2 * MLX5_ADAPTER_PAGE_SIZE - 1; + return 0; +} + +static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) +{ + struct device *ddev = &dev->pdev->dev; + + dma_free_coherent(ddev, cmd->alloc_size, cmd->cmd_alloc_buf, + cmd->alloc_dma); +} + int mlx5_cmd_init(struct mlx5_core_dev *dev) { int size = sizeof(struct mlx5_cmd_prot_block); @@ -1341,17 +1380,9 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) if (!cmd->pool) return -ENOMEM; - cmd->cmd_buf = (void *)__get_free_pages(GFP_ATOMIC, 0); - if (!cmd->cmd_buf) { - err = -ENOMEM; + err = alloc_cmd_page(dev, cmd); + if (err) goto err_free_pool; - } - cmd->dma = dma_map_single(&dev->pdev->dev, cmd->cmd_buf, PAGE_SIZE, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(&dev->pdev->dev, cmd->dma)) { - err = -ENOMEM; - goto err_free; - } cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff; cmd->log_sz = cmd_l >> 4 & 0xf; @@ -1360,13 +1391,13 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n", 1 << cmd->log_sz); err = -EINVAL; - goto err_map; + goto err_free_page; } if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) { dev_err(&dev->pdev->dev, "command queue size overflow\n"); err = -EINVAL; - goto err_map; + goto err_free_page; } cmd->checksum_disabled = 1; @@ -1378,7 +1409,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n", CMD_IF_REV, cmd->cmdif_rev); err = -ENOTSUPP; - goto err_map; + goto err_free_page; } spin_lock_init(&cmd->alloc_lock); @@ -1394,7 +1425,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) if (cmd_l & 0xfff) { dev_err(&dev->pdev->dev, "invalid command queue address\n"); err = -ENOMEM; - goto err_map; + goto err_free_page; } iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h); @@ -1410,7 +1441,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) err = create_msg_cache(dev); if (err) { dev_err(&dev->pdev->dev, "failed to create command cache\n"); - goto err_map; + goto err_free_page; } set_wqname(dev); @@ -1435,11 +1466,8 @@ err_wq: err_cache: destroy_msg_cache(dev); -err_map: - dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE, - DMA_BIDIRECTIONAL); -err_free: - free_pages((unsigned long)cmd->cmd_buf, 0); +err_free_page: + free_cmd_page(dev, cmd); err_free_pool: pci_pool_destroy(cmd->pool); @@ -1455,9 +1483,7 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) clean_debug_files(dev); destroy_workqueue(cmd->wq); destroy_msg_cache(dev); - dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE, - DMA_BIDIRECTIONAL); - free_pages((unsigned long)cmd->cmd_buf, 0); + free_cmd_page(dev, cmd); pci_pool_destroy(cmd->pool); } EXPORT_SYMBOL(mlx5_cmd_cleanup); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 166d9315fe4b..8d8ca6d9b03b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -232,6 +232,9 @@ struct mlx5_cmd_stats { }; struct mlx5_cmd { + void *cmd_alloc_buf; + dma_addr_t alloc_dma; + int alloc_size; void *cmd_buf; dma_addr_t dma; u16 cmdif_rev; -- cgit v1.3-6-gb490 From 302bdf68fc56a6330bc6b10ce435b4d466417537 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 2 Apr 2015 17:07:29 +0300 Subject: net/mlx5_core: Fix Mellanox copyright note Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/mad.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/mcg.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/pd.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/port.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/uar.c | 2 +- include/linux/mlx5/cmd.h | 2 +- include/linux/mlx5/cq.h | 2 +- include/linux/mlx5/device.h | 2 +- include/linux/mlx5/doorbell.h | 2 +- include/linux/mlx5/driver.h | 2 +- include/linux/mlx5/mlx5_ifc.h | 2 +- include/linux/mlx5/qp.h | 2 +- include/linux/mlx5/srq.h | 2 +- 26 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 63dd75485204..ac0f7bf4be95 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 697d538163f2..e3273faf4568 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 43c5f4809526..eb0cf81f5f45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 4878025e231c..5210d92e6bc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index da82991239a8..dbf190d9b9ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 06f9036acd83..4b4cda3bcc5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 3e6670c4a7cd..292d76f2a904 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c index fd80ecfa7195..ee1b0b965f34 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f29fd5592305..78dfdab1ead4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c index 44837640bd7c..d79fd85d1dd5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index f0c9f9a7a361..a051b906afdf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 1a9b75139c2d..1adb300dd850 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index cf8591a0c48d..df2238372ea7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c index 790da5c4ca4f..f2d3aee909e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 72c2d002c3b8..49e90f2612d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 575d853dbe05..dc7dbf7e9d98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 38bce93f8314..f9d25dcd03c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 06801d6f595e..5a89bb1d678a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/cmd.h b/include/linux/mlx5/cmd.h index 2826a4b6071e..68cd08f02c2f 100644 --- a/include/linux/mlx5/cmd.h +++ b/include/linux/mlx5/cmd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index f6b17ac601bd..72ee0d732a26 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4e5bd813bb9a..abf65c790421 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h index 163a818411e7..afc78a3f4462 100644 --- a/include/linux/mlx5/doorbell.h +++ b/include/linux/mlx5/doorbell.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8d8ca6d9b03b..8fedd39a9a60 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5f48b8f592c5..cb3ad17edd1f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 61f7a342d1bf..310b5f7fd6ae 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index e1a363a33663..f43ed054a3e0 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU -- cgit v1.3-6-gb490 From 233d05d28ad942929b6b4fbc48aa8dd083c16484 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 2 Apr 2015 17:07:32 +0300 Subject: net/mlx5_core: Move completion eqs from mlx5_ib to mlx5_core Preparation for ethernet driver. These functions will be used in drivers other than mlx5_ib. Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/cq.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 104 +------------------------ drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 - drivers/net/ethernet/mellanox/mlx5/core/main.c | 91 ++++++++++++++++++++++ include/linux/mlx5/driver.h | 3 +- 5 files changed, 98 insertions(+), 105 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 6efd9f73ff60..c94e2651820d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -780,7 +780,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, cq->cqe_size = cqe_size; cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index); - err = mlx5_vector2eqn(dev, vector, &eqn, &irqn); + err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn); if (err) goto err_cqb; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 45816b10af5e..e59e2a1f8726 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -62,95 +62,6 @@ static char mlx5_version[] = DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; -int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn) -{ - struct mlx5_eq_table *table = &dev->mdev->priv.eq_table; - struct mlx5_eq *eq, *n; - int err = -ENOENT; - - spin_lock(&table->lock); - list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { - if (eq->index == vector) { - *eqn = eq->eqn; - *irqn = eq->irqn; - err = 0; - break; - } - } - spin_unlock(&table->lock); - - return err; -} - -static int alloc_comp_eqs(struct mlx5_ib_dev *dev) -{ - struct mlx5_eq_table *table = &dev->mdev->priv.eq_table; - char name[MLX5_MAX_EQ_NAME]; - struct mlx5_eq *eq, *n; - int ncomp_vec; - int nent; - int err; - int i; - - INIT_LIST_HEAD(&dev->eqs_list); - ncomp_vec = table->num_comp_vectors; - nent = MLX5_COMP_EQ_SIZE; - for (i = 0; i < ncomp_vec; i++) { - eq = kzalloc(sizeof(*eq), GFP_KERNEL); - if (!eq) { - err = -ENOMEM; - goto clean; - } - - snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); - err = mlx5_create_map_eq(dev->mdev, eq, - i + MLX5_EQ_VEC_COMP_BASE, nent, 0, - name, &dev->mdev->priv.uuari.uars[0]); - if (err) { - kfree(eq); - goto clean; - } - mlx5_ib_dbg(dev, "allocated completion EQN %d\n", eq->eqn); - eq->index = i; - spin_lock(&table->lock); - list_add_tail(&eq->list, &dev->eqs_list); - spin_unlock(&table->lock); - } - - dev->num_comp_vectors = ncomp_vec; - return 0; - -clean: - spin_lock(&table->lock); - list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { - list_del(&eq->list); - spin_unlock(&table->lock); - if (mlx5_destroy_unmap_eq(dev->mdev, eq)) - mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn); - kfree(eq); - spin_lock(&table->lock); - } - spin_unlock(&table->lock); - return err; -} - -static void free_comp_eqs(struct mlx5_ib_dev *dev) -{ - struct mlx5_eq_table *table = &dev->mdev->priv.eq_table; - struct mlx5_eq *eq, *n; - - spin_lock(&table->lock); - list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { - list_del(&eq->list); - spin_unlock(&table->lock); - if (mlx5_destroy_unmap_eq(dev->mdev, eq)) - mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn); - kfree(eq); - spin_lock(&table->lock); - } - spin_unlock(&table->lock); -} - static int mlx5_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { @@ -1291,10 +1202,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) get_ext_port_caps(dev); - err = alloc_comp_eqs(dev); - if (err) - goto err_dealloc; - MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock); strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); @@ -1303,7 +1210,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.local_dma_lkey = mdev->caps.gen.reserved_lkey; dev->num_ports = mdev->caps.gen.num_ports; dev->ib_dev.phys_port_cnt = dev->num_ports; - dev->ib_dev.num_comp_vectors = dev->num_comp_vectors; + dev->ib_dev.num_comp_vectors = + dev->mdev->priv.eq_table.num_comp_vectors; dev->ib_dev.dma_device = &mdev->pdev->dev; dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION; @@ -1390,13 +1298,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) err = init_node_data(dev); if (err) - goto err_eqs; + goto err_dealloc; mutex_init(&dev->cap_mask_mutex); err = create_dev_resources(&dev->devr); if (err) - goto err_eqs; + goto err_dealloc; err = mlx5_ib_odp_init_one(dev); if (err) @@ -1433,9 +1341,6 @@ err_odp: err_rsrc: destroy_dev_resources(&dev->devr); -err_eqs: - free_comp_eqs(dev); - err_dealloc: ib_dealloc_device((struct ib_device *)dev); @@ -1450,7 +1355,6 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) destroy_umrc_res(dev); mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); - free_comp_eqs(dev); ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 8f94f69fbea5..dff1cfcdf476 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -421,9 +421,7 @@ struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; MLX5_DECLARE_DOORBELL_LOCK(uar_lock); - struct list_head eqs_list; int num_ports; - int num_comp_vectors; /* serialize update of capability mask */ struct mutex cap_mask_mutex; @@ -594,7 +592,6 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd); -int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index fe41a7c2c629..cfc29027f07a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -507,6 +507,87 @@ static int mlx5_core_disable_hca(struct mlx5_core_dev *dev) return 0; } +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq *eq, *n; + int err = -ENOENT; + + spin_lock(&table->lock); + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + if (eq->index == vector) { + *eqn = eq->eqn; + *irqn = eq->irqn; + err = 0; + break; + } + } + spin_unlock(&table->lock); + + return err; +} +EXPORT_SYMBOL(mlx5_vector2eqn); + +static void free_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq *eq, *n; + + spin_lock(&table->lock); + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + list_del(&eq->list); + spin_unlock(&table->lock); + if (mlx5_destroy_unmap_eq(dev, eq)) + mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n", + eq->eqn); + kfree(eq); + spin_lock(&table->lock); + } + spin_unlock(&table->lock); +} + +static int alloc_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + char name[MLX5_MAX_EQ_NAME]; + struct mlx5_eq *eq; + int ncomp_vec; + int nent; + int err; + int i; + + INIT_LIST_HEAD(&table->comp_eqs_list); + ncomp_vec = table->num_comp_vectors; + nent = MLX5_COMP_EQ_SIZE; + for (i = 0; i < ncomp_vec; i++) { + eq = kzalloc(sizeof(*eq), GFP_KERNEL); + if (!eq) { + err = -ENOMEM; + goto clean; + } + + snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); + err = mlx5_create_map_eq(dev, eq, + i + MLX5_EQ_VEC_COMP_BASE, nent, 0, + name, &dev->priv.uuari.uars[0]); + if (err) { + kfree(eq); + goto clean; + } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); + eq->index = i; + spin_lock(&table->lock); + list_add_tail(&eq->list, &table->comp_eqs_list); + spin_unlock(&table->lock); + } + + return 0; + +clean: + free_comp_eqs(dev); + return err; +} + static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) { struct mlx5_priv *priv = &dev->priv; @@ -643,6 +724,12 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) goto err_free_uar; } + err = alloc_comp_eqs(dev); + if (err) { + dev_err(&pdev->dev, "Failed to alloc completion EQs\n"); + goto err_stop_eqs; + } + MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); mlx5_init_cq_table(dev); @@ -652,6 +739,9 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) return 0; +err_stop_eqs: + mlx5_stop_eqs(dev); + err_free_uar: mlx5_free_uuars(dev, &priv->uuari); @@ -703,6 +793,7 @@ static void mlx5_dev_cleanup(struct mlx5_core_dev *dev) mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); + free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_free_uuars(dev, &priv->uuari); mlx5_eq_cleanup(dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8fedd39a9a60..f250f6580dad 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -410,7 +410,7 @@ struct mlx5_core_srq { struct mlx5_eq_table { void __iomem *update_ci; void __iomem *update_arm_ci; - struct list_head *comp_eq_head; + struct list_head comp_eqs_list; struct mlx5_eq pages_eq; struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; @@ -725,6 +725,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_start_eqs(struct mlx5_core_dev *dev); int mlx5_stop_eqs(struct mlx5_core_dev *dev); +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); -- cgit v1.3-6-gb490 From ce0f75093282c5dca1e79ae3e3e893deaea86166 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 2 Apr 2015 17:07:33 +0300 Subject: net/mlx5_core: Modify arm CQ in preparation for upcoming Ethernet driver Pass consumer index as a parameter to arm CQ Signed-off-by: Saeed Mahameed Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/cq.c | 8 ++++++-- include/linux/mlx5/cq.h | 5 +++-- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index c94e2651820d..2ee6b1051975 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -572,11 +572,15 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { + struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev; + void __iomem *uar_page = mdev->priv.uuari.uars[0].map; + mlx5_cq_arm(&to_mcq(ibcq)->mcq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT, - to_mdev(ibcq->device)->mdev->priv.uuari.uars[0].map, - MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev->priv.cq_uar_lock)); + uar_page, + MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock), + to_mcq(ibcq)->mcq.cons_index); return 0; } diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 72ee0d732a26..2695ced222df 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -137,14 +137,15 @@ enum { static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, void __iomem *uar_page, - spinlock_t *doorbell_lock) + spinlock_t *doorbell_lock, + u32 cons_index) { __be32 doorbell[2]; u32 sn; u32 ci; sn = cq->arm_sn & 3; - ci = cq->cons_index & 0xffffff; + ci = cons_index & 0xffffff; *cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci); -- cgit v1.3-6-gb490 From 64613d9499c4887485d4350387919ea507330d90 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 2 Apr 2015 17:07:34 +0300 Subject: net/mlx5_core: Extend struct mlx5_interface to support multiple protocols Preparation for ethernet driver. Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/main.c | 22 ++++++++++++++++++++++ include/linux/mlx5/driver.h | 8 ++++++++ 3 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e59e2a1f8726..57c9809e8b87 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1362,6 +1362,7 @@ static struct mlx5_interface mlx5_ib_interface = { .add = mlx5_ib_add, .remove = mlx5_ib_remove, .event = mlx5_ib_event, + .protocol = MLX5_INTERFACE_PROTOCOL_IB, }; static int __init mlx5_ib_init(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index cfc29027f07a..28425e5ea91f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -908,6 +908,28 @@ void mlx5_unregister_interface(struct mlx5_interface *intf) } EXPORT_SYMBOL(mlx5_unregister_interface); +void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) +{ + struct mlx5_priv *priv = &mdev->priv; + struct mlx5_device_context *dev_ctx; + unsigned long flags; + void *result = NULL; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) + if ((dev_ctx->intf->protocol == protocol) && + dev_ctx->intf->get_dev) { + result = dev_ctx->intf->get_dev(dev_ctx->context); + break; + } + + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + return result; +} +EXPORT_SYMBOL(mlx5_get_protocol_dev); + static void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f250f6580dad..9a90e7523dc2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -781,14 +781,22 @@ enum { MAX_MR_CACHE_ENTRIES = 16, }; +enum { + MLX5_INTERFACE_PROTOCOL_IB = 0, + MLX5_INTERFACE_PROTOCOL_ETH = 1, +}; + struct mlx5_interface { void * (*add)(struct mlx5_core_dev *dev); void (*remove)(struct mlx5_core_dev *dev, void *context); void (*event)(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param); + void * (*get_dev)(void *context); + int protocol; struct list_head list; }; +void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); -- cgit v1.3-6-gb490 From e79d8429aac95a5cbe4c235795c7cd554c91f924 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 3 Apr 2015 22:17:17 +1030 Subject: netdevice: document NETDEV_TX_BUSY deprecation. This paraphrases DaveM (and steals some of his words) explaining why a device shouldn't return NETDEV_TX_BUSY, even though it looks so inviting to driver authors. See http://www.spinics.net/lists/netdev/msg322350.html Inspired-by: David Miller Signed-off-by: Rusty Russell Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 846a1f5bc9db..a710d22b174f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -795,7 +795,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, * struct net_device *dev); * Called when a packet needs to be transmitted. - * Must return NETDEV_TX_OK , NETDEV_TX_BUSY. + * Returns NETDEV_TX_OK. Can return NETDEV_TX_BUSY, but you should stop + * the queue before that can happen; it's for obsolete devices and weird + * corner cases, but the stack really does a non-trivial amount + * of useless work if you return NETDEV_TX_BUSY. * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required can not be NULL. * -- cgit v1.3-6-gb490 From 2e7056c433216f406b90a003aa0ba42e19d3bdcf Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 31 Mar 2015 14:19:10 -0700 Subject: jhash: Update jhash_[321]words functions to use correct initval Looking over the implementation for jhash2 and comparing it to jhash_3words I realized that the two hashes were in fact very different. Doing a bit of digging led me to "The new jhash implementation" in which lookup2 was supposed to have been replaced with lookup3. In reviewing the patch I noticed that jhash2 had originally initialized a and b to JHASH_GOLDENRATIO and c to initval, but after the patch a, b, and c were initialized to initval + (length << 2) + JHASH_INITVAL. However the changes in jhash_3words simply replaced the initialization of a and b with JHASH_INITVAL. This change corrects what I believe was an oversight so that a, b, and c in jhash_3words all have the same value added consisting of initval + (length << 2) + JHASH_INITVAL so that jhash2 and jhash_3words will now produce the same hash result given the same inputs. Fixes: 60d509c823cca ("The new jhash implementation") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/jhash.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jhash.h b/include/linux/jhash.h index 47cb09edec1a..348c6f47e4cc 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -145,11 +145,11 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) } -/* jhash_3words - hash exactly 3, 2 or 1 word(s) */ -static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) +/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */ +static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { - a += JHASH_INITVAL; - b += JHASH_INITVAL; + a += initval; + b += initval; c += initval; __jhash_final(a, b, c); @@ -157,14 +157,19 @@ static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) return c; } +static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) +{ + return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2)); +} + static inline u32 jhash_2words(u32 a, u32 b, u32 initval) { - return jhash_3words(a, b, 0, initval); + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); } static inline u32 jhash_1word(u32 a, u32 initval) { - return jhash_3words(a, 0, 0, initval); + return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2)); } #endif /* _LINUX_JHASH_H */ -- cgit v1.3-6-gb490 From a3bebdce4135a44d09e96ba66c40797c8f9fa902 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Wed, 1 Apr 2015 20:30:31 +0300 Subject: add fixed_phy_update_state() - update state of fixed_phy Currently fixed_phy uses a callback to periodically poll the link state. This patch adds the fixed_phy_update_state() API. It solves the following problems: - On link state interrupt, MAC driver can't update status. Instead it needs to provide the callback to periodically query the HW about the link state. It is more efficient to update status after interrupt. - The callback needs to be unregistered before phy_disconnect(), or otherwise it will be called with net_dev==NULL. phy_disconnect() does not have enough info to unregister the callback automatically. - The callback needs to be registered before of_phy_connect() to avoid running with outdated state, but of_phy_connect() returns the phy_device pointer, which is needed to register the callback. Registering it before of_phy_connect() will therefore require a hack to get the pointer earlier. Overall, this addition makes the subsequent patch that implements SGMII link status for mvneta, much cleaner. CC: Florian Fainelli CC: netdev@vger.kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Stas Sergeev Signed-off-by: David S. Miller --- drivers/net/phy/fixed_phy.c | 29 +++++++++++++++++++++++++++++ include/linux/phy_fixed.h | 9 +++++++++ 2 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index a08a3c78ba97..1960b46add65 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -183,6 +183,35 @@ int fixed_phy_set_link_update(struct phy_device *phydev, } EXPORT_SYMBOL_GPL(fixed_phy_set_link_update); +int fixed_phy_update_state(struct phy_device *phydev, + const struct fixed_phy_status *status, + const struct fixed_phy_status *changed) +{ + struct fixed_mdio_bus *fmb = &platform_fmb; + struct fixed_phy *fp; + + if (!phydev || !phydev->bus) + return -EINVAL; + + list_for_each_entry(fp, &fmb->phys, node) { + if (fp->addr == phydev->addr) { +#define _UPD(x) if (changed->x) \ + fp->status.x = status->x + _UPD(link); + _UPD(speed); + _UPD(duplex); + _UPD(pause); + _UPD(asym_pause); +#undef _UPD + fixed_phy_update_regs(fp); + return 0; + } + } + + return -ENOENT; +} +EXPORT_SYMBOL(fixed_phy_update_state); + int fixed_phy_add(unsigned int irq, int phy_addr, struct fixed_phy_status *status) { diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 7e75bfe37cc7..fe5732d53eda 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -21,6 +21,9 @@ extern void fixed_phy_del(int phy_addr); extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, struct fixed_phy_status *)); +extern int fixed_phy_update_state(struct phy_device *phydev, + const struct fixed_phy_status *status, + const struct fixed_phy_status *changed); #else static inline int fixed_phy_add(unsigned int irq, int phy_id, struct fixed_phy_status *status) @@ -43,6 +46,12 @@ static inline int fixed_phy_set_link_update(struct phy_device *phydev, { return -ENODEV; } +static inline int fixed_phy_update_state(struct phy_device *phydev, + const struct fixed_phy_status *status, + const struct fixed_phy_status *changed) +{ + return -ENODEV; +} #endif /* CONFIG_FIXED_PHY */ #endif /* __PHY_FIXED_H */ -- cgit v1.3-6-gb490 From cfdfab314647b1755afedc33ab66f3f247e161ae Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 16:23:58 -0400 Subject: netfilter: Create and use nf_hook_state. Instead of passing a large number of arguments down into the nf_hook() entry points, create a structure which carries this state down through the hook processing layers. This makes is so that if we want to change the types or signatures of any of these pieces of state, there are less places that need to be changed. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 28 +++++++++++++++++++++++----- net/netfilter/core.c | 32 +++++++++++++------------------- net/netfilter/nf_internals.h | 11 +++-------- net/netfilter/nf_queue.c | 38 ++++++++++++++++++++------------------ 4 files changed, 59 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2517ece98820..aee7ef1e23ed 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -44,6 +44,16 @@ int netfilter_init(void); struct sk_buff; struct nf_hook_ops; + +struct nf_hook_state { + unsigned int hook; + int thresh; + u_int8_t pf; + struct net_device *in; + struct net_device *out; + int (*okfn)(struct sk_buff *); +}; + typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, @@ -118,9 +128,7 @@ static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) } #endif -int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh); +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); /** * nf_hook_thresh - call a netfilter hook @@ -135,8 +143,18 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh) { - if (nf_hooks_active(pf, hook)) - return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh); + if (nf_hooks_active(pf, hook)) { + struct nf_hook_state state = { + .hook = hook, + .thresh = thresh, + .pf = pf, + .in = indev, + .out = outdev, + .okfn = okfn + }; + + return nf_hook_slow(skb, &state); + } return 1; } diff --git a/net/netfilter/core.c b/net/netfilter/core.c index fea9ef566427..11d04ebfc5e3 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -120,12 +120,8 @@ EXPORT_SYMBOL(nf_unregister_hooks); unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - unsigned int hook, - const struct net_device *indev, - const struct net_device *outdev, - struct nf_hook_ops **elemp, - int (*okfn)(struct sk_buff *), - int hook_thresh) + struct nf_hook_state *state, + struct nf_hook_ops **elemp) { unsigned int verdict; @@ -134,19 +130,20 @@ unsigned int nf_iterate(struct list_head *head, * function because of risk of continuing from deleted element. */ list_for_each_entry_continue_rcu((*elemp), head, list) { - if (hook_thresh > (*elemp)->priority) + if (state->thresh > (*elemp)->priority) continue; /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn); + verdict = (*elemp)->hook(*elemp, skb, state->in, state->out, + state->okfn); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)) { NFDEBUG("Evil return from %p(%u).\n", - (*elemp)->hook, hook); + (*elemp)->hook, state->hook); continue; } #endif @@ -161,11 +158,7 @@ repeat: /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. */ -int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - int hook_thresh) +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) { struct nf_hook_ops *elem; unsigned int verdict; @@ -174,10 +167,11 @@ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - elem = list_entry_rcu(&nf_hooks[pf][hook], struct nf_hook_ops, list); + elem = list_entry_rcu(&nf_hooks[state->pf][state->hook], + struct nf_hook_ops, list); next_hook: - verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev, - outdev, &elem, okfn, hook_thresh); + verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state, + &elem); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { @@ -186,8 +180,8 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, - verdict >> NF_VERDICT_QBITS); + int err = nf_queue(skb, elem, state, + verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ECANCELED) goto next_hook; diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 61a3c927e63c..ea7f36784b3d 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -14,16 +14,11 @@ /* core.c */ unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - unsigned int hook, const struct net_device *indev, - const struct net_device *outdev, - struct nf_hook_ops **elemp, - int (*okfn)(struct sk_buff *), int hook_thresh); + struct nf_hook_state *state, struct nf_hook_ops **elemp); /* nf_queue.c */ -int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf, - unsigned int hook, struct net_device *indev, - struct net_device *outdev, int (*okfn)(struct sk_buff *), - unsigned int queuenum); +int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, + struct nf_hook_state *state, unsigned int queuenum); int __init netfilter_queue_init(void); /* nf_log.c */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 4c8b68e5fa16..6f8e9485cc83 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -100,12 +100,9 @@ EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); * through nf_reinject(). */ int nf_queue(struct sk_buff *skb, - struct nf_hook_ops *elem, - u_int8_t pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - unsigned int queuenum) + struct nf_hook_ops *elem, + struct nf_hook_state *state, + unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; @@ -121,7 +118,7 @@ int nf_queue(struct sk_buff *skb, goto err_unlock; } - afinfo = nf_get_afinfo(pf); + afinfo = nf_get_afinfo(state->pf); if (!afinfo) goto err_unlock; @@ -134,11 +131,11 @@ int nf_queue(struct sk_buff *skb, *entry = (struct nf_queue_entry) { .skb = skb, .elem = elem, - .pf = pf, - .hook = hook, - .indev = indev, - .outdev = outdev, - .okfn = okfn, + .pf = state->pf, + .hook = state->hook, + .indev = state->in, + .outdev = state->out, + .okfn = state->okfn, .size = sizeof(*entry) + afinfo->route_key_size, }; @@ -171,6 +168,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) struct sk_buff *skb = entry->skb; struct nf_hook_ops *elem = entry->elem; const struct nf_afinfo *afinfo; + struct nf_hook_state state; int err; rcu_read_lock(); @@ -189,12 +187,17 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) verdict = NF_DROP; } + state.hook = entry->hook; + state.thresh = INT_MIN; + state.pf = entry->pf; + state.in = entry->indev; + state.out = entry->outdev; + state.okfn = entry->okfn; + if (verdict == NF_ACCEPT) { next_hook: verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook], - skb, entry->hook, - entry->indev, entry->outdev, &elem, - entry->okfn, INT_MIN); + skb, &state, &elem); } switch (verdict & NF_VERDICT_MASK) { @@ -205,9 +208,8 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) local_bh_enable(); break; case NF_QUEUE: - err = nf_queue(skb, elem, entry->pf, entry->hook, - entry->indev, entry->outdev, entry->okfn, - verdict >> NF_VERDICT_QBITS); + err = nf_queue(skb, elem, &state, + verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ECANCELED) goto next_hook; -- cgit v1.3-6-gb490 From 238e54c9cb9385a1ba99e92801f3615a2fb398b6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 20:32:56 -0400 Subject: netfilter: Make nf_hookfn use nf_hook_state. Pass the nf_hook_state all the way down into the hook functions themselves. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 4 +-- net/bridge/br_netfilter.c | 46 +++++++++----------------- net/bridge/netfilter/ebtable_filter.c | 14 ++++---- net/bridge/netfilter/ebtable_nat.c | 14 ++++---- net/bridge/netfilter/nf_tables_bridge.c | 10 +++--- net/decnet/netfilter/dn_rtmsg.c | 4 +-- net/ipv4/netfilter/arptable_filter.c | 7 ++-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 10 +++--- net/ipv4/netfilter/ipt_SYNPROXY.c | 6 ++-- net/ipv4/netfilter/iptable_filter.c | 7 ++-- net/ipv4/netfilter/iptable_mangle.c | 14 ++++---- net/ipv4/netfilter/iptable_nat.c | 28 +++++++--------- net/ipv4/netfilter/iptable_raw.c | 8 ++--- net/ipv4/netfilter/iptable_security.c | 8 ++--- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 20 ++++------- net/ipv4/netfilter/nf_defrag_ipv4.c | 4 +-- net/ipv4/netfilter/nf_tables_arp.c | 6 ++-- net/ipv4/netfilter/nf_tables_ipv4.c | 12 +++---- net/ipv4/netfilter/nft_chain_nat_ipv4.c | 25 +++++--------- net/ipv4/netfilter/nft_chain_route_ipv4.c | 6 ++-- net/ipv6/netfilter/ip6t_SYNPROXY.c | 6 ++-- net/ipv6/netfilter/ip6table_filter.c | 7 ++-- net/ipv6/netfilter/ip6table_mangle.c | 13 ++++---- net/ipv6/netfilter/ip6table_nat.c | 28 +++++++--------- net/ipv6/netfilter/ip6table_raw.c | 7 ++-- net/ipv6/netfilter/ip6table_security.c | 8 ++--- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 20 ++++------- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 8 ++--- net/ipv6/netfilter/nf_tables_ipv6.c | 12 +++---- net/ipv6/netfilter/nft_chain_nat_ipv6.c | 24 +++++--------- net/ipv6/netfilter/nft_chain_route_ipv6.c | 6 ++-- net/netfilter/core.c | 3 +- net/netfilter/ipvs/ip_vs_core.c | 32 ++++++------------ security/selinux/hooks.c | 28 +++++----------- security/smack/smack_netfilter.c | 8 ++--- 35 files changed, 169 insertions(+), 294 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index aee7ef1e23ed..c480c43ad8f7 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -56,9 +56,7 @@ struct nf_hook_state { typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)); + const struct nf_hook_state *state); struct nf_hook_ops { struct list_head list; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index f3884a1b942f..7527e94dd5dc 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -562,9 +562,7 @@ bad: * to ip6tables, which doesn't support NAT, so things are fairly simple. */ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { const struct ipv6hdr *hdr; u32 pkt_len; @@ -612,9 +610,7 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, * address to be able to detect DNAT afterwards. */ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct net_bridge_port *p; struct net_bridge *br; @@ -623,7 +619,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, if (unlikely(!pskb_may_pull(skb, len))) return NF_DROP; - p = br_port_get_rcu(in); + p = br_port_get_rcu(state->in); if (p == NULL) return NF_DROP; br = p->br; @@ -633,7 +629,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); - return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn); + return br_nf_pre_routing_ipv6(ops, skb, state); } if (!brnf_call_iptables && !br->nf_call_iptables) @@ -671,9 +667,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, * prevent this from happening. */ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { br_drop_fake_rtable(skb); return NF_ACCEPT; @@ -710,9 +704,7 @@ static int br_nf_forward_finish(struct sk_buff *skb) * bridge ports. */ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge; struct net_device *parent; @@ -726,7 +718,7 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, if (!nf_bridge_unshare(skb)) return NF_DROP; - parent = bridge_parent(out); + parent = bridge_parent(state->out); if (!parent) return NF_DROP; @@ -754,23 +746,21 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, in), parent, - br_nf_forward_finish); + NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, state->in), + parent, br_nf_forward_finish); return NF_STOLEN; } static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct net_bridge_port *p; struct net_bridge *br; struct net_device **d = (struct net_device **)(skb->cb); - p = br_port_get_rcu(out); + p = br_port_get_rcu(state->out); if (p == NULL) return NF_ACCEPT; br = p->br; @@ -789,9 +779,9 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, nf_bridge_push_encap_header(skb); return NF_ACCEPT; } - *d = (struct net_device *)in; - NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in, - (struct net_device *)out, br_nf_forward_finish); + *d = state->in; + NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, state->in, + state->out, br_nf_forward_finish); return NF_STOLEN; } @@ -859,9 +849,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) /* PF_BRIDGE/POST_ROUTING ********************************************/ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct net_device *realoutdev = bridge_parent(skb->dev); @@ -910,9 +898,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, * for the second time. */ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { if (skb->nf_bridge && !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index ce205aabf9c5..8a3f63b2e807 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -58,20 +58,18 @@ static const struct ebt_table frame_filter = { static unsigned int ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, in, out, - dev_net(in)->xt.frame_filter); + return ebt_do_table(ops->hooknum, skb, state->in, state->out, + dev_net(state->in)->xt.frame_filter); } static unsigned int ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, in, out, - dev_net(out)->xt.frame_filter); + return ebt_do_table(ops->hooknum, skb, state->in, state->out, + dev_net(state->out)->xt.frame_filter); } static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index a0ac2984fb6c..c5ef5b1ab678 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -58,20 +58,18 @@ static struct ebt_table frame_nat = { static unsigned int ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, in, out, - dev_net(in)->xt.frame_nat); + return ebt_do_table(ops->hooknum, skb, state->in, state->out, + dev_net(state->in)->xt.frame_nat); } static unsigned int ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, in, out, - dev_net(out)->xt.frame_nat); + return ebt_do_table(ops->hooknum, skb, state->in, state->out, + dev_net(state->out)->xt.frame_nat); } static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 19473a9371b8..2c46a47160a8 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -93,21 +93,19 @@ static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, static unsigned int nft_do_chain_bridge(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nft_pktinfo pkt; switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_bridge_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + nft_bridge_set_pktinfo_ipv4(&pkt, ops, skb, state->in, state->out); break; case htons(ETH_P_IPV6): - nft_bridge_set_pktinfo_ipv6(&pkt, ops, skb, in, out); + nft_bridge_set_pktinfo_ipv6(&pkt, ops, skb, state->in, state->out); break; default: - nft_set_pktinfo(&pkt, ops, skb, in, out); + nft_set_pktinfo(&pkt, ops, skb, state->in, state->out); break; } diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index e4d9560a910b..af34fc9bdf69 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -89,9 +89,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) static unsigned int dnrmg_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { dnrmg_send_peer(skb); return NF_ACCEPT; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 802ddecb30b8..6a641cb41062 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -28,12 +28,11 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - const struct net *net = dev_net((in != NULL) ? in : out); + const struct net *net = dev_net(state->in ? state->in : state->out); - return arpt_do_table(skb, ops->hooknum, in, out, + return arpt_do_table(skb, ops->hooknum, state->in, state->out, net->ipv4.arptable_filter); } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index f75e9df5e017..771ab3d01ad3 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -504,14 +504,12 @@ static void arp_print(struct arp_payload *payload) static unsigned int arp_mangle(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct arphdr *arp = arp_hdr(skb); struct arp_payload *payload; struct clusterip_config *c; - struct net *net = dev_net(in ? in : out); + struct net *net = dev_net(state->in ? state->in : state->out); /* we don't care about non-ethernet and non-ipv4 ARP */ if (arp->ar_hrd != htons(ARPHRD_ETHER) || @@ -536,10 +534,10 @@ arp_mangle(const struct nf_hook_ops *ops, * addresses on different interfacs. However, in the CLUSTERIP case * this wouldn't work, since we didn't subscribe the mcast group on * other interfaces */ - if (c->dev != out) { + if (c->dev != state->out) { pr_debug("not mangling arp reply on different " "interface: cip'%s'-skb'%s'\n", - c->dev->name, out->name); + c->dev->name, state->out->name); clusterip_config_put(c); return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index a313c3fbeb46..e9e67793055f 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -300,11 +300,9 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out)); + struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out)); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index e08a74a243a8..1df0d42bfd39 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -34,8 +34,7 @@ static const struct xt_table packet_filter = { static unsigned int iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { const struct net *net; @@ -45,8 +44,8 @@ iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, /* root is playing with raw sockets. */ return NF_ACCEPT; - net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, ops->hooknum, in, out, + net = dev_net(state->in ? state->in : state->out); + return ipt_do_table(skb, ops->hooknum, state->in, state->out, net->ipv4.iptable_filter); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 6a5079c34bb3..7a825e740045 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -81,18 +81,16 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) static unsigned int iptable_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { if (ops->hooknum == NF_INET_LOCAL_OUT) - return ipt_mangle_out(skb, out); + return ipt_mangle_out(skb, state->out); if (ops->hooknum == NF_INET_POST_ROUTING) - return ipt_do_table(skb, ops->hooknum, in, out, - dev_net(out)->ipv4.iptable_mangle); + return ipt_do_table(skb, ops->hooknum, state->in, state->out, + dev_net(state->out)->ipv4.iptable_mangle); /* PREROUTING/INPUT/FORWARD: */ - return ipt_do_table(skb, ops->hooknum, in, out, - dev_net(in)->ipv4.iptable_mangle); + return ipt_do_table(skb, ops->hooknum, state->in, state->out, + dev_net(state->in)->ipv4.iptable_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 6b67d7e9a75d..7a7fea4711e5 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -41,38 +41,34 @@ static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_fn(ops, skb, in, out, iptable_nat_do_chain); + return nf_nat_ipv4_fn(ops, skb, state->in, state->out, + iptable_nat_do_chain); } static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_in(ops, skb, in, out, iptable_nat_do_chain); + return nf_nat_ipv4_in(ops, skb, state->in, state->out, + iptable_nat_do_chain); } static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_out(ops, skb, in, out, iptable_nat_do_chain); + return nf_nat_ipv4_out(ops, skb, state->in, state->out, + iptable_nat_do_chain); } static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_local_fn(ops, skb, in, out, iptable_nat_do_chain); + return nf_nat_ipv4_local_fn(ops, skb, state->in, state->out, + iptable_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index b2f7e8f98316..fac8f607c70b 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -21,8 +21,7 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { const struct net *net; @@ -32,8 +31,9 @@ iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, /* root is playing with raw sockets. */ return NF_ACCEPT; - net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw); + net = dev_net(state->in ? state->in : state->out); + return ipt_do_table(skb, ops->hooknum, state->in, state->out, + net->ipv4.iptable_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index c86647ed2078..d9ad60a57413 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -38,9 +38,7 @@ static const struct xt_table security_table = { static unsigned int iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { const struct net *net; @@ -50,8 +48,8 @@ iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, /* Somebody is playing with raw sockets. */ return NF_ACCEPT; - net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, ops->hooknum, in, out, + net = dev_net(state->in ? state->in : state->out); + return ipt_do_table(skb, ops->hooknum, state->in, state->out, net->ipv4.iptable_security); } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 8c8d6642cbb0..30ad9554b5e9 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -94,9 +94,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, static unsigned int ipv4_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -123,9 +121,7 @@ static unsigned int ipv4_helper(const struct nf_hook_ops *ops, static unsigned int ipv4_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -149,24 +145,20 @@ out: static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb); + return nf_conntrack_in(dev_net(state->in), PF_INET, ops->hooknum, skb); } static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb); + return nf_conntrack_in(dev_net(state->out), PF_INET, ops->hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 7e5ca6f2d0cd..c88b7d434718 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -63,9 +63,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(skb->sk); diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 19412a4063fb..fceb50e1e87d 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -17,13 +17,11 @@ static unsigned int nft_do_chain_arp(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nft_pktinfo pkt; - nft_set_pktinfo(&pkt, ops, skb, in, out); + nft_set_pktinfo(&pkt, ops, skb, state->in, state->out); return nft_do_chain(&pkt, ops); } diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 6820c8c40842..708e388e3dbe 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -20,22 +20,18 @@ static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + nft_set_pktinfo_ipv4(&pkt, ops, skb, state->in, state->out); return nft_do_chain(&pkt, ops); } static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { if (unlikely(skb->len < sizeof(struct iphdr) || ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { @@ -45,7 +41,7 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, return NF_ACCEPT; } - return nft_do_chain_ipv4(ops, skb, in, out, okfn); + return nft_do_chain_ipv4(ops, skb, state); } struct nft_af_info nft_af_ipv4 __read_mostly = { diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index df547bf50078..d08db6b0fcc3 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -41,38 +41,31 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, static unsigned int nft_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_fn(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv4_fn(ops, skb, state->in, state->out, nft_nat_do_chain); } static unsigned int nft_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_in(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv4_in(ops, skb, state->in, state->out, nft_nat_do_chain); } static unsigned int nft_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_out(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv4_out(ops, skb, state->in, state->out, nft_nat_do_chain); } static unsigned int nft_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv4_local_fn(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv4_local_fn(ops, skb, state->in, state->out, + nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv4 = { diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 125b66766c0a..073d0776ae7f 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -23,9 +23,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { unsigned int ret; struct nft_pktinfo pkt; @@ -39,7 +37,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + nft_set_pktinfo_ipv4(&pkt, ops, skb, state->in, state->out); mark = skb->mark; iph = ip_hdr(skb); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index a0d17270117c..6edb7b106de7 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -315,11 +315,9 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out)); + struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out)); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index ca7f6c128086..eb9ef093454f 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -33,12 +33,11 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - const struct net *net = dev_net((in != NULL) ? in : out); + const struct net *net = dev_net(state->in ? state->in : state->out); - return ip6t_do_table(skb, ops->hooknum, in, out, + return ip6t_do_table(skb, ops->hooknum, state->in, state->out, net->ipv6.ip6table_filter); } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 307bbb782d14..e713b8d3dbbc 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -77,17 +77,16 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) /* The work comes in here from netfilter.c. */ static unsigned int ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { if (ops->hooknum == NF_INET_LOCAL_OUT) - return ip6t_mangle_out(skb, out); + return ip6t_mangle_out(skb, state->out); if (ops->hooknum == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, ops->hooknum, in, out, - dev_net(out)->ipv6.ip6table_mangle); + return ip6t_do_table(skb, ops->hooknum, state->in, state->out, + dev_net(state->out)->ipv6.ip6table_mangle); /* INPUT/FORWARD */ - return ip6t_do_table(skb, ops->hooknum, in, out, - dev_net(in)->ipv6.ip6table_mangle); + return ip6t_do_table(skb, ops->hooknum, state->in, state->out, + dev_net(state->in)->ipv6.ip6table_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index b0634ac996b7..e32b0d0315e6 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -43,38 +43,34 @@ static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, in, out, ip6table_nat_do_chain); + return nf_nat_ipv6_fn(ops, skb, state->in, state->out, + ip6table_nat_do_chain); } static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, in, out, ip6table_nat_do_chain); + return nf_nat_ipv6_in(ops, skb, state->in, state->out, + ip6table_nat_do_chain); } static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, in, out, ip6table_nat_do_chain); + return nf_nat_ipv6_out(ops, skb, state->in, state->out, + ip6table_nat_do_chain); } static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, in, out, ip6table_nat_do_chain); + return nf_nat_ipv6_local_fn(ops, skb, state->in, state->out, + ip6table_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 5274740acecc..937908e25862 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -20,12 +20,11 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - const struct net *net = dev_net((in != NULL) ? in : out); + const struct net *net = dev_net(state->in ? state->in : state->out); - return ip6t_do_table(skb, ops->hooknum, in, out, + return ip6t_do_table(skb, ops->hooknum, state->in, state->out, net->ipv6.ip6table_raw); } diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index ab3b0219ecfa..f33b41e8e294 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -37,13 +37,11 @@ static const struct xt_table security_table = { static unsigned int ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - const struct net *net = dev_net((in != NULL) ? in : out); + const struct net *net = dev_net(state->in ? state->in : state->out); - return ip6t_do_table(skb, ops->hooknum, in, out, + return ip6t_do_table(skb, ops->hooknum, state->in, state->out, net->ipv6.ip6table_security); } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index fba91c6fc7ca..4ba0c34c627b 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -97,9 +97,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, static unsigned int ipv6_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nf_conn *ct; const struct nf_conn_help *help; @@ -135,9 +133,7 @@ static unsigned int ipv6_helper(const struct nf_hook_ops *ops, static unsigned int ipv6_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -171,25 +167,21 @@ out: static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb); + return nf_conntrack_in(dev_net(state->in), PF_INET6, ops->hooknum, skb); } static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct ipv6hdr)) { net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb); + return nf_conntrack_in(dev_net(state->out), PF_INET6, ops->hooknum, skb); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index e70382e4dfb5..e2b882056751 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -54,9 +54,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct sk_buff *reasm; @@ -78,8 +76,8 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, nf_ct_frag6_consume_orig(reasm); NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm, - (struct net_device *) in, (struct net_device *) out, - okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); + state->in, state->out, + state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); return NF_STOLEN; } diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 0d812b31277d..224bc8971a0b 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -18,14 +18,12 @@ static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nft_pktinfo pkt; /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state->in, state->out) < 0) return NF_DROP; return nft_do_chain(&pkt, ops); @@ -33,9 +31,7 @@ static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { if (unlikely(skb->len < sizeof(struct ipv6hdr))) { if (net_ratelimit()) @@ -44,7 +40,7 @@ static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, return NF_ACCEPT; } - return nft_do_chain_ipv6(ops, skb, in, out, okfn); + return nft_do_chain_ipv6(ops, skb, state); } struct nft_af_info nft_af_ipv6 __read_mostly = { diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index 1c4b75dd425b..f73f4ae25bc2 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -39,38 +39,30 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv6_fn(ops, skb, state->in, state->out, nft_nat_do_chain); } static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv6_in(ops, skb, state->in, state->out, nft_nat_do_chain); } static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv6_out(ops, skb, state->in, state->out, nft_nat_do_chain); } static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv6_local_fn(ops, skb, state->in, state->out, nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv6 = { diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 42031299585e..c826c3c854b2 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -24,9 +24,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { unsigned int ret; struct nft_pktinfo pkt; @@ -35,7 +33,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, u32 mark, flowlabel; /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state->in, state->out) < 0) return NF_DROP; /* save source/dest address, mark, hoplimit, flowlabel, priority */ diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 11d04ebfc5e3..e6163017c42d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -136,8 +136,7 @@ unsigned int nf_iterate(struct list_head *head, /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook(*elemp, skb, state->in, state->out, - state->okfn); + verdict = (*elemp)->hook(*elemp, skb, state); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 04dbd9c7213f..5d2b806a862e 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1272,8 +1272,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) */ static unsigned int ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_out(ops->hooknum, skb, AF_INET); } @@ -1284,8 +1283,7 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_out(ops->hooknum, skb, AF_INET); } @@ -1299,8 +1297,7 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_out(ops->hooknum, skb, AF_INET6); } @@ -1311,8 +1308,7 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_out(ops->hooknum, skb, AF_INET6); } @@ -1769,9 +1765,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) */ static unsigned int ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_in(ops->hooknum, skb, AF_INET); } @@ -1782,8 +1776,7 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_in(ops->hooknum, skb, AF_INET); } @@ -1796,9 +1789,7 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_in(ops->hooknum, skb, AF_INET6); } @@ -1809,8 +1800,7 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return ip_vs_in(ops->hooknum, skb, AF_INET6); } @@ -1829,8 +1819,7 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, */ static unsigned int ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { int r; struct net *net; @@ -1851,8 +1840,7 @@ ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 static unsigned int ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { int r; struct net *net; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index edc66de39f2e..7e392edaab97 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4852,21 +4852,17 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return selinux_ip_forward(skb, in, PF_INET); + return selinux_ip_forward(skb, state->in, PF_INET); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return selinux_ip_forward(skb, in, PF_INET6); + return selinux_ip_forward(skb, state->in, PF_INET6); } #endif /* IPV6 */ @@ -4914,9 +4910,7 @@ static unsigned int selinux_ip_output(struct sk_buff *skb, static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { return selinux_ip_output(skb, PF_INET); } @@ -5091,21 +5085,17 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return selinux_ip_postroute(skb, out, PF_INET); + return selinux_ip_postroute(skb, state->out, PF_INET); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return selinux_ip_postroute(skb, out, PF_INET6); + return selinux_ip_postroute(skb, state->out, PF_INET6); } #endif /* IPV6 */ diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c index c952632afb0d..a455cfc9ec1f 100644 --- a/security/smack/smack_netfilter.c +++ b/security/smack/smack_netfilter.c @@ -23,9 +23,7 @@ static unsigned int smack_ipv6_output(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct socket_smack *ssp; struct smack_known *skp; @@ -42,9 +40,7 @@ static unsigned int smack_ipv6_output(const struct nf_hook_ops *ops, static unsigned int smack_ipv4_output(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct socket_smack *ssp; struct smack_known *skp; -- cgit v1.3-6-gb490 From 1c491ba2592f621f21a693d43fab06302527fc0f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 20:56:08 -0400 Subject: netfilter: Pass nf_hook_state through ipt_do_table(). Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_tables.h | 3 +-- net/ipv4/netfilter/ip_tables.c | 13 ++++++------- net/ipv4/netfilter/iptable_filter.c | 3 +-- net/ipv4/netfilter/iptable_mangle.c | 11 ++++++----- net/ipv4/netfilter/iptable_nat.c | 3 +-- net/ipv4/netfilter/iptable_raw.c | 3 +-- net/ipv4/netfilter/iptable_security.c | 2 +- 7 files changed, 17 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 901e84db847d..4073510da485 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -65,8 +65,7 @@ struct ipt_error { extern void *ipt_alloc_initial_table(const struct xt_table *); extern unsigned int ipt_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table); #ifdef CONFIG_COMPAT diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cf5e82f39d3b..c69db7fa25ee 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -288,8 +288,7 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) unsigned int ipt_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); @@ -306,8 +305,8 @@ ipt_do_table(struct sk_buff *skb, /* Initialization */ ip = ip_hdr(skb); - indev = in ? in->name : nulldevname; - outdev = out ? out->name : nulldevname; + indev = state->in ? state->in->name : nulldevname; + outdev = state->out ? state->out->name : nulldevname; /* We handle fragments by dealing with the first fragment as * if it was a normal packet. All other fragments are treated * normally, except that they will NEVER match rules that ask @@ -317,8 +316,8 @@ ipt_do_table(struct sk_buff *skb, acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; acpar.thoff = ip_hdrlen(skb); acpar.hotdrop = false; - acpar.in = in; - acpar.out = out; + acpar.in = state->in; + acpar.out = state->out; acpar.family = NFPROTO_IPV4; acpar.hooknum = hook; @@ -370,7 +369,7 @@ ipt_do_table(struct sk_buff *skb, #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, in, out, + trace_packet(skb, hook, state->in, state->out, table->name, private, e); #endif /* Standard target? */ diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 1df0d42bfd39..a0f3beca52d2 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -45,8 +45,7 @@ iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, return NF_ACCEPT; net = dev_net(state->in ? state->in : state->out); - return ipt_do_table(skb, ops->hooknum, state->in, state->out, - net->ipv4.iptable_filter); + return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 7a825e740045..62cbb8c5f4a8 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -37,8 +37,9 @@ static const struct xt_table packet_mangler = { }; static unsigned int -ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) +ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) { + struct net_device *out = state->out; unsigned int ret; const struct iphdr *iph; u_int8_t tos; @@ -58,7 +59,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, + ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, state, dev_net(out)->ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN) { @@ -84,12 +85,12 @@ iptable_mangle_hook(const struct nf_hook_ops *ops, const struct nf_hook_state *state) { if (ops->hooknum == NF_INET_LOCAL_OUT) - return ipt_mangle_out(skb, state->out); + return ipt_mangle_out(skb, state); if (ops->hooknum == NF_INET_POST_ROUTING) - return ipt_do_table(skb, ops->hooknum, state->in, state->out, + return ipt_do_table(skb, ops->hooknum, state, dev_net(state->out)->ipv4.iptable_mangle); /* PREROUTING/INPUT/FORWARD: */ - return ipt_do_table(skb, ops->hooknum, state->in, state->out, + return ipt_do_table(skb, ops->hooknum, state, dev_net(state->in)->ipv4.iptable_mangle); } diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 086e2311438e..0d4d9cdf98a4 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -35,8 +35,7 @@ static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, { struct net *net = nf_ct_net(ct); - return ipt_do_table(skb, ops->hooknum, state->in, state->out, - net->ipv4.nat_table); + return ipt_do_table(skb, ops->hooknum, state, net->ipv4.nat_table); } static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index fac8f607c70b..0356e6da4bb7 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -32,8 +32,7 @@ iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, return NF_ACCEPT; net = dev_net(state->in ? state->in : state->out); - return ipt_do_table(skb, ops->hooknum, state->in, state->out, - net->ipv4.iptable_raw); + return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index d9ad60a57413..4bce3980ccd9 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -49,7 +49,7 @@ iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, return NF_ACCEPT; net = dev_net(state->in ? state->in : state->out); - return ipt_do_table(skb, ops->hooknum, state->in, state->out, + return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_security); } -- cgit v1.3-6-gb490 From 8f8a37152df49d541c43f010543f2b0176fcfb8e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 21:09:51 -0400 Subject: netfilter: Pass nf_hook_state through ip6t_do_table(). Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6/ip6_tables.h | 3 +-- net/ipv6/netfilter/ip6_tables.c | 13 ++++++------- net/ipv6/netfilter/ip6table_filter.c | 3 +-- net/ipv6/netfilter/ip6table_mangle.c | 12 ++++++------ net/ipv6/netfilter/ip6table_nat.c | 3 +-- net/ipv6/netfilter/ip6table_raw.c | 3 +-- net/ipv6/netfilter/ip6table_security.c | 2 +- 7 files changed, 17 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 610208b18c05..b40d2b635778 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -31,8 +31,7 @@ extern struct xt_table *ip6t_register_table(struct net *net, extern void ip6t_unregister_table(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table); /* Check for an extension */ diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 83f59dc3cccc..1a732a1d3c8e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -317,8 +317,7 @@ ip6t_next_entry(const struct ip6t_entry *entry) unsigned int ip6t_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); @@ -333,8 +332,8 @@ ip6t_do_table(struct sk_buff *skb, unsigned int addend; /* Initialization */ - indev = in ? in->name : nulldevname; - outdev = out ? out->name : nulldevname; + indev = state->in ? state->in->name : nulldevname; + outdev = state->out ? state->out->name : nulldevname; /* We handle fragments by dealing with the first fragment as * if it was a normal packet. All other fragments are treated * normally, except that they will NEVER match rules that ask @@ -342,8 +341,8 @@ ip6t_do_table(struct sk_buff *skb, * rule is also a fragment-specific rule, non-fragments won't * match it. */ acpar.hotdrop = false; - acpar.in = in; - acpar.out = out; + acpar.in = state->in; + acpar.out = state->out; acpar.family = NFPROTO_IPV6; acpar.hooknum = hook; @@ -393,7 +392,7 @@ ip6t_do_table(struct sk_buff *skb, #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, in, out, + trace_packet(skb, hook, state->in, state->out, table->name, private, e); #endif /* Standard target? */ diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index eb9ef093454f..5c33d8abc077 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -37,8 +37,7 @@ ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, { const struct net *net = dev_net(state->in ? state->in : state->out); - return ip6t_do_table(skb, ops->hooknum, state->in, state->out, - net->ipv6.ip6table_filter); + return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index e713b8d3dbbc..b551f5b79fe2 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -32,7 +32,7 @@ static const struct xt_table packet_mangler = { }; static unsigned int -ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) +ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) { unsigned int ret; struct in6_addr saddr, daddr; @@ -57,8 +57,8 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, - dev_net(out)->ipv6.ip6table_mangle); + ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, state, + dev_net(state->out)->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || @@ -80,12 +80,12 @@ ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { if (ops->hooknum == NF_INET_LOCAL_OUT) - return ip6t_mangle_out(skb, state->out); + return ip6t_mangle_out(skb, state); if (ops->hooknum == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, ops->hooknum, state->in, state->out, + return ip6t_do_table(skb, ops->hooknum, state, dev_net(state->out)->ipv6.ip6table_mangle); /* INPUT/FORWARD */ - return ip6t_do_table(skb, ops->hooknum, state->in, state->out, + return ip6t_do_table(skb, ops->hooknum, state, dev_net(state->in)->ipv6.ip6table_mangle); } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index d78f69c7abce..c3a7f7af0ed4 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -37,8 +37,7 @@ static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, { struct net *net = nf_ct_net(ct); - return ip6t_do_table(skb, ops->hooknum, state->in, state->out, - net->ipv6.ip6table_nat); + return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_nat); } static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 937908e25862..0b33caad2b69 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -24,8 +24,7 @@ ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, { const struct net *net = dev_net(state->in ? state->in : state->out); - return ip6t_do_table(skb, ops->hooknum, state->in, state->out, - net->ipv6.ip6table_raw); + return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index f33b41e8e294..fcef83c25f7b 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -41,7 +41,7 @@ ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, { const struct net *net = dev_net(state->in ? state->in : state->out); - return ip6t_do_table(skb, ops->hooknum, state->in, state->out, + return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_security); } -- cgit v1.3-6-gb490 From b85c3dc9bd5347ad9540ec8d103b7c049c48b7cd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 21:18:46 -0400 Subject: netfilter: Pass nf_hook_state through arpt_do_table(). Signed-off-by: David S. Miller --- include/linux/netfilter_arp/arp_tables.h | 3 +-- net/ipv4/netfilter/arp_tables.c | 11 +++++------ net/ipv4/netfilter/arptable_filter.c | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index cfb7191e6efa..c22a7fb8d0df 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -54,8 +54,7 @@ extern struct xt_table *arpt_register_table(struct net *net, extern void arpt_unregister_table(struct xt_table *table); extern unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table); #ifdef CONFIG_COMPAT diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f95b6f93814b..13bfe84bf3ca 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -248,8 +248,7 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); @@ -265,8 +264,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; - indev = in ? in->name : nulldevname; - outdev = out ? out->name : nulldevname; + indev = state->in ? state->in->name : nulldevname; + outdev = state->out ? state->out->name : nulldevname; local_bh_disable(); addend = xt_write_recseq_begin(); @@ -281,8 +280,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); - acpar.in = in; - acpar.out = out; + acpar.in = state->in; + acpar.out = state->out; acpar.hooknum = hook; acpar.family = NFPROTO_ARP; acpar.hotdrop = false; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 6a641cb41062..93876d03120c 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -32,7 +32,7 @@ arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, { const struct net *net = dev_net(state->in ? state->in : state->out); - return arpt_do_table(skb, ops->hooknum, state->in, state->out, + return arpt_do_table(skb, ops->hooknum, state, net->ipv4.arptable_filter); } -- cgit v1.3-6-gb490 From f60e5990d9c1424af9dbca60a23ba2a1c7c1ce90 Mon Sep 17 00:00:00 2001 From: "hannes@stressinduktion.org" Date: Wed, 1 Apr 2015 17:07:44 +0200 Subject: ipv6: protect skb->sk accesses from recursive dereference inside the stack We should not consult skb->sk for output decisions in xmit recursion levels > 0 in the stack. Otherwise local socket settings could influence the result of e.g. tunnel encapsulation process. ipv6 does not conform with this in three places: 1) ip6_fragment: we do consult ipv6_npinfo for frag_size 2) sk_mc_loop in ipv6 uses skb->sk and checks if we should loop the packet back to the local socket 3) ip6_skb_dst_mtu could query the settings from the user socket and force a wrong MTU Furthermore: In sk_mc_loop we could potentially land in WARN_ON(1) if we use a PF_PACKET socket ontop of an IPv6-backed vxlan device. Reuse xmit_recursion as we are currently only interested in protecting tunnel devices. Cc: Jiri Pirko Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ include/net/ip.h | 16 ---------------- include/net/ip6_route.h | 3 ++- include/net/sock.h | 2 ++ net/core/dev.c | 4 +++- net/core/sock.c | 19 +++++++++++++++++++ net/ipv6/ip6_output.c | 3 ++- 7 files changed, 34 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dcf6ec27739b..278738873703 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2185,6 +2185,12 @@ void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); +DECLARE_PER_CPU(int, xmit_recursion); +static inline int dev_recursion_level(void) +{ + return this_cpu_read(xmit_recursion); +} + struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); diff --git a/include/net/ip.h b/include/net/ip.h index 025c61c0dffb..6cc1eafb153a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -453,22 +453,6 @@ static __inline__ void inet_reset_saddr(struct sock *sk) #endif -static inline int sk_mc_loop(struct sock *sk) -{ - if (!sk) - return 1; - switch (sk->sk_family) { - case AF_INET: - return inet_sk(sk)->mc_loop; -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - return inet6_sk(sk)->mc_loop; -#endif - } - WARN_ON(1); - return 1; -} - bool ip_call_ra_chain(struct sk_buff *skb); /* diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 1d09b46c1e48..eda131d179d9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -174,7 +174,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); static inline int ip6_skb_dst_mtu(struct sk_buff *skb) { - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; + struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + inet6_sk(skb->sk) : NULL; return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ? skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); diff --git a/include/net/sock.h b/include/net/sock.h index ab186b1d31ff..e4079c28e6b8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1762,6 +1762,8 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); +bool sk_mc_loop(struct sock *sk); + static inline bool sk_can_gso(const struct sock *sk) { return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type); diff --git a/net/core/dev.c b/net/core/dev.c index 962ee9d71964..45109b70664e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2848,7 +2848,9 @@ static void skb_update_prio(struct sk_buff *skb) #define skb_update_prio(skb) #endif -static DEFINE_PER_CPU(int, xmit_recursion); +DEFINE_PER_CPU(int, xmit_recursion); +EXPORT_SYMBOL(xmit_recursion); + #define RECURSION_LIMIT 10 /** diff --git a/net/core/sock.c b/net/core/sock.c index 78e89eb7eb70..71e3e5f1eaa0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -653,6 +653,25 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) sock_reset_flag(sk, bit); } +bool sk_mc_loop(struct sock *sk) +{ + if (dev_recursion_level()) + return false; + if (!sk) + return true; + switch (sk->sk_family) { + case AF_INET: + return inet_sk(sk)->mc_loop; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + return inet6_sk(sk)->mc_loop; +#endif + } + WARN_ON(1); + return true; +} +EXPORT_SYMBOL(sk_mc_loop); + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7e80b61b51ff..36cf0ab685a0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -542,7 +542,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; + struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; unsigned int mtu, hlen, left, len; -- cgit v1.3-6-gb490 From d8725c86aebaf3516e220760aaf5fefc73825188 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 11 Dec 2014 00:02:50 -0500 Subject: get rid of the size argument of sock_sendmsg() it's equal to iov_iter_count(&msg->msg_iter) in all cases Signed-off-by: Al Viro --- include/linux/net.h | 2 +- net/socket.c | 27 ++++++++++++++------------- net/sunrpc/svcsock.c | 2 +- 3 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index e74114bcca68..738ea48be889 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -211,7 +211,7 @@ int sock_create(int family, int type, int proto, struct socket **res); int sock_create_kern(int family, int type, int proto, struct socket **res); int sock_create_lite(int family, int type, int proto, struct socket **res); void sock_release(struct socket *sock); -int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len); +int sock_sendmsg(struct socket *sock, struct msghdr *msg); int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname); diff --git a/net/socket.c b/net/socket.c index b6ceeda65214..21676e469b13 100644 --- a/net/socket.c +++ b/net/socket.c @@ -610,17 +610,19 @@ void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) } EXPORT_SYMBOL(__sock_tx_timestamp); -static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, - size_t size) +static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { - return sock->ops->sendmsg(sock, msg, size); + int ret = sock->ops->sendmsg(sock, msg, iov_iter_count(&msg->msg_iter)); + BUG_ON(ret == -EIOCBQUEUED); + return ret; } -int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) +int sock_sendmsg(struct socket *sock, struct msghdr *msg) { - int err = security_socket_sendmsg(sock, msg, size); + int err = security_socket_sendmsg(sock, msg, + iov_iter_count(&msg->msg_iter)); - return err ?: sock_sendmsg_nosec(sock, msg, size); + return err ?: sock_sendmsg_nosec(sock, msg); } EXPORT_SYMBOL(sock_sendmsg); @@ -628,7 +630,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size); - return sock_sendmsg(sock, msg, size); + return sock_sendmsg(sock, msg); } EXPORT_SYMBOL(kernel_sendmsg); @@ -819,7 +821,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (sock->type == SOCK_SEQPACKET) msg.msg_flags |= MSG_EOR; - res = sock_sendmsg(sock, &msg, iov_iter_count(from)); + res = sock_sendmsg(sock, &msg); *from = msg.msg_iter; return res; } @@ -1657,7 +1659,7 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; msg.msg_flags = flags; - err = sock_sendmsg(sock, &msg, iov_iter_count(&msg.msg_iter)); + err = sock_sendmsg(sock, &msg); out_put: fput_light(sock->file, fput_needed); @@ -1892,7 +1894,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, __attribute__ ((aligned(sizeof(__kernel_size_t)))); /* 20 is size of ipv6_pktinfo */ unsigned char *ctl_buf = ctl; - int ctl_len, total_len; + int ctl_len; ssize_t err; msg_sys->msg_name = &address; @@ -1903,7 +1905,6 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov); if (err < 0) return err; - total_len = iov_iter_count(&msg_sys->msg_iter); err = -ENOBUFS; @@ -1950,10 +1951,10 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, used_address->name_len == msg_sys->msg_namelen && !memcmp(&used_address->name, msg_sys->msg_name, used_address->name_len)) { - err = sock_sendmsg_nosec(sock, msg_sys, total_len); + err = sock_sendmsg_nosec(sock, msg_sys); goto out_freectl; } - err = sock_sendmsg(sock, msg_sys, total_len); + err = sock_sendmsg(sock, msg_sys); /* * If this is sendmmsg() and sending to current destination address was * successful, remember it. diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index cc331b6cf573..0c8120229a03 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -257,7 +257,7 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) svc_set_cmsg_data(rqstp, cmh); - if (sock_sendmsg(sock, &msg, 0) < 0) + if (sock_sendmsg(sock, &msg) < 0) goto out; } -- cgit v1.3-6-gb490 From 01e97e6517053d7c0b9af5248e944a9209909cf5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 15 Dec 2014 21:39:31 -0500 Subject: new helper: msg_data_left() convert open-coded instances Signed-off-by: Al Viro --- crypto/algif_hash.c | 4 ++-- crypto/algif_skcipher.c | 4 ++-- drivers/vhost/net.c | 4 ++-- include/linux/socket.h | 5 +++++ net/core/datagram.c | 2 +- net/ipv4/tcp.c | 8 ++++---- net/rxrpc/ar-output.c | 19 +++++++++---------- net/socket.c | 4 ++-- 8 files changed, 27 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 0a465e0f3012..1396ad0787fc 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -56,8 +56,8 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg, ctx->more = 0; - while (iov_iter_count(&msg->msg_iter)) { - int len = iov_iter_count(&msg->msg_iter); + while (msg_data_left(msg)) { + int len = msg_data_left(msg); if (len > limit) len = limit; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 8f903b6df299..945075292bc9 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -641,7 +641,7 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg, long copied = 0; lock_sock(sk); - while (iov_iter_count(&msg->msg_iter)) { + while (msg_data_left(msg)) { sgl = list_first_entry(&ctx->tsgl, struct skcipher_sg_list, list); sg = sgl->sg; @@ -655,7 +655,7 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg, goto unlock; } - used = min_t(unsigned long, ctx->used, iov_iter_count(&msg->msg_iter)); + used = min_t(unsigned long, ctx->used, msg_data_left(msg)); used = af_alg_make_sg(&ctx->rsgl, &msg->msg_iter, used); err = used; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 18f05bff8826..7d137a43cc86 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -357,13 +357,13 @@ static void handle_tx(struct vhost_net *net) iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len); iov_iter_advance(&msg.msg_iter, hdr_size); /* Sanity check */ - if (!iov_iter_count(&msg.msg_iter)) { + if (!msg_data_left(&msg)) { vq_err(vq, "Unexpected header len for TX: " "%zd expected %zd\n", len, hdr_size); break; } - len = iov_iter_count(&msg.msg_iter); + len = msg_data_left(&msg); zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN && (nvq->upend_idx + 1) % UIO_MAXIOV != diff --git a/include/linux/socket.h b/include/linux/socket.h index c9852ef7e317..5bf59c8493b7 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -139,6 +139,11 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } +static inline size_t msg_data_left(struct msghdr *msg) +{ + return iov_iter_count(&msg->msg_iter); +} + /* "Socket"-level control message types: */ #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */ diff --git a/net/core/datagram.c b/net/core/datagram.c index df493d68330c..b80fb91bb3f7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -673,7 +673,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, if (!chunk) return 0; - if (iov_iter_count(&msg->msg_iter) < chunk) { + if (msg_data_left(msg) < chunk) { if (__skb_checksum_complete(skb)) goto csum_error; if (skb_copy_datagram_msg(skb, hlen, msg, chunk)) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 094a6822c71d..18e3a12eb1b2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1119,7 +1119,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) sg = !!(sk->sk_route_caps & NETIF_F_SG); - while (iov_iter_count(&msg->msg_iter)) { + while (msg_data_left(msg)) { int copy = 0; int max = size_goal; @@ -1163,8 +1163,8 @@ new_segment: } /* Try to append data to the end of skb. */ - if (copy > iov_iter_count(&msg->msg_iter)) - copy = iov_iter_count(&msg->msg_iter); + if (copy > msg_data_left(msg)) + copy = msg_data_left(msg); /* Where to copy to? */ if (skb_availroom(skb) > 0) { @@ -1221,7 +1221,7 @@ new_segment: tcp_skb_pcount_set(skb, 0); copied += copy; - if (!iov_iter_count(&msg->msg_iter)) { + if (!msg_data_left(msg)) { tcp_tx_timestamp(sk, skb); goto out; } diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 7a31a3958364..c0042807bfc6 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -564,8 +564,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, max &= ~(call->conn->size_align - 1UL); chunk = max; - if (chunk > iov_iter_count(&msg->msg_iter) && !more) - chunk = iov_iter_count(&msg->msg_iter); + if (chunk > msg_data_left(msg) && !more) + chunk = msg_data_left(msg); space = chunk + call->conn->size_align; space &= ~(call->conn->size_align - 1UL); @@ -608,11 +608,11 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, sp = rxrpc_skb(skb); /* append next segment of data to the current buffer */ - if (iov_iter_count(&msg->msg_iter) > 0) { + if (msg_data_left(msg) > 0) { int copy = skb_tailroom(skb); ASSERTCMP(copy, >, 0); - if (copy > iov_iter_count(&msg->msg_iter)) - copy = iov_iter_count(&msg->msg_iter); + if (copy > msg_data_left(msg)) + copy = msg_data_left(msg); if (copy > sp->remain) copy = sp->remain; @@ -633,7 +633,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, /* add the packet to the send queue if it's now full */ if (sp->remain <= 0 || - (iov_iter_count(&msg->msg_iter) == 0 && !more)) { + (msg_data_left(msg) == 0 && !more)) { struct rxrpc_connection *conn = call->conn; uint32_t seq; size_t pad; @@ -663,7 +663,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, sp->hdr.serviceId = conn->service_id; sp->hdr.flags = conn->out_clientflag; - if (iov_iter_count(&msg->msg_iter) == 0 && !more) + if (msg_data_left(msg) == 0 && !more) sp->hdr.flags |= RXRPC_LAST_PACKET; else if (CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz) > 1) @@ -679,11 +679,10 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, memcpy(skb->head, &sp->hdr, sizeof(struct rxrpc_header)); - rxrpc_queue_packet(call, skb, - iov_iter_count(&msg->msg_iter) == 0 && !more); + rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); skb = NULL; } - } while (iov_iter_count(&msg->msg_iter) > 0); + } while (msg_data_left(msg) > 0); success: ret = copied; diff --git a/net/socket.c b/net/socket.c index 21676e469b13..5b0126234606 100644 --- a/net/socket.c +++ b/net/socket.c @@ -612,7 +612,7 @@ EXPORT_SYMBOL(__sock_tx_timestamp); static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { - int ret = sock->ops->sendmsg(sock, msg, iov_iter_count(&msg->msg_iter)); + int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg)); BUG_ON(ret == -EIOCBQUEUED); return ret; } @@ -620,7 +620,7 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) int sock_sendmsg(struct socket *sock, struct msghdr *msg) { int err = security_socket_sendmsg(sock, msg, - iov_iter_count(&msg->msg_iter)); + msg_data_left(msg)); return err ?: sock_sendmsg_nosec(sock, msg); } -- cgit v1.3-6-gb490