From 119363c7dc2bcc0c33c255a7b4979c8c0fdc1896 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 22 Apr 2013 16:29:30 +0200 Subject: cfg80211: add support for per-chain signal strength reporting Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d1e48b5e348f..cbf1e228650f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1991,6 +1991,10 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_PEER_PM: peer mesh STA link-specific power mode * @NL80211_STA_INFO_NONPEER_PM: neighbor mesh STA power save mode towards * non-peer STA + * @NL80211_STA_INFO_CHAIN_SIGNAL: per-chain signal strength of last PPDU + * Contains a nested array of signal strength attributes (u8, dBm) + * @NL80211_STA_INFO_CHAIN_SIGNAL_AVG: per-chain signal strength average + * Same format as NL80211_STA_INFO_CHAIN_SIGNAL. * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -2020,6 +2024,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_NONPEER_PM, NL80211_STA_INFO_RX_BYTES64, NL80211_STA_INFO_TX_BYTES64, + NL80211_STA_INFO_CHAIN_SIGNAL, + NL80211_STA_INFO_CHAIN_SIGNAL_AVG, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, -- cgit v1.3-14-g43fede From fb4e156886ce6e8309e912d8b370d192330d19d3 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 28 Apr 2013 16:22:06 -0700 Subject: nl80211: Add generic netlink module alias for cfg80211/nl80211 To support auto-loading of wireless modules from netlink users, add module alias for nl80211 family. This also adds NL80211_GENL_NAME constant to define the "nl80211" netlink family name as part of uapi. Signed-off-by: Marcel Holtmann Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ net/wireless/core.c | 1 + net/wireless/nl80211.c | 8 ++++---- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index cbf1e228650f..b48430769eda 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -27,6 +27,8 @@ #include +#define NL80211_GENL_NAME "nl80211" + /** * DOC: Station handling * diff --git a/net/wireless/core.c b/net/wireless/core.c index 84c9ad7e1dca..68f0c96c0565 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -34,6 +34,7 @@ MODULE_AUTHOR("Johannes Berg"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("wireless configuration support"); +MODULE_ALIAS_GENL_FAMILY(NL80211_GENL_NAME); /* RCU-protected (and cfg80211_mutex for writers) */ LIST_HEAD(cfg80211_rdev_list); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f687a8d0d026..9cdcd9ec3317 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -37,10 +37,10 @@ static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb, /* the netlink family */ static struct genl_family nl80211_fam = { - .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ - .name = "nl80211", /* have users key off the name instead */ - .hdrsize = 0, /* no private header */ - .version = 1, /* no particular meaning now */ + .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ + .name = NL80211_GENL_NAME, /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ .maxattr = NL80211_ATTR_MAX, .netnsok = true, .pre_doit = nl80211_pre_doit, -- cgit v1.3-14-g43fede From 6e16d90b5218307db805e6b3e0b06d3946eb8c4c Mon Sep 17 00:00:00 2001 From: Colleen Twitty Date: Wed, 8 May 2013 11:45:59 -0700 Subject: cfg80211: Userspace may inform kernel of mesh auth method. Authentication takes place in userspace, but the beacon is generated in the kernel. Allow userspace to inform the kernel of the authentication method so the appropriate mesh config IE can be set prior to beacon generation when joining the MBSS. Signed-off-by: Colleen Twitty Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 5 +++++ net/wireless/mesh.c | 1 + net/wireless/nl80211.c | 8 ++++++++ 4 files changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 26e91138a2c6..32a2f1b20861 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1161,6 +1161,7 @@ struct mesh_config { * @sync_method: which synchronization method to use * @path_sel_proto: which path selection protocol to use * @path_metric: which metric to use + * @auth_id: which authentication method this mesh is using * @ie: vendor information elements (optional) * @ie_len: length of vendor information elements * @is_authenticated: this mesh requires authentication @@ -1179,6 +1180,7 @@ struct mesh_setup { u8 sync_method; u8 path_sel_proto; u8 path_metric; + u8 auth_id; const u8 *ie; u8 ie_len; bool is_authenticated; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b48430769eda..06320713e9c9 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2645,6 +2645,10 @@ enum nl80211_meshconf_params { * @NL80211_MESH_SETUP_USERSPACE_MPM: Enable this option if userspace will * implement an MPM which handles peer allocation and state. * + * @NL80211_MESH_SETUP_AUTH_PROTOCOL: Inform the kernel of the authentication + * method (u8, as defined in IEEE 8.4.2.100.6, e.g. 0x1 for SAE). + * Default is no authentication method required. + * * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number * * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use @@ -2658,6 +2662,7 @@ enum nl80211_mesh_setup_params { NL80211_MESH_SETUP_USERSPACE_AMPE, NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC, NL80211_MESH_SETUP_USERSPACE_MPM, + NL80211_MESH_SETUP_AUTH_PROTOCOL, /* keep last */ __NL80211_MESH_SETUP_ATTR_AFTER_LAST, diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0bb93f3061a4..9546ad210550 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -82,6 +82,7 @@ const struct mesh_setup default_mesh_setup = { .sync_method = IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET, .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP, .path_metric = IEEE80211_PATH_METRIC_AIRTIME, + .auth_id = 0, /* open */ .ie = NULL, .ie_len = 0, .is_secure = false, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9cdcd9ec3317..5f10f7acfa06 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4672,6 +4672,7 @@ static const struct nla_policy [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, + [NL80211_MESH_SETUP_AUTH_PROTOCOL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, @@ -4857,6 +4858,13 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, if (setup->is_secure) setup->user_mpm = true; + if (tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]) { + if (!setup->user_mpm) + return -EINVAL; + setup->auth_id = + nla_get_u8(tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]); + } + return 0; } -- cgit v1.3-14-g43fede From adf6271281a811dbcf2cb090b2a99a8520f7852b Mon Sep 17 00:00:00 2001 From: Ismael Luceno Date: Thu, 18 Apr 2013 08:16:08 -0300 Subject: [media] videodev2.h: Make V4L2_PIX_FMT_MPEG4 comment more specific about its usage Signed-off-by: Ismael Luceno Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index f40b41c7e108..16c3cf71eb94 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -395,7 +395,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_H263 v4l2_fourcc('H', '2', '6', '3') /* H263 */ #define V4L2_PIX_FMT_MPEG1 v4l2_fourcc('M', 'P', 'G', '1') /* MPEG-1 ES */ #define V4L2_PIX_FMT_MPEG2 v4l2_fourcc('M', 'P', 'G', '2') /* MPEG-2 ES */ -#define V4L2_PIX_FMT_MPEG4 v4l2_fourcc('M', 'P', 'G', '4') /* MPEG-4 ES */ +#define V4L2_PIX_FMT_MPEG4 v4l2_fourcc('M', 'P', 'G', '4') /* MPEG-4 part 2 ES */ #define V4L2_PIX_FMT_XVID v4l2_fourcc('X', 'V', 'I', 'D') /* Xvid */ #define V4L2_PIX_FMT_VC1_ANNEX_G v4l2_fourcc('V', 'C', '1', 'G') /* SMPTE 421M Annex G compliant stream */ #define V4L2_PIX_FMT_VC1_ANNEX_L v4l2_fourcc('V', 'C', '1', 'L') /* SMPTE 421M Annex L compliant stream */ -- cgit v1.3-14-g43fede From 5e4b6f5698421d94226cc2f80eae6d613c9acef8 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 16 May 2013 20:11:08 +0300 Subject: cfg80211: Allow TDLS peer AID to be configured for VHT VHT uses peer AID in the PARTIAL_AID field in TDLS frames. The current design for TDLS is to first add a dummy STA entry before completing TDLS Setup and then update information on this STA entry based on what was received from the peer during the setup exchange. In theory, this could use NL80211_ATTR_STA_AID to set the peer AID just like this is used in AP mode to set the AID of an association station. However, existing cfg80211 validation rules prevent this attribute from being used with set_station operation. To avoid interoperability issues between different kernel and user space version combinations, introduce a new nl80211 attribute for the purpose of setting TDLS peer AID. This attribute can be used in both the new_station and set_station operations. It is not supposed to be allowed to change the AID value during the lifetime of the STA entry, but that validation is left for drivers to do in the change_station callback. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++++++ net/wireless/nl80211.c | 11 +++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 06320713e9c9..32b060ea5266 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1431,6 +1431,11 @@ enum nl80211_commands { * @NL80211_ATTR_MAX_CRIT_PROT_DURATION: duration in milliseconds in which * the connection should have increased reliability (u16). * + * @NL80211_ATTR_PEER_AID: Association ID for the peer TDLS station (u16). + * This is similar to @NL80211_ATTR_STA_AID but with a difference of being + * allowed to be used with the first @NL80211_CMD_SET_STATION command to + * update a TDLS peer STA entry. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1729,6 +1734,8 @@ enum nl80211_attrs { NL80211_ATTR_CRIT_PROT_ID, NL80211_ATTR_MAX_CRIT_PROT_DURATION, + NL80211_ATTR_PEER_AID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5f10f7acfa06..14276af7964b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -378,6 +378,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MDID] = { .type = NLA_U16 }, [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 }, }; /* policy for the key attributes */ @@ -3872,6 +3873,8 @@ static int nl80211_set_station_tdls(struct genl_info *info, struct station_parameters *params) { /* Dummy STA entry gets updated once the peer capabilities are known */ + if (info->attrs[NL80211_ATTR_PEER_AID]) + params->aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params->ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); @@ -4012,7 +4015,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_AID]) + if (!info->attrs[NL80211_ATTR_STA_AID] && + !info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -4023,7 +4027,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + if (info->attrs[NL80211_ATTR_STA_AID]) + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + else + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); if (!params.aid || params.aid > IEEE80211_MAX_AID) return -EINVAL; -- cgit v1.3-14-g43fede From ff29feb9146d1c0020f2ccbb25369582c6a16681 Mon Sep 17 00:00:00 2001 From: "Lad, Prabhakar" Date: Mon, 20 May 2013 06:02:40 -0300 Subject: [media] videodev2.h: fix typos This patch fixes several typos in videodev2.h file Signed-off-by: Lad, Prabhakar Acked-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 16c3cf71eb94..2c5e67a45436 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -555,7 +555,7 @@ struct v4l2_jpegcompression { __u32 jpeg_markers; /* Which markers should go into the JPEG * output. Unless you exactly know what * you do, leave them untouched. - * Inluding less markers will make the + * Including less markers will make the * resulting code smaller, but there will * be fewer applications which can read it. * The presence of the APP and COM marker @@ -567,7 +567,7 @@ struct v4l2_jpegcompression { #define V4L2_JPEG_MARKER_DRI (1<<5) /* Define Restart Interval */ #define V4L2_JPEG_MARKER_COM (1<<6) /* Comment segment */ #define V4L2_JPEG_MARKER_APP (1<<7) /* App segment, driver will - * allways use APP0 */ + * always use APP0 */ }; /* @@ -900,7 +900,7 @@ typedef __u64 v4l2_std_id; /* * "Common" PAL - This macro is there to be compatible with the old * V4L1 concept of "PAL": /BGDKHI. - * Several PAL standards are mising here: /M, /N and /Nc + * Several PAL standards are missing here: /M, /N and /Nc */ #define V4L2_STD_PAL (V4L2_STD_PAL_BG |\ V4L2_STD_PAL_DK |\ @@ -1790,7 +1790,7 @@ struct v4l2_event_subscription { #define V4L2_CHIP_MATCH_HOST V4L2_CHIP_MATCH_BRIDGE #define V4L2_CHIP_MATCH_I2C_DRIVER 1 /* Match against I2C driver name */ #define V4L2_CHIP_MATCH_I2C_ADDR 2 /* Match against I2C 7-bit address */ -#define V4L2_CHIP_MATCH_AC97 3 /* Match against anciliary AC97 chip */ +#define V4L2_CHIP_MATCH_AC97 3 /* Match against ancillary AC97 chip */ #define V4L2_CHIP_MATCH_SUBDEV 4 /* Match against subdev index */ struct v4l2_dbg_match { -- cgit v1.3-14-g43fede From 7ec872114251b618adf5a2688de0ca00de63635d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 May 2013 01:11:11 +0000 Subject: net: ethtool: disambiguate XCVR_* meaning Add a comment which explains the real meaning of XCVR_INTERNAL (PHY and Ethernet MAC in the same package/die) and XCVR_EXTERNAL (PHY and Ethernet MAC in a different package/die). Most if not all of the drivers setting their transceiver type already do it the way the comment describes it. Signed-off-by: Florian Fainelli Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 0c9b44871df0..38dbafaa5341 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -993,8 +993,8 @@ enum ethtool_sfeatures_retval_bits { #define PORT_OTHER 0xff /* Which transceiver to use. */ -#define XCVR_INTERNAL 0x00 -#define XCVR_EXTERNAL 0x01 +#define XCVR_INTERNAL 0x00 /* PHY and MAC are in the same package */ +#define XCVR_EXTERNAL 0x01 /* PHY and MAC are in different packages */ #define XCVR_DUMMY1 0x02 #define XCVR_DUMMY2 0x03 #define XCVR_DUMMY3 0x04 -- cgit v1.3-14-g43fede From e057d3c31bdf87616b415c4b2cbf7310f54b9219 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 28 May 2013 13:01:52 +0200 Subject: cfg80211: support an active monitor interface flag An active monitor interface is one that is used for communication (via injection). It is expected to ACK incoming unicast packets. This is useful for running various 802.11 testing utilities that associate to an AP via injection and manage the state in user space. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 4 ++++ net/wireless/nl80211.c | 10 ++++++++++ 3 files changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index b3b076a46d50..13b247d26544 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -961,6 +961,7 @@ struct station_info { * @MONITOR_FLAG_CONTROL: pass control frames * @MONITOR_FLAG_OTHER_BSS: disable BSSID filtering * @MONITOR_FLAG_COOK_FRAMES: report frames after processing + * @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address */ enum monitor_flags { MONITOR_FLAG_FCSFAIL = 1<wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) + return -EOPNOTSUPP; + if (change) err = cfg80211_change_iface(rdev, dev, ntype, flags, ¶ms); else @@ -2395,6 +2400,11 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); + + if (!err && (flags & NL80211_MNTR_FLAG_ACTIVE) && + !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) + return -EOPNOTSUPP; + wdev = rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL80211_ATTR_IFNAME]), type, err ? NULL : &flags, ¶ms); -- cgit v1.3-14-g43fede From 4c4d41f200db375b2d2cc6d0a1de0606c8266398 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Thu, 6 Jun 2013 10:15:54 +0800 Subject: xfrm: add LINUX_MIB_XFRMACQUIREERROR statistic counter When host ping its peer, ICMP echo request packet triggers IPsec policy, then host negotiates SA secret with its peer. After IKE installed SA for OUT direction, but before SA for IN direction installed, host get ICMP echo reply from its peer. At the time being, the SA state for IN direction could be XFRM_STATE_ACQ, then the received packet will be dropped after adding LINUX_MIB_XFRMINSTATEINVALID statistic. Adding a LINUX_MIB_XFRMACQUIREERROR statistic counter for such scenario when SA in larval state is much clearer for user than LINUX_MIB_XFRMINSTATEINVALID which indicates the SA is totally bad. Signed-off-by: Fan Du Signed-off-by: Steffen Klassert --- include/uapi/linux/snmp.h | 1 + net/xfrm/xfrm_input.c | 5 +++++ net/xfrm/xfrm_proc.c | 1 + 3 files changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index df2e8b4f9c03..ea17542ff321 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -287,6 +287,7 @@ enum LINUX_MIB_XFRMOUTPOLERROR, /* XfrmOutPolError */ LINUX_MIB_XFRMFWDHDRERROR, /* XfrmFwdHdrError*/ LINUX_MIB_XFRMOUTSTATEINVALID, /* XfrmOutStateInvalid */ + LINUX_MIB_XFRMACQUIREERROR, /* XfrmAcquireError */ __LINUX_MIB_XFRMMAX }; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ab2bb42fe094..88843996f935 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -163,6 +163,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; spin_lock(&x->lock); + if (unlikely(x->km.state == XFRM_STATE_ACQ)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); + goto drop_unlock; + } + if (unlikely(x->km.state != XFRM_STATE_VALID)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); goto drop_unlock; diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index c721b0d9ab8b..80cd1e55b834 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -44,6 +44,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), + SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR), SNMP_MIB_SENTINEL }; -- cgit v1.3-14-g43fede From 060212928670593fb89243640bf05cf89560b023 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 10 Jun 2013 11:39:50 +0300 Subject: net: add low latency socket poll Adds an ndo_ll_poll method and the code that supports it. This method can be used by low latency applications to busy-poll Ethernet device queues directly from the socket code. sysctl_net_ll_poll controls how many microseconds to poll. Default is zero (disabled). Individual protocol support will be added by subsequent patches. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Signed-off-by: Eliezer Tamir Acked-by: Eric Dumazet Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 7 ++ include/linux/netdevice.h | 3 + include/linux/skbuff.h | 8 ++- include/net/ll_poll.h | 148 +++++++++++++++++++++++++++++++++++++++++++ include/net/sock.h | 4 ++ include/uapi/linux/snmp.h | 1 + net/Kconfig | 12 ++++ net/core/skbuff.c | 4 ++ net/core/sock.c | 6 ++ net/core/sysctl_net_core.c | 10 +++ net/ipv4/proc.c | 1 + net/socket.c | 6 ++ 12 files changed, 208 insertions(+), 2 deletions(-) create mode 100644 include/net/ll_poll.h (limited to 'include/uapi/linux') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index c1f8640c2fc8..85ab72dcdc3c 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -50,6 +50,13 @@ The maximum number of packets that kernel can handle on a NAPI interrupt, it's a Per-CPU variable. Default: 64 +low_latency_poll +---------------- +Low latency busy poll timeout. (needs CONFIG_NET_LL_RX_POLL) +Approximate time in us to spin waiting for packets on the device queue. +Recommended value is 50. May increase power usage. +Default: 0 (off) + rmem_default ------------ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 39bbd462d68e..2ecb96d9a1e5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -971,6 +971,9 @@ struct net_device_ops { struct netpoll_info *info, gfp_t gfp); void (*ndo_netpoll_cleanup)(struct net_device *dev); +#endif +#ifdef CONFIG_NET_LL_RX_POLL + int (*ndo_ll_poll)(struct napi_struct *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9995834d2cb6..400d82ae2b03 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -386,6 +386,7 @@ typedef unsigned char *sk_buff_data_t; * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions + * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking * @mark: Generic packet mark * @dropcount: total number of sk_receive_queue overflows @@ -500,8 +501,11 @@ struct sk_buff { /* 7/9 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); -#ifdef CONFIG_NET_DMA - dma_cookie_t dma_cookie; +#if defined CONFIG_NET_DMA || defined CONFIG_NET_LL_RX_POLL + union { + unsigned int napi_id; + dma_cookie_t dma_cookie; + }; #endif #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; diff --git a/include/net/ll_poll.h b/include/net/ll_poll.h new file mode 100644 index 000000000000..bc262f88173f --- /dev/null +++ b/include/net/ll_poll.h @@ -0,0 +1,148 @@ +/* + * Low Latency Sockets + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Author: Eliezer Tamir + * + * Contact Information: + * e1000-devel Mailing List + */ + +/* + * For now this depends on CONFIG_X86_TSC + */ + +#ifndef _LINUX_NET_LL_POLL_H +#define _LINUX_NET_LL_POLL_H + +#include +#include + +#ifdef CONFIG_NET_LL_RX_POLL + +struct napi_struct; +extern unsigned long sysctl_net_ll_poll __read_mostly; + +/* return values from ndo_ll_poll */ +#define LL_FLUSH_FAILED -1 +#define LL_FLUSH_BUSY -2 + +/* we don't mind a ~2.5% imprecision */ +#define TSC_MHZ (tsc_khz >> 10) + +static inline cycles_t ll_end_time(void) +{ + return TSC_MHZ * ACCESS_ONCE(sysctl_net_ll_poll) + get_cycles(); +} + +static inline bool sk_valid_ll(struct sock *sk) +{ + return sysctl_net_ll_poll && sk->sk_napi_id && + !need_resched() && !signal_pending(current); +} + +static inline bool can_poll_ll(cycles_t end_time) +{ + return !time_after((unsigned long)get_cycles(), + (unsigned long)end_time); +} + +static inline bool sk_poll_ll(struct sock *sk, int nonblock) +{ + cycles_t end_time = ll_end_time(); + const struct net_device_ops *ops; + struct napi_struct *napi; + int rc = false; + + /* + * rcu read lock for napi hash + * bh so we don't race with net_rx_action + */ + rcu_read_lock_bh(); + + napi = napi_by_id(sk->sk_napi_id); + if (!napi) + goto out; + + ops = napi->dev->netdev_ops; + if (!ops->ndo_ll_poll) + goto out; + + do { + + rc = ops->ndo_ll_poll(napi); + + if (rc == LL_FLUSH_FAILED) + break; /* permanent failure */ + + if (rc > 0) + /* local bh are disabled so it is ok to use _BH */ + NET_ADD_STATS_BH(sock_net(sk), + LINUX_MIB_LOWLATENCYRXPACKETS, rc); + + } while (skb_queue_empty(&sk->sk_receive_queue) + && can_poll_ll(end_time) && !nonblock); + + rc = !skb_queue_empty(&sk->sk_receive_queue); +out: + rcu_read_unlock_bh(); + return rc; +} + +/* used in the NIC receive handler to mark the skb */ +static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi) +{ + skb->napi_id = napi->napi_id; +} + +/* used in the protocol hanlder to propagate the napi_id to the socket */ +static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb) +{ + sk->sk_napi_id = skb->napi_id; +} + +#else /* CONFIG_NET_LL_RX_POLL */ + +static inline cycles_t ll_end_time(void) +{ + return 0; +} + +static inline bool sk_valid_ll(struct sock *sk) +{ + return false; +} + +static inline bool sk_poll_ll(struct sock *sk, int nonblock) +{ + return false; +} + +static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi) +{ +} + +static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb) +{ +} + +static inline bool can_poll_ll(cycles_t end_time) +{ + return false; +} + +#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* _LINUX_NET_LL_POLL_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 66772cf8c3c5..ac8e1818380c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -229,6 +229,7 @@ struct cg_proto; * @sk_omem_alloc: "o" is "option" or "other" * @sk_wmem_queued: persistent queue size * @sk_forward_alloc: space allocated forward + * @sk_napi_id: id of the last napi context to receive data for sk * @sk_allocation: allocation mode * @sk_sndbuf: size of send buffer in bytes * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, @@ -324,6 +325,9 @@ struct sock { int sk_forward_alloc; #ifdef CONFIG_RPS __u32 sk_rxhash; +#endif +#ifdef CONFIG_NET_LL_RX_POLL + unsigned int sk_napi_id; #endif atomic_t sk_drops; int sk_rcvbuf; diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index df2e8b4f9c03..26cbf76f8058 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -253,6 +253,7 @@ enum LINUX_MIB_TCPFASTOPENLISTENOVERFLOW, /* TCPFastOpenListenOverflow */ LINUX_MIB_TCPFASTOPENCOOKIEREQD, /* TCPFastOpenCookieReqd */ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */ + LINUX_MIB_LOWLATENCYRXPACKETS, /* LowLatencyRxPackets */ __LINUX_MIB_MAX }; diff --git a/net/Kconfig b/net/Kconfig index 523e43e6da1b..d6a9ce6e1800 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -243,6 +243,18 @@ config NETPRIO_CGROUP Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis +config NET_LL_RX_POLL + bool "Low Latency Receive Poll" + depends on X86_TSC + default n + ---help--- + Support Low Latency Receive Queue Poll. + (For network card drivers which support this option.) + When waiting for data in read or poll call directly into the the device driver + to flush packets which may be pending on the device queues into the stack. + + If unsure, say N. + config BQL boolean depends on SYSFS diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 73f57a0e1523..4a4181e16c1a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -733,6 +733,10 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->vlan_tci = old->vlan_tci; skb_copy_secmark(new, old); + +#ifdef CONFIG_NET_LL_RX_POLL + new->napi_id = old->napi_id; +#endif } /* diff --git a/net/core/sock.c b/net/core/sock.c index 88868a9d21da..788c0da5eed1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -139,6 +139,8 @@ #include #endif +#include + static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); @@ -2284,6 +2286,10 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); +#ifdef CONFIG_NET_LL_RX_POLL + sk->sk_napi_id = 0; +#endif + /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 741db5fc7806..4b48f39582b0 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -19,6 +19,7 @@ #include #include #include +#include static int one = 1; @@ -284,6 +285,15 @@ static struct ctl_table net_core_table[] = { .proc_handler = flow_limit_table_len_sysctl }, #endif /* CONFIG_NET_FLOW_LIMIT */ +#ifdef CONFIG_NET_LL_RX_POLL + { + .procname = "low_latency_poll", + .data = &sysctl_net_ll_poll, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax + }, +#endif #endif /* CONFIG_NET */ { .procname = "netdev_budget", diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 2a5bf86d2415..6577a1149a47 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -273,6 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), + SNMP_MIB_ITEM("LowLatencyRxPackets", LINUX_MIB_LOWLATENCYRXPACKETS), SNMP_MIB_SENTINEL }; diff --git a/net/socket.c b/net/socket.c index 3ebdcb805c51..21fd29f63ed2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -104,6 +104,12 @@ #include #include #include +#include + +#ifdef CONFIG_NET_LL_RX_POLL +unsigned long sysctl_net_ll_poll __read_mostly; +EXPORT_SYMBOL_GPL(sysctl_net_ll_poll); +#endif static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, -- cgit v1.3-14-g43fede From 9ba18891f75535eca3ef53138b48970eb60f5255 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 5 Jun 2013 10:08:00 -0400 Subject: bridge: Add flag to control mac learning. Allow user to control whether mac learning is enabled on the port. By default, mac learning is enabled. Disabling mac learning will cause new dynamic FDB entries to not be created for a particular port. Signed-off-by: Vlad Yasevich Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/bridge/br_if.c | 2 +- net/bridge/br_input.c | 6 ++++-- net/bridge/br_netlink.c | 6 +++++- net/bridge/br_private.h | 1 + net/bridge/br_sysfs_if.c | 2 ++ 6 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b05823cae784..8643809d8417 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -221,6 +221,7 @@ enum { IFLA_BRPORT_GUARD, /* bpdu guard */ IFLA_BRPORT_PROTECT, /* root port protection */ IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */ + IFLA_BRPORT_LEARNING, /* mac learning */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 4cdba60926ff..2c08911df578 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -221,7 +221,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; p->port_no = index; - p->flags = 0; + p->flags = BR_LEARNING; br_init_port(p); p->state = BR_STATE_DISABLED; br_stp_port_timer_init(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 828e2bcc1f52..7e993667d4bf 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -75,7 +75,8 @@ int br_handle_frame_finish(struct sk_buff *skb) /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; - br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); + if (p->flags & BR_LEARNING) + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb)) @@ -142,7 +143,8 @@ static int br_handle_local_finish(struct sk_buff *skb) u16 vid = 0; br_vlan_get_tag(skb, &vid); - br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); + if (p->flags & BR_LEARNING) + br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); return 0; /* process further */ } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8e3abf564798..ce902bf8a618 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -30,6 +30,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + 0; } @@ -56,7 +57,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || - nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE))) + nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || + nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING))) return -EMSGSIZE; return 0; @@ -281,6 +283,7 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, + [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -328,6 +331,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); + br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1b0ac95a5c37..04d7f43508f7 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -158,6 +158,7 @@ struct net_bridge_port #define BR_ROOT_BLOCK 0x00000004 #define BR_MULTICAST_FAST_LEAVE 0x00000008 #define BR_ADMIN_COST 0x00000010 +#define BR_LEARNING 0x00000020 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index a1ef1b6e14dc..707f3628e9cd 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -158,6 +158,7 @@ static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); +BRPORT_ATTR_FLAG(learning, BR_LEARNING); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -195,6 +196,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_hairpin_mode, &brport_attr_bpdu_guard, &brport_attr_root_block, + &brport_attr_learning, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, -- cgit v1.3-14-g43fede From 867a59436fc35593ae0e0efcd56cc6d2f8506586 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 5 Jun 2013 10:08:01 -0400 Subject: bridge: Add a flag to control unicast packet flood. Add a flag to control flood of unicast traffic. By default, flood is on and the bridge will flood unicast traffic if it doesn't know the destination. When the flag is turned off, unicast traffic without an FDB will not be forwarded to the specified port. Signed-off-by: Vlad Yasevich Reviewed-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/bridge/br_device.c | 8 ++++---- net/bridge/br_forward.c | 14 +++++++++----- net/bridge/br_if.c | 2 +- net/bridge/br_input.c | 9 ++++++--- net/bridge/br_netlink.c | 6 +++++- net/bridge/br_private.h | 6 ++++-- net/bridge/br_sysfs_if.c | 2 ++ 8 files changed, 32 insertions(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8643809d8417..da05a2698cb5 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -222,6 +222,7 @@ enum { IFLA_BRPORT_PROTECT, /* root port protection */ IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */ IFLA_BRPORT_LEARNING, /* mac learning */ + IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 75f3239130f8..2ef66781fedb 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -58,10 +58,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); if (is_broadcast_ether_addr(dest)) - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); else if (is_multicast_ether_addr(dest)) { if (unlikely(netpoll_tx_running(dev))) { - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); goto out; } if (br_multicast_rcv(br, NULL, skb)) { @@ -73,11 +73,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) br_multicast_deliver(mdst, skb); else - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) br_deliver(dst->dst, skb); else - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, true); out: rcu_read_unlock(); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 092b20e4ee4c..4b81b1471789 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -174,7 +174,8 @@ out: static void br_flood(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb0, void (*__packet_hook)(const struct net_bridge_port *p, - struct sk_buff *skb)) + struct sk_buff *skb), + bool unicast) { struct net_bridge_port *p; struct net_bridge_port *prev; @@ -182,6 +183,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) { + /* Do not flood unicast traffic to ports that turn it off */ + if (unicast && !(p->flags & BR_FLOOD)) + continue; prev = maybe_deliver(prev, p, skb, __packet_hook); if (IS_ERR(prev)) goto out; @@ -203,16 +207,16 @@ out: /* called with rcu_read_lock */ -void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb) +void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast) { - br_flood(br, skb, NULL, __br_deliver); + br_flood(br, skb, NULL, __br_deliver, unicast); } /* called under bridge lock */ void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, - struct sk_buff *skb2) + struct sk_buff *skb2, bool unicast) { - br_flood(br, skb, skb2, __br_forward); + br_flood(br, skb, skb2, __br_forward, unicast); } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 2c08911df578..5623be6b9ecd 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -221,7 +221,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; p->port_no = index; - p->flags = BR_LEARNING; + p->flags = BR_LEARNING | BR_FLOOD; br_init_port(p); p->state = BR_STATE_DISABLED; br_stp_port_timer_init(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 7e993667d4bf..1b8b8b824cd7 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -65,6 +65,7 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct sk_buff *skb2; + bool unicast = true; u16 vid = 0; if (!p || p->state == BR_STATE_DISABLED) @@ -95,9 +96,10 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; - if (is_broadcast_ether_addr(dest)) + if (is_broadcast_ether_addr(dest)) { skb2 = skb; - else if (is_multicast_ether_addr(dest)) { + unicast = false; + } else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb, vid); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && mdst->mglist) || @@ -110,6 +112,7 @@ int br_handle_frame_finish(struct sk_buff *skb) } else skb2 = skb; + unicast = false; br->dev->stats.multicast++; } else if ((dst = __br_fdb_get(br, dest, vid)) && dst->is_local) { @@ -123,7 +126,7 @@ int br_handle_frame_finish(struct sk_buff *skb) dst->used = jiffies; br_forward(dst->dst, skb, skb2); } else - br_flood_forward(br, skb, skb2); + br_flood_forward(br, skb, skb2, unicast); } if (skb2) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index ce902bf8a618..1fc30abd3a52 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -31,6 +31,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + 0; } @@ -58,7 +59,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || - nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING))) + nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || + nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD))) return -EMSGSIZE; return 0; @@ -284,6 +286,7 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, + [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -332,6 +335,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); + br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 04d7f43508f7..3be89b3ce17b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -159,6 +159,7 @@ struct net_bridge_port #define BR_MULTICAST_FAST_LEAVE 0x00000008 #define BR_ADMIN_COST 0x00000010 #define BR_LEARNING 0x00000020 +#define BR_FLOOD 0x00000040 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; @@ -414,9 +415,10 @@ extern int br_dev_queue_push_xmit(struct sk_buff *skb); extern void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0); extern int br_forward_finish(struct sk_buff *skb); -extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); +extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, + bool unicast); extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, - struct sk_buff *skb2); + struct sk_buff *skb2, bool unicast); /* br_if.c */ extern void br_port_carrier_check(struct net_bridge_port *p); diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 707f3628e9cd..2a2cdb756d51 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -159,6 +159,7 @@ BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); BRPORT_ATTR_FLAG(learning, BR_LEARNING); +BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -197,6 +198,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_bpdu_guard, &brport_attr_root_block, &brport_attr_learning, + &brport_attr_unicast_flood, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, -- cgit v1.3-14-g43fede From 45203a3b380cee28f570475c0d28c169f908c209 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Jun 2013 08:43:22 -0700 Subject: net_sched: add 64bit rate estimators struct gnet_stats_rate_est contains u32 fields, so the bytes per second field can wrap at 34360Mbit. Add a new gnet_stats_rate_est64 structure to get 64bit bps/pps fields, and switch the kernel to use this structure natively. This structure is dumped to user space as a new attribute : TCA_STATS_RATE_EST64 Old tc command will now display the capped bps (to 34360Mbit), instead of wrapped values, and updated tc command will display correct information. Old tc command output, after patch : eric:~# tc -s -d qd sh dev lo qdisc pfifo 8001: root refcnt 2 limit 1000p Sent 80868245400 bytes 1978837 pkt (dropped 0, overlimits 0 requeues 0) rate 34360Mbit 189696pps backlog 0b 0p requeues 0 This patch carefully reorganizes "struct Qdisc" layout to get optimal performance on SMP. Signed-off-by: Eric Dumazet Cc: Ben Hutchings Signed-off-by: David S. Miller --- include/net/act_api.h | 2 +- include/net/gen_stats.h | 10 +++++----- include/net/netfilter/xt_rateest.h | 2 +- include/net/sch_generic.h | 13 +++++++------ include/uapi/linux/gen_stats.h | 11 +++++++++++ net/core/gen_estimator.c | 12 ++++++------ net/core/gen_stats.c | 22 +++++++++++++++++----- net/netfilter/xt_rateest.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 2 +- net/sched/sch_qfq.c | 2 +- 13 files changed, 54 insertions(+), 30 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/act_api.h b/include/net/act_api.h index 06ef7e926a66..b8ffac7b6bab 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -18,7 +18,7 @@ struct tcf_common { struct tcf_t tcfc_tm; struct gnet_stats_basic_packed tcfc_bstats; struct gnet_stats_queue tcfc_qstats; - struct gnet_stats_rate_est tcfc_rate_est; + struct gnet_stats_rate_est64 tcfc_rate_est; spinlock_t tcfc_lock; struct rcu_head tcfc_rcu; }; diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index a79b6cfb02a8..cf8439ba4d11 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -30,7 +30,7 @@ extern int gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b); extern int gnet_stats_copy_rate_est(struct gnet_dump *d, const struct gnet_stats_basic_packed *b, - struct gnet_stats_rate_est *r); + struct gnet_stats_rate_est64 *r); extern int gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q); extern int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); @@ -38,13 +38,13 @@ extern int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); extern int gnet_stats_finish_copy(struct gnet_dump *d); extern int gen_new_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est, + struct gnet_stats_rate_est64 *rate_est, spinlock_t *stats_lock, struct nlattr *opt); extern void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est); + struct gnet_stats_rate_est64 *rate_est); extern int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est, + struct gnet_stats_rate_est64 *rate_est, spinlock_t *stats_lock, struct nlattr *opt); extern bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est *rate_est); + const struct gnet_stats_rate_est64 *rate_est); #endif diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h index 5a2978d1cb22..495c71f66e7e 100644 --- a/include/net/netfilter/xt_rateest.h +++ b/include/net/netfilter/xt_rateest.h @@ -6,7 +6,7 @@ struct xt_rateest { struct gnet_stats_basic_packed bstats; spinlock_t lock; /* keep rstats and lock on same cache line to speedup xt_rateest_mt() */ - struct gnet_stats_rate_est rstats; + struct gnet_stats_rate_est64 rstats; /* following fields not accessed in hot path */ struct hlist_node list; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e7f4e21cc3e1..df5676029827 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -58,14 +58,12 @@ struct Qdisc { * multiqueue device. */ #define TCQ_F_WARN_NONWC (1 << 16) - int padded; + u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; struct list_head list; u32 handle; u32 parent; - atomic_t refcnt; - struct gnet_stats_rate_est rate_est; int (*reshape_fail)(struct sk_buff *skb, struct Qdisc *q); @@ -76,8 +74,9 @@ struct Qdisc { */ struct Qdisc *__parent; struct netdev_queue *dev_queue; - struct Qdisc *next_sched; + struct gnet_stats_rate_est64 rate_est; + struct Qdisc *next_sched; struct sk_buff *gso_skb; /* * For performance sake on SMP, we put highly modified fields at the end @@ -88,8 +87,10 @@ struct Qdisc { unsigned int __state; struct gnet_stats_queue qstats; struct rcu_head rcu_head; - spinlock_t busylock; - u32 limit; + int padded; + atomic_t refcnt; + + spinlock_t busylock ____cacheline_aligned_in_smp; }; static inline bool qdisc_is_running(const struct Qdisc *qdisc) diff --git a/include/uapi/linux/gen_stats.h b/include/uapi/linux/gen_stats.h index 552c8a0a12d1..6487317ea619 100644 --- a/include/uapi/linux/gen_stats.h +++ b/include/uapi/linux/gen_stats.h @@ -9,6 +9,7 @@ enum { TCA_STATS_RATE_EST, TCA_STATS_QUEUE, TCA_STATS_APP, + TCA_STATS_RATE_EST64, __TCA_STATS_MAX, }; #define TCA_STATS_MAX (__TCA_STATS_MAX - 1) @@ -37,6 +38,16 @@ struct gnet_stats_rate_est { __u32 pps; }; +/** + * struct gnet_stats_rate_est64 - rate estimator + * @bps: current byte rate + * @pps: current packet rate + */ +struct gnet_stats_rate_est64 { + __u64 bps; + __u64 pps; +}; + /** * struct gnet_stats_queue - queuing statistics * @qlen: queue length diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index d9d198aa9fed..6b5b6e7013ca 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -82,7 +82,7 @@ struct gen_estimator { struct list_head list; struct gnet_stats_basic_packed *bstats; - struct gnet_stats_rate_est *rate_est; + struct gnet_stats_rate_est64 *rate_est; spinlock_t *stats_lock; int ewma_log; u64 last_bytes; @@ -167,7 +167,7 @@ static void gen_add_node(struct gen_estimator *est) static struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est *rate_est) + const struct gnet_stats_rate_est64 *rate_est) { struct rb_node *p = est_root.rb_node; @@ -203,7 +203,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats * */ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est, + struct gnet_stats_rate_est64 *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { @@ -258,7 +258,7 @@ EXPORT_SYMBOL(gen_new_estimator); * Note : Caller should respect an RCU grace period before freeing stats_lock */ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est) + struct gnet_stats_rate_est64 *rate_est) { struct gen_estimator *e; @@ -290,7 +290,7 @@ EXPORT_SYMBOL(gen_kill_estimator); * Returns 0 on success or a negative error code. */ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est, + struct gnet_stats_rate_est64 *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { gen_kill_estimator(bstats, rate_est); @@ -306,7 +306,7 @@ EXPORT_SYMBOL(gen_replace_estimator); * Returns true if estimator is active, and false if not. */ bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est *rate_est) + const struct gnet_stats_rate_est64 *rate_est) { bool res; diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index ddedf211e588..9d3d9e78397b 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -143,18 +143,30 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); int gnet_stats_copy_rate_est(struct gnet_dump *d, const struct gnet_stats_basic_packed *b, - struct gnet_stats_rate_est *r) + struct gnet_stats_rate_est64 *r) { + struct gnet_stats_rate_est est; + int res; + if (b && !gen_estimator_active(b, r)) return 0; + est.bps = min_t(u64, UINT_MAX, r->bps); + /* we have some time before reaching 2^32 packets per second */ + est.pps = r->pps; + if (d->compat_tc_stats) { - d->tc_stats.bps = r->bps; - d->tc_stats.pps = r->pps; + d->tc_stats.bps = est.bps; + d->tc_stats.pps = est.pps; } - if (d->tail) - return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r)); + if (d->tail) { + res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est)); + if (res < 0 || est.bps == r->bps) + return res; + /* emit 64bit stats only if needed */ + return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r)); + } return 0; } diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index ed0db15ab00e..7720b036d76a 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -18,7 +18,7 @@ static bool xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rateest_match_info *info = par->matchinfo; - struct gnet_stats_rate_est *r; + struct gnet_stats_rate_est64 *r; u_int32_t bps1, bps2, pps1, pps2; bool ret = true; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 1bc210ffcba2..71a568862557 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -130,7 +130,7 @@ struct cbq_class { psched_time_t penalized; struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct tc_cbq_xstats xstats; struct tcf_proto *filter_list; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 759b308d1a8d..8302717ea303 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -25,7 +25,7 @@ struct drr_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct list_head alist; struct Qdisc *qdisc; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 9facea03faeb..c4075610502c 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -114,7 +114,7 @@ struct hfsc_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; unsigned int level; /* class level in hierarchy */ struct tcf_proto *filter_list; /* filter list */ unsigned int filter_cnt; /* filter count */ diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index adaedd79389c..162fb800754c 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -78,7 +78,7 @@ struct htb_class { /* general class parameters */ struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct tc_htb_xstats xstats; /* our special stats */ int refcnt; /* usage count of this class */ diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d51852bba01c..7c195d972bf0 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -138,7 +138,7 @@ struct qfq_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct Qdisc *qdisc; struct list_head alist; /* Link for active-classes list. */ struct qfq_aggregate *agg; /* Parent aggregate. */ -- cgit v1.3-14-g43fede From a0a2af765543a96bdf188ec57dd201e708195302 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 3 Jun 2013 18:10:45 +0200 Subject: nl80211: add kernel-doc for NL80211_FEATURE_ACTIVE_MONITOR Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5920715278c2..1f0019d4cce6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3579,6 +3579,10 @@ enum nl80211_ap_sme_features { * Peering Management entity which may be implemented by registering for * beacons or NL80211_CMD_NEW_PEER_CANDIDATE events. The mesh beacon is * still generated by the driver. + * @NL80211_FEATURE_ACTIVE_MONITOR: This driver supports an active monitor + * interface. An active monitor interface behaves like a normal monitor + * interface, but gets added to the driver. It ensures that incoming + * unicast packets directed at the configured interface address get ACKed. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, -- cgit v1.3-14-g43fede From 8e7c053853b7d299e8a2b8733659b0df8eee51f7 Mon Sep 17 00:00:00 2001 From: Colleen Twitty Date: Mon, 3 Jun 2013 09:53:39 -0700 Subject: {nl,cfg}80211: make peer link expiration time configurable If a STA has a peer that it hasn't seen any tx activity from for a certain length of time, the peer link is expired. This means the inactive STA is removed from the list of peers and that STA is not considered a peer again unless it re-peers. Previously, this inactivity time was always 30 minutes. Now, add it to the mesh configuration and allow it to be configured. Retain 30 minutes as a default value. Signed-off-by: Colleen Twitty Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ include/uapi/linux/nl80211.h | 5 +++++ net/wireless/mesh.c | 2 ++ net/wireless/nl80211.c | 8 +++++++- 4 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 855c76ccff38..31ca11672ca8 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1124,6 +1124,9 @@ struct bss_parameters { * setting for new peer links. * @dot11MeshAwakeWindowDuration: The duration in TUs the STA will remain awake * after transmitting its beacon. + * @plink_timeout: If no tx activity is seen from a STA we've established + * peering with for longer than this time (in seconds), then remove it + * from the STA's list of peers. Default is 30 minutes. */ struct mesh_config { u16 dot11MeshRetryTimeout; @@ -1153,6 +1156,7 @@ struct mesh_config { u16 dot11MeshHWMPconfirmationInterval; enum nl80211_mesh_power_mode power_mode; u16 dot11MeshAwakeWindowDuration; + u32 plink_timeout; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1f0019d4cce6..ca6facf4df0c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2577,6 +2577,10 @@ enum nl80211_mesh_power_mode { * * @NL80211_MESHCONF_AWAKE_WINDOW: awake window duration (in TUs) * + * @NL80211_MESHCONF_PLINK_TIMEOUT: If no tx activity is seen from a STA we've + * established peering with for longer than this time (in seconds), then + * remove it from the STA's list of peers. Default is 30 minutes. + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -2608,6 +2612,7 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, NL80211_MESHCONF_POWER_MODE, NL80211_MESHCONF_AWAKE_WINDOW, + NL80211_MESHCONF_PLINK_TIMEOUT, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 5dfb289ab761..0daaf72e1b81 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -18,6 +18,7 @@ #define MESH_PATH_TO_ROOT_TIMEOUT 6000 #define MESH_ROOT_INTERVAL 5000 #define MESH_ROOT_CONFIRMATION_INTERVAL 2000 +#define MESH_DEFAULT_PLINK_TIMEOUT 1800 /* timeout in seconds */ /* * Minimum interval between two consecutive PREQs originated by the same @@ -75,6 +76,7 @@ const struct mesh_config default_mesh_config = { .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, .power_mode = NL80211_MESH_POWER_ACTIVE, .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW, + .plink_timeout = MESH_DEFAULT_PLINK_TIMEOUT, }; const struct mesh_setup default_mesh_setup = { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 88e820b73674..8aa83c04d4eb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4575,7 +4575,9 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE, cur_params.power_mode) || nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW, - cur_params.dot11MeshAwakeWindowDuration)) + cur_params.dot11MeshAwakeWindowDuration) || + nla_put_u32(msg, NL80211_MESHCONF_PLINK_TIMEOUT, + cur_params.plink_timeout)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -4616,6 +4618,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 }, [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, + [NL80211_MESHCONF_PLINK_TIMEOUT] = { .type = NLA_U32 }, }; static const struct nla_policy @@ -4753,6 +4756,9 @@ do { \ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, 0, 65535, mask, NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 1, 0xffffffff, + mask, NL80211_MESHCONF_PLINK_TIMEOUT, + nla_get_u32); if (mask_out) *mask_out = mask; -- cgit v1.3-14-g43fede From 274038f8c94c493e2977983e2aecb5f5f0778479 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 11 Jun 2013 14:41:24 +0400 Subject: tun: Report "persist" flag to userspace The TUN_PERSIST flag is not reported at all -- both TUNGETIFF, and sysfs "flags" attribute skip one. Knowing whether a device is persistent or not is critical for checkpoint-restore, thus I propose to add the read-only IFF_PERSIST one for this. Setting this new IFF_PERSIST is hardly possible, as TUNSETIFF doesn't check for unknown flags being zero and thus there can be trash. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- drivers/net/tun.c | 3 +++ include/uapi/linux/if_tun.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 89776c592151..b90a73165d30 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1530,6 +1530,9 @@ static int tun_flags(struct tun_struct *tun) if (tun->flags & TUN_TAP_MQ) flags |= IFF_MULTI_QUEUE; + if (tun->flags & TUN_PERSIST) + flags |= IFF_PERSIST; + return flags; } diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 2835b85fd46d..82334f88967e 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -68,6 +68,8 @@ #define IFF_MULTI_QUEUE 0x0100 #define IFF_ATTACH_QUEUE 0x0200 #define IFF_DETACH_QUEUE 0x0400 +/* read-only flag */ +#define IFF_PERSIST 0x0800 /* Features for GSO (TUNSETOFFLOAD). */ #define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */ -- cgit v1.3-14-g43fede From 9674da8759df0d6c0d24e1ede6e2a1acdef91e3c Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Mon, 29 Apr 2013 17:13:27 +0200 Subject: NFC: Add firmware upload netlink command As several NFC chipsets can have their firmwares upgraded and reflashed, this patchset adds a new netlink command to trigger that the driver loads or flashes a new firmware. This will allows userspace triggered firmware upgrade through netlink. The firmware name or hint is passed as a parameter, and the driver will eventually fetch the firmware binary through the request_firmware API. The cmd can only be executed when the nfc dev is not in use. Actual firmware loading/flashing is an asynchronous operation. Result of the operation shall send a new event up to user space through the nfc dev multicast socket. During operation, the nfc dev is not openable and thus not usable. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 2 ++ include/uapi/linux/nfc.h | 6 +++++ net/nfc/core.c | 46 +++++++++++++++++++++++++++++++++++ net/nfc/netlink.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 5 ++++ 5 files changed, 122 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 5eb80bb3cbb2..3563dbdcaaf2 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -70,6 +70,7 @@ struct nfc_ops { int (*check_presence)(struct nfc_dev *dev, struct nfc_target *target); int (*enable_se)(struct nfc_dev *dev, u32 secure_element); int (*disable_se)(struct nfc_dev *dev, u32 secure_element); + int (*fw_upload)(struct nfc_dev *dev, const char *firmware_name); }; #define NFC_TARGET_IDX_ANY -1 @@ -104,6 +105,7 @@ struct nfc_dev { int targets_generation; struct device dev; bool dev_up; + bool fw_upload_in_progress; u8 rf_mode; bool polling; struct nfc_target *active_target; diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 7c6f627a717d..b6cbd164f146 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -69,6 +69,8 @@ * starting a poll from a device which has a secure element enabled means * we want to do SE based card emulation. * @NFC_CMD_DISABLE_SE: Disable the physical link to a specific secure element. + * @NFC_CMD_FW_UPLOAD: Request to Load/flash firmware, or event to inform that + * some firmware was loaded */ enum nfc_commands { NFC_CMD_UNSPEC, @@ -92,6 +94,7 @@ enum nfc_commands { NFC_CMD_DISABLE_SE, NFC_CMD_LLC_SDREQ, NFC_EVENT_LLC_SDRES, + NFC_CMD_FW_UPLOAD, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; @@ -121,6 +124,7 @@ enum nfc_commands { * @NFC_ATTR_LLC_PARAM_RW: Receive Window size parameter * @NFC_ATTR_LLC_PARAM_MIUX: MIU eXtension parameter * @NFC_ATTR_SE: Available Secure Elements + * @NFC_ATTR_FIRMWARE_NAME: Free format firmware version */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -143,6 +147,7 @@ enum nfc_attrs { NFC_ATTR_LLC_PARAM_MIUX, NFC_ATTR_SE, NFC_ATTR_LLC_SDP, + NFC_ATTR_FIRMWARE_NAME, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; @@ -162,6 +167,7 @@ enum nfc_sdp_attr { #define NFC_SENSB_RES_MAXSIZE 12 #define NFC_SENSF_RES_MAXSIZE 18 #define NFC_GB_MAXSIZE 48 +#define NFC_FIRMWARE_NAME_MAXSIZE 32 /* NFC protocols */ #define NFC_PROTO_JEWEL 1 diff --git a/net/nfc/core.c b/net/nfc/core.c index 40d2527693da..eb3cecf1764e 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -44,6 +44,47 @@ DEFINE_MUTEX(nfc_devlist_mutex); /* NFC device ID bitmap */ static DEFINE_IDA(nfc_index_ida); +int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name) +{ + int rc = 0; + + pr_debug("%s do firmware %s\n", dev_name(&dev->dev), firmware_name); + + device_lock(&dev->dev); + + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (dev->dev_up) { + rc = -EBUSY; + goto error; + } + + if (!dev->ops->fw_upload) { + rc = -EOPNOTSUPP; + goto error; + } + + dev->fw_upload_in_progress = true; + rc = dev->ops->fw_upload(dev, firmware_name); + if (rc) + dev->fw_upload_in_progress = false; + +error: + device_unlock(&dev->dev); + return rc; +} + +int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +{ + dev->fw_upload_in_progress = false; + + return nfc_genl_fw_upload_done(dev, firmware_name); +} +EXPORT_SYMBOL(nfc_fw_upload_done); + /** * nfc_dev_up - turn on the NFC device * @@ -69,6 +110,11 @@ int nfc_dev_up(struct nfc_dev *dev) goto error; } + if (dev->fw_upload_in_progress) { + rc = -EBUSY; + goto error; + } + if (dev->dev_up) { rc = -EALREADY; goto error; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f0c4d61f37c0..1deadad9a285 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -56,6 +56,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 }, [NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 }, [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, + [NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING, + .len = NFC_FIRMWARE_NAME_MAXSIZE }, }; static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = { @@ -1025,6 +1027,62 @@ exit: return rc; } +static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc; + u32 idx; + char firmware_name[NFC_FIRMWARE_NAME_MAXSIZE + 1]; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + nla_strlcpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME], + sizeof(firmware_name)); + + rc = nfc_fw_upload(dev, firmware_name); + + nfc_put_device(dev); + return rc; +} + +int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_CMD_FW_UPLOAD); + if (!hdr) + goto free_msg; + + if (nla_put_string(msg, NFC_ATTR_FIRMWARE_NAME, firmware_name) || + nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + static struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -1084,6 +1142,11 @@ static struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_llc_sdreq, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_FW_UPLOAD, + .doit = nfc_genl_fw_upload, + .policy = nfc_genl_policy, + }, }; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index afa1f84ba040..cf0c48165996 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -120,6 +120,11 @@ static inline void nfc_device_iter_exit(struct class_dev_iter *iter) class_dev_iter_exit(iter); } +int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name); +int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); + +int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); + int nfc_dev_up(struct nfc_dev *dev); int nfc_dev_down(struct nfc_dev *dev); -- cgit v1.3-14-g43fede From 1d8faf48c74b8329a0322dc4b2a2030ae5003c86 Mon Sep 17 00:00:00 2001 From: Rony Efraim Date: Thu, 13 Jun 2013 13:19:10 +0300 Subject: net/core: Add VF link state control Add netlink directives and ndo entry to allow for controling VF link, which can be in one of three states: Auto - VF link state reflects the PF link state (default) Up - VF link state is up, traffic from VF to VF works even if the actual PF link is down Down - VF link state is down, no traffic from/to this VF, can be of use while configuring the VF Signed-off-by: Rony Efraim Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/if_link.h | 1 + include/linux/netdevice.h | 3 +++ include/uapi/linux/if_link.h | 13 +++++++++++++ net/core/rtnetlink.c | 22 ++++++++++++++++++++-- 4 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index c3f817c3eb45..a86784dec3d3 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -12,5 +12,6 @@ struct ifla_vf_info { __u32 qos; __u32 tx_rate; __u32 spoofchk; + __u32 linkstate; }; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8c9fcc42502a..09b4188c1ea7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -829,6 +829,7 @@ struct netdev_fcoe_hbainfo { * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_config)(struct net_device *dev, * int vf, struct ifla_vf_info *ivf); + * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); * int (*ndo_set_vf_port)(struct net_device *dev, int vf, * struct nlattr *port[]); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); @@ -986,6 +987,8 @@ struct net_device_ops { int (*ndo_get_vf_config)(struct net_device *dev, int vf, struct ifla_vf_info *ivf); + int (*ndo_set_vf_link_state)(struct net_device *dev, + int vf, int link_state); int (*ndo_set_vf_port)(struct net_device *dev, int vf, struct nlattr *port[]); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index da05a2698cb5..03f6170ab337 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -338,6 +338,7 @@ enum { IFLA_VF_VLAN, IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ + IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ __IFLA_VF_MAX, }; @@ -364,6 +365,18 @@ struct ifla_vf_spoofchk { __u32 setting; }; +enum { + IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */ + IFLA_VF_LINK_STATE_ENABLE, /* link always up */ + IFLA_VF_LINK_STATE_DISABLE, /* link always down */ + __IFLA_VF_LINK_STATE_MAX, +}; + +struct ifla_vf_link_state { + __u32 vf; + __u32 link_state; +}; + /* VF ports management section * * Nested layout of set/get msg is: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 49c14451d8ab..9007533867f0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -947,6 +947,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_vlan vf_vlan; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_spoofchk vf_spoofchk; + struct ifla_vf_link_state vf_linkstate; /* * Not all SR-IOV capable drivers support the @@ -956,18 +957,24 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, */ ivi.spoofchk = -1; memset(ivi.mac, 0, sizeof(ivi.mac)); + /* The default value for VF link state is "auto" + * IFLA_VF_LINK_STATE_AUTO which equals zero + */ + ivi.linkstate = 0; if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = - vf_spoofchk.vf = ivi.vf; + vf_spoofchk.vf = + vf_linkstate.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; vf_tx_rate.rate = ivi.tx_rate; vf_spoofchk.setting = ivi.spoofchk; + vf_linkstate.link_state = ivi.linkstate; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -978,7 +985,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate) || nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), - &vf_spoofchk)) + &vf_spoofchk) || + nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate), + &vf_linkstate)) goto nla_put_failure; nla_nest_end(skb, vf); } @@ -1238,6 +1247,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) ivs->setting); break; } + case IFLA_VF_LINK_STATE: { + struct ifla_vf_link_state *ivl; + ivl = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + break; + } default: err = -EINVAL; break; -- cgit v1.3-14-g43fede From 322bce957e9b0e30ef7147dae0414ad8f3f558c8 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 27 May 2013 15:29:11 +0200 Subject: NFC: pn533: Copy NFCID2 through ATR_REQ When using NFC-F we should copy the NFCID2 buffer that we got from SENSF_RES through the ATR_REQ NFCID3 buffer. Not doing so violates NFC Forum digital requirement #189. Signed-off-by: Samuel Ortiz --- drivers/nfc/pn533.c | 14 +++++++++++++- include/net/nfc/nfc.h | 2 ++ include/uapi/linux/nfc.h | 2 ++ 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c index 6bd4f598b3e1..e196bdfcfc30 100644 --- a/drivers/nfc/pn533.c +++ b/drivers/nfc/pn533.c @@ -1235,7 +1235,7 @@ static int pn533_target_found_type_a(struct nfc_target *nfc_tgt, u8 *tgt_data, struct pn533_target_felica { u8 pol_res; u8 opcode; - u8 nfcid2[8]; + u8 nfcid2[NFC_NFCID2_MAXSIZE]; u8 pad[8]; /* optional */ u8 syst_code[]; @@ -1275,6 +1275,9 @@ static int pn533_target_found_felica(struct nfc_target *nfc_tgt, u8 *tgt_data, memcpy(nfc_tgt->sensf_res, &tgt_felica->opcode, 9); nfc_tgt->sensf_res_len = 9; + memcpy(nfc_tgt->nfcid2, tgt_felica->nfcid2, NFC_NFCID2_MAXSIZE); + nfc_tgt->nfcid2_len = NFC_NFCID2_MAXSIZE; + return 0; } @@ -2084,6 +2087,9 @@ static int pn533_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, if (comm_mode == NFC_COMM_PASSIVE) skb_len += PASSIVE_DATA_LEN; + if (target && target->nfcid2_len) + skb_len += NFC_NFCID3_MAXSIZE; + skb = pn533_alloc_skb(dev, skb_len); if (!skb) return -ENOMEM; @@ -2100,6 +2106,12 @@ static int pn533_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, *next |= 1; } + if (target && target->nfcid2_len) { + memcpy(skb_put(skb, NFC_NFCID3_MAXSIZE), target->nfcid2, + target->nfcid2_len); + *next |= 2; + } + if (gb != NULL && gb_len > 0) { memcpy(skb_put(skb, gb_len), gb, gb_len); *next |= 4; /* We have some Gi */ diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 3563dbdcaaf2..8fc1784a264d 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -84,6 +84,8 @@ struct nfc_target { u8 sel_res; u8 nfcid1_len; u8 nfcid1[NFC_NFCID1_MAXSIZE]; + u8 nfcid2_len; + u8 nfcid2[NFC_NFCID2_MAXSIZE]; u8 sensb_res_len; u8 sensb_res[NFC_SENSB_RES_MAXSIZE]; u8 sensf_res_len; diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index b6cbd164f146..fb304fb774cc 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -164,6 +164,8 @@ enum nfc_sdp_attr { #define NFC_DEVICE_NAME_MAXSIZE 8 #define NFC_NFCID1_MAXSIZE 10 +#define NFC_NFCID2_MAXSIZE 8 +#define NFC_NFCID3_MAXSIZE 10 #define NFC_SENSB_RES_MAXSIZE 12 #define NFC_SENSF_RES_MAXSIZE 18 #define NFC_GB_MAXSIZE 48 -- cgit v1.3-14-g43fede From fed7c25ec0d4894edfc36bbe5c5231e52f45483a Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 15:28:38 +0200 Subject: NFC: Add secure elements addition and removal API This API will allow NFC drivers to add and remove the secure elements they know about or detect. Typically this should be called (asynchronously or not) from the driver or the host interface stack detect_se hook. Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 22 +++++++++++++++++++++- include/uapi/linux/nfc.h | 4 +++- net/nfc/core.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 68 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 5187ec70b66a..0e353f1658bb 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -97,6 +97,23 @@ struct nfc_target { u8 logical_idx; }; +/** + * nfc_se - A structure for NFC accessible secure elements. + * + * @idx: The secure element index. User space will enable or + * disable a secure element by its index. + * @type: The secure element type. It can be SE_UICC or + * SE_EMBEDDED. + * @state: The secure element state, either enabled or disabled. + * + */ +struct nfc_se { + struct list_head list; + u32 idx; + u16 type; + u16 state; +}; + struct nfc_genl_data { u32 poll_req_portid; struct mutex genl_data_mutex; @@ -118,7 +135,7 @@ struct nfc_dev { struct nfc_genl_data genl_data; u32 supported_protocols; - u32 active_se; + struct list_head secure_elements; int tx_headroom; int tx_tailroom; @@ -221,4 +238,7 @@ int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb); void nfc_driver_failure(struct nfc_dev *dev, int err); +int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type); +int nfc_remove_se(struct nfc_dev *dev, u32 se_idx); + #endif /* __NET_NFC_H */ diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index fb304fb774cc..3a57cef0b986 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -199,10 +199,12 @@ enum nfc_sdp_attr { #define NFC_PROTO_ISO14443_B_MASK (1 << NFC_PROTO_ISO14443_B) /* NFC Secure Elements */ -#define NFC_SE_NONE 0x0 #define NFC_SE_UICC 0x1 #define NFC_SE_EMBEDDED 0x2 +#define NFC_SE_DISABLED 0x0 +#define NFC_SE_ENABLED 0x1 + struct sockaddr_nfc { sa_family_t sa_family; __u32 dev_idx; diff --git a/net/nfc/core.c b/net/nfc/core.c index a43a56d7f4be..dacadfbcacea 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -760,6 +760,49 @@ inline void nfc_driver_failure(struct nfc_dev *dev, int err) } EXPORT_SYMBOL(nfc_driver_failure); +int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type) +{ + struct nfc_se *se, *n; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + list_for_each_entry_safe(se, n, &dev->secure_elements, list) + if (se->idx == se_idx) + return -EALREADY; + + se = kzalloc(sizeof(struct nfc_se), GFP_KERNEL); + if (!se) + return -ENOMEM; + + se->idx = se_idx; + se->type = type; + se->state = NFC_SE_DISABLED; + INIT_LIST_HEAD(&se->list); + + list_add(&se->list, &dev->secure_elements); + + return 0; +} +EXPORT_SYMBOL(nfc_add_se); + +int nfc_remove_se(struct nfc_dev *dev, u32 se_idx) +{ + struct nfc_se *se, *n; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + list_for_each_entry_safe(se, n, &dev->secure_elements, list) + if (se->idx == se_idx) { + list_del(&se->list); + kfree(se); + + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL(nfc_remove_se); + static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); @@ -856,9 +899,9 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; - dev->active_se = NFC_SE_NONE; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; + INIT_LIST_HEAD(&dev->secure_elements); nfc_genl_data_init(&dev->genl_data); -- cgit v1.3-14-g43fede From 2757c3723c3d2b13e3a8bfaa034826f64e9cca43 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 15:47:37 +0200 Subject: NFC: Send netlink events for secure elements additions and removals When an NFC driver or host controller stack discovers a secure element, it will call nfc_add_se(). In order for userspace applications to use these secure elements, a netlink event will then be sent with the SE index and its type. With that information userspace applications can decide wether or not to enable SEs, through their indexes. Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 6 +++++ net/nfc/core.c | 14 +++++++++++ net/nfc/netlink.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 3 +++ 4 files changed, 86 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 3a57cef0b986..caed0f324d5f 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -95,6 +95,8 @@ enum nfc_commands { NFC_CMD_LLC_SDREQ, NFC_EVENT_LLC_SDRES, NFC_CMD_FW_UPLOAD, + NFC_EVENT_SE_ADDED, + NFC_EVENT_SE_REMOVED, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; @@ -125,6 +127,8 @@ enum nfc_commands { * @NFC_ATTR_LLC_PARAM_MIUX: MIU eXtension parameter * @NFC_ATTR_SE: Available Secure Elements * @NFC_ATTR_FIRMWARE_NAME: Free format firmware version + * @NFC_ATTR_SE_INDEX: Secure element index + * @NFC_ATTR_SE_TYPE: Secure element type (UICC or EMBEDDED) */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -148,6 +152,8 @@ enum nfc_attrs { NFC_ATTR_SE, NFC_ATTR_LLC_SDP, NFC_ATTR_FIRMWARE_NAME, + NFC_ATTR_SE_INDEX, + NFC_ATTR_SE_TYPE, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; diff --git a/net/nfc/core.c b/net/nfc/core.c index dacadfbcacea..bb5f16cfc201 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -763,6 +763,7 @@ EXPORT_SYMBOL(nfc_driver_failure); int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type) { struct nfc_se *se, *n; + int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); @@ -781,6 +782,14 @@ int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type) list_add(&se->list, &dev->secure_elements); + rc = nfc_genl_se_added(dev, se_idx, type); + if (rc < 0) { + list_del(&se->list); + kfree(se); + + return rc; + } + return 0; } EXPORT_SYMBOL(nfc_add_se); @@ -788,11 +797,16 @@ EXPORT_SYMBOL(nfc_add_se); int nfc_remove_se(struct nfc_dev *dev, u32 se_idx) { struct nfc_se *se, *n; + int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); list_for_each_entry_safe(se, n, &dev->secure_elements, list) if (se->idx == se_idx) { + rc = nfc_genl_se_removed(dev, se_idx); + if (rc < 0) + return rc; + list_del(&se->list); kfree(se); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index fdbc662c564a..8a11a3a27e69 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -426,6 +426,69 @@ free_msg: return rc; } +int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_ADDED); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || + nla_put_u8(msg, NFC_ATTR_SE_TYPE, type)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + +int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_REMOVED); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index cf0c48165996..a6aeee094aa4 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -94,6 +94,9 @@ int nfc_genl_tm_deactivated(struct nfc_dev *dev); int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); +int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type); +int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx); + struct nfc_dev *nfc_get_device(unsigned int idx); static inline void nfc_put_device(struct nfc_dev *dev) -- cgit v1.3-14-g43fede From 45bfa52e36ef016b789eca73c7847db3f7b4742d Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 12 Jun 2013 15:57:10 -0700 Subject: openvswitch: Fix struct comment. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- include/uapi/linux/openvswitch.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 405918dd7b3f..424672db7f12 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -192,7 +192,6 @@ enum ovs_vport_type { * optional; if not specified a free port number is automatically selected. * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type * of vport. - * and other attributes are ignored. * * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to * look up the vport to operate on; otherwise dp_idx from the &struct -- cgit v1.3-14-g43fede From 8941bbcd572a8860ad03c76e2f3d1dafa820b842 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:36 -0400 Subject: tipc: update code comments to reflect new uapi header path Files tipc.h and tipc_config.h were moved to uapi directory, but the corresponding comments were not updated at the same time. Signed-off-by: Ying Xue Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- include/uapi/linux/tipc.h | 2 +- include/uapi/linux/tipc_config.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index f2d90091cc20..852373d27dbb 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -1,5 +1,5 @@ /* - * include/linux/tipc.h: Include file for TIPC socket interface + * include/uapi/linux/tipc.h: Header for TIPC socket interface * * Copyright (c) 2003-2006, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 0b1e3f218a36..6b0bff09b3a7 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -1,5 +1,5 @@ /* - * include/linux/tipc_config.h: Include file for TIPC configuration interface + * include/uapi/linux/tipc_config.h: Header for TIPC configuration interface * * Copyright (c) 2003-2006, Ericsson AB * Copyright (c) 2005-2007, 2010-2011, Wind River Systems -- cgit v1.3-14-g43fede From 2f301ab29e4656af824592363039d8f6bd5a9f68 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Thu, 16 May 2013 13:00:28 +0200 Subject: nl80211/cfg80211: add 5 and 10 MHz defines and wiphy flag Add defines for 5 and 10 MHz channel width and fix channel handling functions accordingly. Also check for and report the WIPHY_FLAG_SUPPORTS_5_10_MHZ capability. Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer [fix spelling in comment] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 4 ++++ net/wireless/chan.c | 57 +++++++++++++++++++++++++++++++++++++------- net/wireless/nl80211.c | 21 ++++++++++++---- 4 files changed, 72 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6a43c34ce96f..316f34bb8e6e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2342,6 +2342,7 @@ struct cfg80211_ops { * responds to probe-requests in hardware. * @WIPHY_FLAG_OFFCHAN_TX: Device supports direct off-channel TX. * @WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL: Device supports remain-on-channel call. + * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels. */ enum wiphy_flags { WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), @@ -2365,6 +2366,7 @@ enum wiphy_flags { WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD = BIT(19), WIPHY_FLAG_OFFCHAN_TX = BIT(20), WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL = BIT(21), + WIPHY_FLAG_SUPPORTS_5_10_MHZ = BIT(22), }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ca6facf4df0c..861e5eba3953 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2758,6 +2758,8 @@ enum nl80211_channel_type { * and %NL80211_ATTR_CENTER_FREQ2 attributes must be provided as well * @NL80211_CHAN_WIDTH_160: 160 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 * attribute must be provided as well + * @NL80211_CHAN_WIDTH_5: 5 MHz OFDM channel + * @NL80211_CHAN_WIDTH_10: 10 MHz OFDM channel */ enum nl80211_chan_width { NL80211_CHAN_WIDTH_20_NOHT, @@ -2766,6 +2768,8 @@ enum nl80211_chan_width { NL80211_CHAN_WIDTH_80, NL80211_CHAN_WIDTH_80P80, NL80211_CHAN_WIDTH_160, + NL80211_CHAN_WIDTH_5, + NL80211_CHAN_WIDTH_10, }; /** diff --git a/net/wireless/chan.c b/net/wireless/chan.c index fd556ac05fdb..50f6195c8b70 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -54,6 +54,8 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) control_freq = chandef->chan->center_freq; switch (chandef->width) { + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: if (chandef->center_freq1 != control_freq) @@ -152,6 +154,12 @@ static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) int width; switch (c->width) { + case NL80211_CHAN_WIDTH_5: + width = 5; + break; + case NL80211_CHAN_WIDTH_10: + width = 10; + break; case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: width = 20; @@ -194,6 +202,16 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, if (c1->width == c2->width) return NULL; + /* + * can't be compatible if one of them is 5 or 10 MHz, + * but they don't have the same width. + */ + if (c1->width == NL80211_CHAN_WIDTH_5 || + c1->width == NL80211_CHAN_WIDTH_10 || + c2->width == NL80211_CHAN_WIDTH_5 || + c2->width == NL80211_CHAN_WIDTH_10) + return NULL; + if (c1->width == NL80211_CHAN_WIDTH_20_NOHT || c1->width == NL80211_CHAN_WIDTH_20) return c2; @@ -264,11 +282,17 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, u32 bandwidth) { struct ieee80211_channel *c; - u32 freq; + u32 freq, start_freq, end_freq; + + if (bandwidth <= 20) { + start_freq = center_freq; + end_freq = center_freq; + } else { + start_freq = center_freq - bandwidth/2 + 10; + end_freq = center_freq + bandwidth/2 - 10; + } - for (freq = center_freq - bandwidth/2 + 10; - freq <= center_freq + bandwidth/2 - 10; - freq += 20) { + for (freq = start_freq; freq <= end_freq; freq += 20) { c = ieee80211_get_channel(wiphy, freq); if (!c) return -EINVAL; @@ -310,11 +334,17 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 prohibited_flags) { struct ieee80211_channel *c; - u32 freq; + u32 freq, start_freq, end_freq; + + if (bandwidth <= 20) { + start_freq = center_freq; + end_freq = center_freq; + } else { + start_freq = center_freq - bandwidth/2 + 10; + end_freq = center_freq + bandwidth/2 - 10; + } - for (freq = center_freq - bandwidth/2 + 10; - freq <= center_freq + bandwidth/2 - 10; - freq += 20) { + for (freq = start_freq; freq <= end_freq; freq += 20) { c = ieee80211_get_channel(wiphy, freq); if (!c) return false; @@ -349,6 +379,12 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, control_freq = chandef->chan->center_freq; switch (chandef->width) { + case NL80211_CHAN_WIDTH_5: + width = 5; + break; + case NL80211_CHAN_WIDTH_10: + width = 10; + break; case NL80211_CHAN_WIDTH_20: if (!ht_cap->ht_supported) return false; @@ -405,6 +441,11 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, if (width > 20) prohibited_flags |= IEEE80211_CHAN_NO_OFDM; + /* 5 and 10 MHz are only defined for the OFDM PHY */ + if (width < 20) + prohibited_flags |= IEEE80211_CHAN_NO_OFDM; + + if (!cfg80211_secondary_chans_ok(wiphy, chandef->center_freq1, width, prohibited_flags)) return false; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1c4f7daea6c7..4ab1ffa9df11 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1188,6 +1188,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && + nla_put_flag(msg, WIPHY_FLAG_SUPPORTS_5_10_MHZ)) + goto nla_put_failure; (*split_start)++; if (split) @@ -1731,6 +1734,11 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, IEEE80211_CHAN_DISABLED)) return -EINVAL; + if ((chandef->width == NL80211_CHAN_WIDTH_5 || + chandef->width == NL80211_CHAN_WIDTH_10) && + !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ)) + return -EINVAL; + return 0; } @@ -6280,11 +6288,16 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef)) return -EINVAL; - if (ibss.chandef.width > NL80211_CHAN_WIDTH_40) - return -EINVAL; - if (ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && - !(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)) + switch (ibss.chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + break; + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + if (rdev->wiphy.features & NL80211_FEATURE_HT_IBSS) + break; + default: return -EINVAL; + } ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED]; ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; -- cgit v1.3-14-g43fede From 7d5437c709ded4f152cb8b305d17972d6707f20c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:18 -0700 Subject: openvswitch: Add tunneling interface. Add ovs tunnel interface for set tunnel action for userspace. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 18 +++++ net/openvswitch/actions.c | 4 ++ net/openvswitch/datapath.c | 78 +++++++++++++++++++++- net/openvswitch/datapath.h | 3 + net/openvswitch/flow.c | 125 +++++++++++++++++++++++++++++++++++ net/openvswitch/flow.h | 19 ++++++ net/openvswitch/vport-internal_dev.c | 2 +- net/openvswitch/vport-netdev.c | 2 +- net/openvswitch/vport.c | 4 +- net/openvswitch/vport.h | 3 +- 10 files changed, 251 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 424672db7f12..b15a445927d6 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -246,11 +246,29 @@ enum ovs_key_attr { OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ + OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */ + +#ifdef __KERNEL__ + OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ +#endif __OVS_KEY_ATTR_MAX }; #define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) +enum ovs_tunnel_key_attr { + OVS_TUNNEL_KEY_ATTR_ID, /* be64 Tunnel ID */ + OVS_TUNNEL_KEY_ATTR_IPV4_SRC, /* be32 src IP address. */ + OVS_TUNNEL_KEY_ATTR_IPV4_DST, /* be32 dst IP address. */ + OVS_TUNNEL_KEY_ATTR_TOS, /* u8 Tunnel IP ToS. */ + OVS_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. */ + OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ + OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ + __OVS_TUNNEL_KEY_ATTR_MAX +}; + +#define OVS_TUNNEL_KEY_ATTR_MAX (__OVS_TUNNEL_KEY_ATTR_MAX - 1) + /** * enum ovs_frag_type - IPv4 and IPv6 fragment type * @OVS_FRAG_TYPE_NONE: Packet is not a fragment. diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 596d6373399d..22c5f399f1cf 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -436,6 +436,10 @@ static int execute_set_action(struct sk_buff *skb, skb->mark = nla_get_u32(nested_attr); break; + case OVS_KEY_ATTR_IPV4_TUNNEL: + OVS_CB(skb)->tun_key = nla_data(nested_attr); + break; + case OVS_KEY_ATTR_ETHERNET: err = set_eth_addr(skb, nla_data(nested_attr)); break; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f14816b80b80..bbd310646bc8 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -362,6 +362,14 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, static size_t key_attr_size(void) { return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ + + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ @@ -600,8 +608,30 @@ static int validate_tp_port(const struct sw_flow_key *flow_key) return -EINVAL; } +static int validate_and_copy_set_tun(const struct nlattr *attr, + struct sw_flow_actions **sfa) +{ + struct ovs_key_ipv4_tunnel tun_key; + int err, start; + + err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &tun_key); + if (err) + return err; + + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); + if (start < 0) + return start; + + err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key)); + add_nested_action_end(*sfa, start); + + return err; +} + static int validate_set(const struct nlattr *a, - const struct sw_flow_key *flow_key) + const struct sw_flow_key *flow_key, + struct sw_flow_actions **sfa, + bool *set_tun) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -611,18 +641,27 @@ static int validate_set(const struct nlattr *a, return -EINVAL; if (key_type > OVS_KEY_ATTR_MAX || - nla_len(ovs_key) != ovs_key_lens[key_type]) + (ovs_key_lens[key_type] != nla_len(ovs_key) && + ovs_key_lens[key_type] != -1)) return -EINVAL; switch (key_type) { const struct ovs_key_ipv4 *ipv4_key; const struct ovs_key_ipv6 *ipv6_key; + int err; case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_ETHERNET: break; + case OVS_KEY_ATTR_TUNNEL: + *set_tun = true; + err = validate_and_copy_set_tun(a, sfa); + if (err) + return err; + break; + case OVS_KEY_ATTR_IPV4: if (flow_key->eth.type != htons(ETH_P_IP)) return -EINVAL; @@ -771,7 +810,7 @@ static int validate_and_copy_actions(const struct nlattr *attr, break; case OVS_ACTION_ATTR_SET: - err = validate_set(a, key); + err = validate_set(a, key, sfa, &skip_copy); if (err) return err; break; @@ -993,6 +1032,33 @@ static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) return err; } +static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) +{ + const struct nlattr *ovs_key = nla_data(a); + int key_type = nla_type(ovs_key); + struct nlattr *start; + int err; + + switch (key_type) { + case OVS_KEY_ATTR_IPV4_TUNNEL: + start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + if (!start) + return -EMSGSIZE; + + err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key)); + if (err) + return err; + nla_nest_end(skb, start); + break; + default: + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) + return -EMSGSIZE; + break; + } + + return 0; +} + static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb) { const struct nlattr *a; @@ -1002,6 +1068,12 @@ static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *s int type = nla_type(a); switch (type) { + case OVS_ACTION_ATTR_SET: + err = set_action_to_attr(a, skb); + if (err) + return err; + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample_action_to_attr(a, skb); if (err) diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 16b840695216..e88ebc2f1c54 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -88,9 +88,12 @@ struct datapath { /** * struct ovs_skb_cb - OVS data in skb CB * @flow: The flow associated with this packet. May be %NULL if no flow. + * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the + * packet is not being tunneled. */ struct ovs_skb_cb { struct sw_flow *flow; + struct ovs_key_ipv4_tunnel *tun_key; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 940d4b803ff5..976a8b766a6a 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -603,6 +604,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memset(key, 0, sizeof(*key)); key->phy.priority = skb->priority; + if (OVS_CB(skb)->tun_key) + memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key)); key->phy.in_port = in_port; key->phy.skb_mark = skb->mark; @@ -818,6 +821,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), + [OVS_KEY_ATTR_TUNNEL] = -1, }; static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, @@ -955,6 +959,105 @@ static int parse_flow_nlattrs(const struct nlattr *attr, return 0; } +int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, + struct ovs_key_ipv4_tunnel *tun_key) +{ + struct nlattr *a; + int rem; + bool ttl = false; + + memset(tun_key, 0, sizeof(*tun_key)); + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_TOS] = 1, + [OVS_TUNNEL_KEY_ATTR_TTL] = 1, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, + [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, + }; + + if (type > OVS_TUNNEL_KEY_ATTR_MAX || + ovs_tunnel_key_lens[type] != nla_len(a)) + return -EINVAL; + + switch (type) { + case OVS_TUNNEL_KEY_ATTR_ID: + tun_key->tun_id = nla_get_be64(a); + tun_key->tun_flags |= TUNNEL_KEY; + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: + tun_key->ipv4_src = nla_get_be32(a); + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: + tun_key->ipv4_dst = nla_get_be32(a); + break; + case OVS_TUNNEL_KEY_ATTR_TOS: + tun_key->ipv4_tos = nla_get_u8(a); + break; + case OVS_TUNNEL_KEY_ATTR_TTL: + tun_key->ipv4_ttl = nla_get_u8(a); + ttl = true; + break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + tun_key->tun_flags |= TUNNEL_CSUM; + break; + default: + return -EINVAL; + + } + } + if (rem > 0) + return -EINVAL; + + if (!tun_key->ipv4_dst) + return -EINVAL; + + if (!ttl) + return -EINVAL; + + return 0; +} + +int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, + const struct ovs_key_ipv4_tunnel *tun_key) +{ + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); + if (!nla) + return -EMSGSIZE; + + if (tun_key->tun_flags & TUNNEL_KEY && + nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id)) + return -EMSGSIZE; + if (tun_key->ipv4_src && + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src)) + return -EMSGSIZE; + if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst)) + return -EMSGSIZE; + if (tun_key->ipv4_tos && + nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos)) + return -EMSGSIZE; + if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_CSUM) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) + return -EMSGSIZE; + + nla_nest_end(skb, nla); + return 0; +} + /** * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key. * @swkey: receives the extracted flow key. @@ -997,6 +1100,14 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); } + if (attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { + err = ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key); + if (err) + return err; + + attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); + } + /* Data attributes. */ if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) return -EINVAL; @@ -1135,17 +1246,21 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, const struct nlattr *attr) { + struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; const struct nlattr *nla; int rem; flow->key.phy.in_port = DP_MAX_PORTS; flow->key.phy.priority = 0; flow->key.phy.skb_mark = 0; + memset(tun_key, 0, sizeof(flow->key.tun_key)); nla_for_each_nested(nla, attr, rem) { int type = nla_type(nla); if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) { + int err; + if (nla_len(nla) != ovs_key_lens[type]) return -EINVAL; @@ -1154,6 +1269,12 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, flow->key.phy.priority = nla_get_u32(nla); break; + case OVS_KEY_ATTR_TUNNEL: + err = ovs_ipv4_tun_from_nlattr(nla, tun_key); + if (err) + return err; + break; + case OVS_KEY_ATTR_IN_PORT: if (nla_get_u32(nla) >= DP_MAX_PORTS) return -EINVAL; @@ -1180,6 +1301,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) goto nla_put_failure; + if (swkey->tun_key.ipv4_dst && + ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key)) + goto nla_put_failure; + if (swkey->phy.in_port != DP_MAX_PORTS && nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) goto nla_put_failure; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index e370f6246ee9..aec5e43f690b 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -40,7 +40,22 @@ struct sw_flow_actions { struct nlattr actions[]; }; +/* Used to memset ovs_key_ipv4_tunnel padding. */ +#define OVS_TUNNEL_KEY_SIZE \ + (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \ + FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl)) + +struct ovs_key_ipv4_tunnel { + __be64 tun_id; + __be32 ipv4_src; + __be32 ipv4_dst; + u16 tun_flags; + u8 ipv4_tos; + u8 ipv4_ttl; +}; + struct sw_flow_key { + struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ struct { u32 priority; /* Packet QoS priority. */ u32 skb_mark; /* SKB mark. */ @@ -179,5 +194,9 @@ u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len); struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; +int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, + struct ovs_key_ipv4_tunnel *tun_key); +int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, + const struct ovs_key_ipv4_tunnel *tun_key); #endif /* flow.h */ diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index e284c7e1fec4..98d3edbbc235 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -67,7 +67,7 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { rcu_read_lock(); - ovs_vport_receive(internal_dev_priv(netdev)->vport, skb); + ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL); rcu_read_unlock(); return 0; } diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 40de815b4213..5982f3f62835 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -51,7 +51,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) skb_push(skb, ETH_HLEN); ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); - ovs_vport_receive(vport, skb); + ovs_vport_receive(vport, skb, NULL); return; error: diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 176d449351eb..413287a1877f 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -325,7 +325,8 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) * Must be called with rcu_read_lock. The packet cannot be shared and * skb->data should point to the Ethernet header. */ -void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) +void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, + struct ovs_key_ipv4_tunnel *tun_key) { struct pcpu_tstats *stats; @@ -335,6 +336,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) stats->rx_bytes += skb->len; u64_stats_update_end(&stats->syncp); + OVS_CB(skb)->tun_key = tun_key; ovs_dp_process_received_packet(vport, skb); } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 293278c4c2df..2d961aedd71d 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -184,7 +184,8 @@ static inline struct vport *vport_from_priv(const void *priv) return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); } -void ovs_vport_receive(struct vport *, struct sk_buff *); +void ovs_vport_receive(struct vport *, struct sk_buff *, + struct ovs_key_ipv4_tunnel *); void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); /* List of statically compiled vport implementations. Don't forget to also -- cgit v1.3-14-g43fede From aa310701e787087dbfbccf1409982a96e16c57a6 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:33 -0700 Subject: openvswitch: Add gre tunnel support. Add gre vport implementation. Most of gre protocol processing is pushed to gre module. It make use of gre demultiplexer therefore it can co-exist with linux device based gre tunnels. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 1 + net/openvswitch/Kconfig | 2 + net/openvswitch/Makefile | 3 +- net/openvswitch/datapath.h | 1 + net/openvswitch/flow.h | 18 ++- net/openvswitch/vport-gre.c | 274 +++++++++++++++++++++++++++++++++++++++ net/openvswitch/vport.c | 19 +++ net/openvswitch/vport.h | 7 + 8 files changed, 323 insertions(+), 2 deletions(-) create mode 100644 net/openvswitch/vport-gre.c (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index b15a445927d6..c55efaaa9bb4 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -164,6 +164,7 @@ enum ovs_vport_type { OVS_VPORT_TYPE_UNSPEC, OVS_VPORT_TYPE_NETDEV, /* network device */ OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ + OVS_VPORT_TYPE_GRE, /* GRE tunnel. */ __OVS_VPORT_TYPE_MAX }; diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index d9ea33c361be..9fbc04a31ed6 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -19,6 +19,8 @@ config OPENVSWITCH which is able to accept configuration from a variety of sources and translate it into packet processing rules. + Open vSwitch GRE support depends on CONFIG_NET_IPGRE_DEMUX. + See http://openvswitch.org for more information and userspace utilities. diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 15e7384745c1..01bddb2991e3 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -10,5 +10,6 @@ openvswitch-y := \ dp_notify.o \ flow.o \ vport.o \ + vport-gre.o \ vport-internal_dev.o \ - vport-netdev.o \ + vport-netdev.o diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index e88ebc2f1c54..a91486484916 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -122,6 +122,7 @@ struct dp_upcall_info { struct ovs_net { struct list_head dps; struct work_struct dp_notify_work; + struct vport_net vport_net; }; extern int ovs_net_id; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 999842f247a0..66ef7220293e 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -49,11 +49,27 @@ struct ovs_key_ipv4_tunnel { __be64 tun_id; __be32 ipv4_src; __be32 ipv4_dst; - u16 tun_flags; + __be16 tun_flags; u8 ipv4_tos; u8 ipv4_ttl; }; +static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, + const struct iphdr *iph, __be64 tun_id, + __be16 tun_flags) +{ + tun_key->tun_id = tun_id; + tun_key->ipv4_src = iph->saddr; + tun_key->ipv4_dst = iph->daddr; + tun_key->ipv4_tos = iph->tos; + tun_key->ipv4_ttl = iph->ttl; + tun_key->tun_flags = tun_flags; + + /* clear struct padding. */ + memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0, + sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE); +} + struct sw_flow_key { struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ struct { diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c new file mode 100644 index 000000000000..3a8d1900aa78 --- /dev/null +++ b/net/openvswitch/vport-gre.c @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifdef CONFIG_NET_IPGRE_DEMUX +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "datapath.h" +#include "vport.h" + +/* Returns the least-significant 32 bits of a __be64. */ +static __be32 be64_get_low32(__be64 x) +{ +#ifdef __BIG_ENDIAN + return (__force __be32)x; +#else + return (__force __be32)((__force u64)x >> 32); +#endif +} + +static __be16 filter_tnl_flags(__be16 flags) +{ + return flags & (TUNNEL_CSUM | TUNNEL_KEY); +} + +static struct sk_buff *__build_header(struct sk_buff *skb, + int tunnel_hlen) +{ + const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; + struct tnl_ptk_info tpi; + + skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM)); + if (IS_ERR(skb)) + return NULL; + + tpi.flags = filter_tnl_flags(tun_key->tun_flags); + tpi.proto = htons(ETH_P_TEB); + tpi.key = be64_get_low32(tun_key->tun_id); + tpi.seq = 0; + gre_build_header(skb, &tpi, tunnel_hlen); + + return skb; +} + +static __be64 key_to_tunnel_id(__be32 key, __be32 seq) +{ +#ifdef __BIG_ENDIAN + return (__force __be64)((__force u64)seq << 32 | (__force u32)key); +#else + return (__force __be64)((__force u64)key << 32 | (__force u32)seq); +#endif +} + +/* Called with rcu_read_lock and BH disabled. */ +static int gre_rcv(struct sk_buff *skb, + const struct tnl_ptk_info *tpi) +{ + struct ovs_key_ipv4_tunnel tun_key; + struct ovs_net *ovs_net; + struct vport *vport; + __be64 key; + + ovs_net = net_generic(dev_net(skb->dev), ovs_net_id); + vport = rcu_dereference(ovs_net->vport_net.gre_vport); + if (unlikely(!vport)) + return PACKET_REJECT; + + key = key_to_tunnel_id(tpi->key, tpi->seq); + ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, + filter_tnl_flags(tpi->flags)); + + ovs_vport_receive(vport, skb, &tun_key); + return PACKET_RCVD; +} + +static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) +{ + struct net *net = ovs_dp_get_net(vport->dp); + struct flowi4 fl; + struct rtable *rt; + int min_headroom; + int tunnel_hlen; + __be16 df; + int err; + + if (unlikely(!OVS_CB(skb)->tun_key)) { + err = -EINVAL; + goto error; + } + + /* Route lookup */ + memset(&fl, 0, sizeof(fl)); + fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst; + fl.saddr = OVS_CB(skb)->tun_key->ipv4_src; + fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos); + fl.flowi4_mark = skb->mark; + fl.flowi4_proto = IPPROTO_GRE; + + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + tunnel_hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags); + + min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + + tunnel_hlen + sizeof(struct iphdr) + + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { + int head_delta = SKB_DATA_ALIGN(min_headroom - + skb_headroom(skb) + + 16); + err = pskb_expand_head(skb, max_t(int, head_delta, 0), + 0, GFP_ATOMIC); + if (unlikely(err)) + goto err_free_rt; + } + + if (vlan_tx_tag_present(skb)) { + if (unlikely(!__vlan_put_tag(skb, + skb->vlan_proto, + vlan_tx_tag_get(skb)))) { + err = -ENOMEM; + goto err_free_rt; + } + skb->vlan_tci = 0; + } + + /* Push Tunnel header. */ + skb = __build_header(skb, tunnel_hlen); + if (unlikely(!skb)) { + err = 0; + goto err_free_rt; + } + + df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? + htons(IP_DF) : 0; + + skb->local_df = 1; + + return iptunnel_xmit(net, rt, skb, fl.saddr, + OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, + OVS_CB(skb)->tun_key->ipv4_tos, + OVS_CB(skb)->tun_key->ipv4_ttl, df); +err_free_rt: + ip_rt_put(rt); +error: + return err; +} + +static struct gre_cisco_protocol gre_protocol = { + .handler = gre_rcv, + .priority = 1, +}; + +static int gre_ports; +static int gre_init(void) +{ + int err; + + gre_ports++; + if (gre_ports > 1) + return 0; + + err = gre_cisco_register(&gre_protocol); + if (err) + pr_warn("cannot register gre protocol handler\n"); + + return err; +} + +static void gre_exit(void) +{ + gre_ports--; + if (gre_ports > 0) + return; + + gre_cisco_unregister(&gre_protocol); +} + +static const char *gre_get_name(const struct vport *vport) +{ + return vport_priv(vport); +} + +static struct vport *gre_create(const struct vport_parms *parms) +{ + struct net *net = ovs_dp_get_net(parms->dp); + struct ovs_net *ovs_net; + struct vport *vport; + int err; + + err = gre_init(); + if (err) + return ERR_PTR(err); + + ovs_net = net_generic(net, ovs_net_id); + if (ovsl_dereference(ovs_net->vport_net.gre_vport)) { + vport = ERR_PTR(-EEXIST); + goto error; + } + + vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms); + if (IS_ERR(vport)) + goto error; + + strncpy(vport_priv(vport), parms->name, IFNAMSIZ); + rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport); + return vport; + +error: + gre_exit(); + return vport; +} + +static void gre_tnl_destroy(struct vport *vport) +{ + struct net *net = ovs_dp_get_net(vport->dp); + struct ovs_net *ovs_net; + + ovs_net = net_generic(net, ovs_net_id); + + rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL); + ovs_vport_deferred_free(vport); + gre_exit(); +} + +const struct vport_ops ovs_gre_vport_ops = { + .type = OVS_VPORT_TYPE_GRE, + .create = gre_create, + .destroy = gre_tnl_destroy, + .get_name = gre_get_name, + .send = gre_tnl_send, +}; +#endif diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 413287a1877f..f52dfb9cb5a7 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -38,6 +38,10 @@ static const struct vport_ops *vport_ops_list[] = { &ovs_netdev_vport_ops, &ovs_internal_vport_ops, + +#ifdef CONFIG_NET_IPGRE_DEMUX + &ovs_gre_vport_ops, +#endif }; /* Protected by RCU read lock for reading, ovs_mutex for writing. */ @@ -404,3 +408,18 @@ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) spin_unlock(&vport->stats_lock); } + +static void free_vport_rcu(struct rcu_head *rcu) +{ + struct vport *vport = container_of(rcu, struct vport, rcu); + + ovs_vport_free(vport); +} + +void ovs_vport_deferred_free(struct vport *vport) +{ + if (!vport) + return; + + call_rcu(&vport->rcu, free_vport_rcu); +} diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 2d961aedd71d..376045c42f8b 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -34,6 +34,11 @@ struct vport_parms; /* The following definitions are for users of the vport subsytem: */ +/* The following definitions are for users of the vport subsytem: */ +struct vport_net { + struct vport __rcu *gre_vport; +}; + int ovs_vport_init(void); void ovs_vport_exit(void); @@ -152,6 +157,7 @@ enum vport_err_type { struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, const struct vport_parms *); void ovs_vport_free(struct vport *); +void ovs_vport_deferred_free(struct vport *vport); #define VPORT_ALIGN 8 @@ -192,6 +198,7 @@ void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); * add yours to the list at the top of vport.c. */ extern const struct vport_ops ovs_netdev_vport_ops; extern const struct vport_ops ovs_internal_vport_ops; +extern const struct vport_ops ovs_gre_vport_ops; static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len) -- cgit v1.3-14-g43fede From bcefe17cffd06efdda3e7ad679ea743236e6271a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 15 Jun 2013 09:39:18 +0800 Subject: tcp: introduce a per-route knob for quick ack In previous discussions, I tried to find some reasonable heuristics for delayed ACK, however this seems not possible, according to Eric: "ACKS might also be delayed because of bidirectional traffic, and is more controlled by the application response time. TCP stack can not easily estimate it." "ACK can be incredibly useful to recover from losses in a short time. The vast majority of TCP sessions are small lived, and we send one ACK per received segment anyway at beginning or retransmits to let the sender smoothly increase its cwnd, so an auto-tuning facility wont help them that much." and according to David: "ACKs are the only information we have to detect loss. And, for the same reasons that TCP VEGAS is fundamentally broken, we cannot measure the pipe or some other receiver-side-visible piece of information to determine when it's "safe" to stretch ACK. And even if it's "safe", we should not do it so that losses are accurately detected and we don't spuriously retransmit. The only way to know when the bandwidth increases is to "test" it, by sending more and more packets until drops happen. That's why all successful congestion control algorithms must operate on explicited tested pieces of information. Similarly, it's not really possible to universally know if it's safe to stretch ACK or not." It still makes sense to enable or disable quick ack mode like what TCP_QUICK_ACK does. Similar to TCP_QUICK_ACK option, but for people who can't modify the source code and still wants to control TCP delayed ACK behavior. As David suggested, this should belong to per-path scope, since different pathes may want different behaviors. Cc: Eric Dumazet Cc: Rick Jones Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Thomas Graf CC: David Laight Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 2 ++ net/ipv4/tcp_input.c | 5 ++++- net/ipv4/tcp_output.c | 6 ++++-- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 7a2144e1afae..eb0f1a554d7b 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -386,6 +386,8 @@ enum { #define RTAX_RTO_MIN RTAX_RTO_MIN RTAX_INITRWND, #define RTAX_INITRWND RTAX_INITRWND + RTAX_QUICKACK, +#define RTAX_QUICKACK RTAX_QUICKACK __RTAX_MAX }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 46271cdcf088..28af45abe062 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3717,6 +3717,7 @@ void tcp_reset(struct sock *sk) static void tcp_fin(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + const struct dst_entry *dst; inet_csk_schedule_ack(sk); @@ -3728,7 +3729,9 @@ static void tcp_fin(struct sock *sk) case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); - inet_csk(sk)->icsk_ack.pingpong = 1; + dst = __sk_dst_get(sk); + if (!dst || !dst_metric(dst, RTAX_QUICKACK)) + inet_csk(sk)->icsk_ack.pingpong = 1; break; case TCP_CLOSE_WAIT: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e2c1333ee318..3d609490f118 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -160,6 +160,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, { struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; + const struct dst_entry *dst = __sk_dst_get(sk); if (sysctl_tcp_slow_start_after_idle && (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto)) @@ -170,8 +171,9 @@ static void tcp_event_data_sent(struct tcp_sock *tp, /* If it is a reply for ato after last received * packet, enter pingpong mode. */ - if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - icsk->icsk_ack.pingpong = 1; + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato && + (!dst || !dst_metric(dst, RTAX_QUICKACK))) + icsk->icsk_ack.pingpong = 1; } /* Account for an ACK we sent. */ -- cgit v1.3-14-g43fede From 681f130f39e10087475383e6771b9366e26bab0c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jun 2013 05:52:22 -0700 Subject: netfilter: xt_socket: add XT_SOCKET_NOWILDCARD flag xt_socket module can be a nice replacement to conntrack module in some cases (SYN filtering for example) But it lacks the ability to match the 3rd packet of TCP handshake (ACK coming from the client). Add a XT_SOCKET_NOWILDCARD flag to disable the wildcard mechanism. The wildcard is the legacy socket match behavior, that ignores LISTEN sockets bound to INADDR_ANY (or ipv6 equivalent) iptables -I INPUT -p tcp --syn -j SYN_CHAIN iptables -I INPUT -m socket --nowildcard -j ACCEPT Signed-off-by: Eric Dumazet Cc: Patrick McHardy Cc: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_socket.h | 7 ++++ net/netfilter/xt_socket.c | 70 ++++++++++++++++++++++++++++---- 2 files changed, 69 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_socket.h b/include/uapi/linux/netfilter/xt_socket.h index 26d7217bd4f1..6315e2ac3474 100644 --- a/include/uapi/linux/netfilter/xt_socket.h +++ b/include/uapi/linux/netfilter/xt_socket.h @@ -5,10 +5,17 @@ enum { XT_SOCKET_TRANSPARENT = 1 << 0, + XT_SOCKET_NOWILDCARD = 1 << 1, }; struct xt_socket_mtinfo1 { __u8 flags; }; +#define XT_SOCKET_FLAGS_V1 XT_SOCKET_TRANSPARENT + +struct xt_socket_mtinfo2 { + __u8 flags; +}; +#define XT_SOCKET_FLAGS_V2 (XT_SOCKET_TRANSPARENT | XT_SOCKET_NOWILDCARD) #endif /* _XT_SOCKET_H */ diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 02704245710e..f8b71911037a 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -163,8 +163,11 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, bool wildcard; bool transparent = true; - /* Ignore sockets listening on INADDR_ANY */ - wildcard = (sk->sk_state != TCP_TIME_WAIT && + /* Ignore sockets listening on INADDR_ANY, + * unless XT_SOCKET_NOWILDCARD is set + */ + wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && + sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->inet_rcv_saddr == 0); /* Ignore non-transparent sockets, @@ -197,7 +200,7 @@ socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par) } static bool -socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { return socket_match(skb, par, par->matchinfo); } @@ -259,7 +262,7 @@ extract_icmp6_fields(const struct sk_buff *skb, } static bool -socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { struct ipv6hdr *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; @@ -302,8 +305,11 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) bool wildcard; bool transparent = true; - /* Ignore sockets listening on INADDR_ANY */ - wildcard = (sk->sk_state != TCP_TIME_WAIT && + /* Ignore sockets listening on INADDR_ANY + * unless XT_SOCKET_NOWILDCARD is set + */ + wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && + sk->sk_state != TCP_TIME_WAIT && ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)); /* Ignore non-transparent sockets, @@ -331,6 +337,28 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) } #endif +static int socket_mt_v1_check(const struct xt_mtchk_param *par) +{ + const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + + if (info->flags & ~XT_SOCKET_FLAGS_V1) { + pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); + return -EINVAL; + } + return 0; +} + +static int socket_mt_v2_check(const struct xt_mtchk_param *par) +{ + const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo; + + if (info->flags & ~XT_SOCKET_FLAGS_V2) { + pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); + return -EINVAL; + } + return 0; +} + static struct xt_match socket_mt_reg[] __read_mostly = { { .name = "socket", @@ -345,7 +373,8 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV4, - .match = socket_mt4_v1, + .match = socket_mt4_v1_v2, + .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), @@ -356,7 +385,32 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV6, - .match = socket_mt6_v1, + .match = socket_mt6_v1_v2, + .checkentry = socket_mt_v1_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#endif + { + .name = "socket", + .revision = 2, + .family = NFPROTO_IPV4, + .match = socket_mt4_v1_v2, + .checkentry = socket_mt_v2_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#ifdef XT_SOCKET_HAVE_IPV6 + { + .name = "socket", + .revision = 2, + .family = NFPROTO_IPV6, + .match = socket_mt6_v1_v2, + .checkentry = socket_mt_v2_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), -- cgit v1.3-14-g43fede From b71c99801e18eb172ae34851daf25044a3bf644a Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 26 May 2013 10:03:01 -0300 Subject: [media] v4l2-core: remove support for obsolete VIDIOC_DBG_G_CHIP_IDENT This has been replaced by the new and much better VIDIOC_DBG_G_CHIP_INFO. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 1 - drivers/media/v4l2-core/v4l2-dev.c | 1 - drivers/media/v4l2-core/v4l2-ioctl.c | 28 -- include/media/v4l2-chip-ident.h | 354 -------------------------- include/media/v4l2-ioctl.h | 2 - include/media/v4l2-subdev.h | 4 +- include/uapi/linux/videodev2.h | 17 +- 7 files changed, 4 insertions(+), 403 deletions(-) delete mode 100644 include/media/v4l2-chip-ident.h (limited to 'include/uapi/linux') diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index f1295519f285..8f7a6a454a4c 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -1074,7 +1074,6 @@ long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg) case VIDIOC_TRY_DECODER_CMD: case VIDIOC_DBG_S_REGISTER: case VIDIOC_DBG_G_REGISTER: - case VIDIOC_DBG_G_CHIP_IDENT: case VIDIOC_S_HW_FREQ_SEEK: case VIDIOC_S_DV_TIMINGS: case VIDIOC_G_DV_TIMINGS: diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c index 2f3fac5345db..0c061e16502e 100644 --- a/drivers/media/v4l2-core/v4l2-dev.c +++ b/drivers/media/v4l2-core/v4l2-dev.c @@ -596,7 +596,6 @@ static void determine_valid_ioctls(struct video_device *vdev) set_bit(_IOC_NR(VIDIOC_DBG_G_REGISTER), valid_ioctls); set_bit(_IOC_NR(VIDIOC_DBG_S_REGISTER), valid_ioctls); #endif - SET_VALID_IOCTL(ops, VIDIOC_DBG_G_CHIP_IDENT, vidioc_g_chip_ident); /* yes, really vidioc_subscribe_event */ SET_VALID_IOCTL(ops, VIDIOC_DQEVENT, vidioc_subscribe_event); SET_VALID_IOCTL(ops, VIDIOC_SUBSCRIBE_EVENT, vidioc_subscribe_event); diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 423cbf948600..c9d9f01d21bc 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -26,7 +26,6 @@ #include #include #include -#include #include /* Zero out the end of the struct pointed to by p. Everything after, but @@ -619,20 +618,6 @@ static void v4l_print_decoder_cmd(const void *arg, bool write_only) pr_info("pts=%llu\n", p->stop.pts); } -static void v4l_print_dbg_chip_ident(const void *arg, bool write_only) -{ - const struct v4l2_dbg_chip_ident *p = arg; - - pr_cont("type=%u, ", p->match.type); - if (p->match.type == V4L2_CHIP_MATCH_I2C_DRIVER) - pr_cont("name=%.*s, ", - (int)sizeof(p->match.name), p->match.name); - else - pr_cont("addr=%u, ", p->match.addr); - pr_cont("chip_ident=%u, revision=0x%x\n", - p->ident, p->revision); -} - static void v4l_print_dbg_chip_info(const void *arg, bool write_only) { const struct v4l2_dbg_chip_info *p = arg; @@ -1813,18 +1798,6 @@ static int v4l_dbg_s_register(const struct v4l2_ioctl_ops *ops, #endif } -static int v4l_dbg_g_chip_ident(const struct v4l2_ioctl_ops *ops, - struct file *file, void *fh, void *arg) -{ - struct v4l2_dbg_chip_ident *p = arg; - - p->ident = V4L2_IDENT_NONE; - p->revision = 0; - if (p->match.type == V4L2_CHIP_MATCH_SUBDEV) - return -EINVAL; - return ops->vidioc_g_chip_ident(file, fh, p); -} - static int v4l_dbg_g_chip_info(const struct v4l2_ioctl_ops *ops, struct file *file, void *fh, void *arg) { @@ -2074,7 +2047,6 @@ static struct v4l2_ioctl_info v4l2_ioctls[] = { IOCTL_INFO_STD(VIDIOC_TRY_DECODER_CMD, vidioc_try_decoder_cmd, v4l_print_decoder_cmd, 0), IOCTL_INFO_FNC(VIDIOC_DBG_S_REGISTER, v4l_dbg_s_register, v4l_print_dbg_register, 0), IOCTL_INFO_FNC(VIDIOC_DBG_G_REGISTER, v4l_dbg_g_register, v4l_print_dbg_register, 0), - IOCTL_INFO_FNC(VIDIOC_DBG_G_CHIP_IDENT, v4l_dbg_g_chip_ident, v4l_print_dbg_chip_ident, 0), IOCTL_INFO_FNC(VIDIOC_S_HW_FREQ_SEEK, v4l_s_hw_freq_seek, v4l_print_hw_freq_seek, INFO_FL_PRIO), IOCTL_INFO_STD(VIDIOC_S_DV_TIMINGS, vidioc_s_dv_timings, v4l_print_dv_timings, INFO_FL_PRIO), IOCTL_INFO_STD(VIDIOC_G_DV_TIMINGS, vidioc_g_dv_timings, v4l_print_dv_timings, 0), diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h deleted file mode 100644 index 543f89c18896..000000000000 --- a/include/media/v4l2-chip-ident.h +++ /dev/null @@ -1,354 +0,0 @@ -/* - v4l2 chip identifiers header - - This header provides a list of chip identifiers that can be returned - through the VIDIOC_DBG_G_CHIP_IDENT ioctl. - - Copyright (C) 2007 Hans Verkuil - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef V4L2_CHIP_IDENT_H_ -#define V4L2_CHIP_IDENT_H_ - -/* VIDIOC_DBG_G_CHIP_IDENT: identifies the actual chip installed on the board */ - -/* KEEP THIS LIST ORDERED BY ID! - Otherwise it will be hard to see which ranges are already in use when - adding support to a new chip family. */ -enum { - /* general idents: reserved range 0-49 */ - V4L2_IDENT_NONE = 0, /* No chip matched */ - V4L2_IDENT_AMBIGUOUS = 1, /* Match too general, multiple chips matched */ - V4L2_IDENT_UNKNOWN = 2, /* Chip found, but cannot identify */ - - /* module tvaudio: reserved range 50-99 */ - V4L2_IDENT_TVAUDIO = 50, /* A tvaudio chip, unknown which it is exactly */ - - /* Sony IMX074 */ - V4L2_IDENT_IMX074 = 74, - - /* module saa7110: just ident 100 */ - V4L2_IDENT_SAA7110 = 100, - - /* module saa7115: reserved range 101-149 */ - V4L2_IDENT_SAA7111 = 101, - V4L2_IDENT_SAA7111A = 102, - V4L2_IDENT_SAA7113 = 103, - V4L2_IDENT_SAA7114 = 104, - V4L2_IDENT_SAA7115 = 105, - V4L2_IDENT_SAA7118 = 108, - - V4L2_IDENT_GM7113C = 140, - - /* module saa7127: reserved range 150-199 */ - V4L2_IDENT_SAA7127 = 157, - V4L2_IDENT_SAA7129 = 159, - - /* module cx25840: reserved range 200-249 */ - V4L2_IDENT_CX25836 = 236, - V4L2_IDENT_CX25837 = 237, - V4L2_IDENT_CX25840 = 240, - V4L2_IDENT_CX25841 = 241, - V4L2_IDENT_CX25842 = 242, - V4L2_IDENT_CX25843 = 243, - - /* OmniVision sensors: reserved range 250-299 */ - V4L2_IDENT_OV7670 = 250, - V4L2_IDENT_OV7720 = 251, - V4L2_IDENT_OV7725 = 252, - V4L2_IDENT_OV7660 = 253, - V4L2_IDENT_OV9650 = 254, - V4L2_IDENT_OV9655 = 255, - V4L2_IDENT_SOI968 = 256, - V4L2_IDENT_OV9640 = 257, - V4L2_IDENT_OV6650 = 258, - V4L2_IDENT_OV2640 = 259, - V4L2_IDENT_OV9740 = 260, - V4L2_IDENT_OV5642 = 261, - - /* module saa7146: reserved range 300-309 */ - V4L2_IDENT_SAA7146 = 300, - - /* Conexant MPEG encoder/decoders: reserved range 400-420 */ - V4L2_IDENT_CX23418_843 = 403, /* Integrated A/V Decoder on the '418 */ - V4L2_IDENT_CX23415 = 415, - V4L2_IDENT_CX23416 = 416, - V4L2_IDENT_CX23417 = 417, - V4L2_IDENT_CX23418 = 418, - - /* module bt819: reserved range 810-819 */ - V4L2_IDENT_BT815A = 815, - V4L2_IDENT_BT817A = 817, - V4L2_IDENT_BT819A = 819, - - /* module au0828 */ - V4L2_IDENT_AU0828 = 828, - - /* module bttv: ident 848 + 849 */ - V4L2_IDENT_BT848 = 848, - V4L2_IDENT_BT849 = 849, - - /* module bt856: just ident 856 */ - V4L2_IDENT_BT856 = 856, - - /* module bt866: just ident 866 */ - V4L2_IDENT_BT866 = 866, - - /* module bttv: ident 878 + 879 */ - V4L2_IDENT_BT878 = 878, - V4L2_IDENT_BT879 = 879, - - /* module ks0127: reserved range 1120-1129 */ - V4L2_IDENT_KS0122S = 1122, - V4L2_IDENT_KS0127 = 1127, - V4L2_IDENT_KS0127B = 1128, - - /* module indycam: just ident 2000 */ - V4L2_IDENT_INDYCAM = 2000, - - /* module vp27smpx: just ident 2700 */ - V4L2_IDENT_VP27SMPX = 2700, - - /* module vpx3220: reserved range: 3210-3229 */ - V4L2_IDENT_VPX3214C = 3214, - V4L2_IDENT_VPX3216B = 3216, - V4L2_IDENT_VPX3220A = 3220, - - /* VX855 just ident 3409 */ - /* Other via devs could use 3314, 3324, 3327, 3336, 3364, 3353 */ - V4L2_IDENT_VIA_VX855 = 3409, - - /* module tvp5150 */ - V4L2_IDENT_TVP5150 = 5150, - - /* module saa5246a: just ident 5246 */ - V4L2_IDENT_SAA5246A = 5246, - - /* module saa5249: just ident 5249 */ - V4L2_IDENT_SAA5249 = 5249, - - /* module cs5345: just ident 5345 */ - V4L2_IDENT_CS5345 = 5345, - - /* module tea6415c: just ident 6415 */ - V4L2_IDENT_TEA6415C = 6415, - - /* module tea6420: just ident 6420 */ - V4L2_IDENT_TEA6420 = 6420, - - /* module saa6588: just ident 6588 */ - V4L2_IDENT_SAA6588 = 6588, - - /* module vs6624: just ident 6624 */ - V4L2_IDENT_VS6624 = 6624, - - /* module saa6752hs: reserved range 6750-6759 */ - V4L2_IDENT_SAA6752HS = 6752, - V4L2_IDENT_SAA6752HS_AC3 = 6753, - - /* modules tef6862: just ident 6862 */ - V4L2_IDENT_TEF6862 = 6862, - - /* module tvp7002: just ident 7002 */ - V4L2_IDENT_TVP7002 = 7002, - - /* module adv7170: just ident 7170 */ - V4L2_IDENT_ADV7170 = 7170, - - /* module adv7175: just ident 7175 */ - V4L2_IDENT_ADV7175 = 7175, - - /* module adv7180: just ident 7180 */ - V4L2_IDENT_ADV7180 = 7180, - - /* module adv7183: just ident 7183 */ - V4L2_IDENT_ADV7183 = 7183, - - /* module saa7185: just ident 7185 */ - V4L2_IDENT_SAA7185 = 7185, - - /* module saa7191: just ident 7191 */ - V4L2_IDENT_SAA7191 = 7191, - - /* module ths7303: just ident 7303 */ - V4L2_IDENT_THS7303 = 7303, - - /* module adv7343: just ident 7343 */ - V4L2_IDENT_ADV7343 = 7343, - - /* module ths7353: just ident 7353 */ - V4L2_IDENT_THS7353 = 7353, - - /* module adv7393: just ident 7393 */ - V4L2_IDENT_ADV7393 = 7393, - - /* module adv7604: just ident 7604 */ - V4L2_IDENT_ADV7604 = 7604, - - /* module saa7706h: just ident 7706 */ - V4L2_IDENT_SAA7706H = 7706, - - /* module mt9v011, just ident 8243 */ - V4L2_IDENT_MT9V011 = 8243, - - /* module wm8739: just ident 8739 */ - V4L2_IDENT_WM8739 = 8739, - - /* module wm8775: just ident 8775 */ - V4L2_IDENT_WM8775 = 8775, - - /* Marvell controllers starting at 8801 */ - V4L2_IDENT_CAFE = 8801, - V4L2_IDENT_ARMADA610 = 8802, - - /* AKM AK8813/AK8814 */ - V4L2_IDENT_AK8813 = 8813, - V4L2_IDENT_AK8814 = 8814, - - /* module cx23885 and cx25840 */ - V4L2_IDENT_CX23885 = 8850, - V4L2_IDENT_CX23885_AV = 8851, /* Integrated A/V decoder */ - V4L2_IDENT_CX23887 = 8870, - V4L2_IDENT_CX23887_AV = 8871, /* Integrated A/V decoder */ - V4L2_IDENT_CX23888 = 8880, - V4L2_IDENT_CX23888_AV = 8881, /* Integrated A/V decoder */ - V4L2_IDENT_CX23888_IR = 8882, /* Integrated infrared controller */ - - /* module ad9389b: just ident 9389 */ - V4L2_IDENT_AD9389B = 9389, - - /* module tda9840: just ident 9840 */ - V4L2_IDENT_TDA9840 = 9840, - - /* module tw9910: just ident 9910 */ - V4L2_IDENT_TW9910 = 9910, - - /* module sn9c20x: just ident 10000 */ - V4L2_IDENT_SN9C20X = 10000, - - /* module cx231xx and cx25840 */ - V4L2_IDENT_CX2310X_AV = 23099, /* Integrated A/V decoder; not in '100 */ - V4L2_IDENT_CX23100 = 23100, - V4L2_IDENT_CX23101 = 23101, - V4L2_IDENT_CX23102 = 23102, - - /* module msp3400: reserved range 34000-34999 for msp34xx */ - V4L2_IDENT_MSPX4XX = 34000, /* generic MSPX4XX identifier, only - use internally (tveeprom.c). */ - - V4L2_IDENT_MSP3400B = 34002, - V4L2_IDENT_MSP3400C = 34003, - V4L2_IDENT_MSP3400D = 34004, - V4L2_IDENT_MSP3400G = 34007, - V4L2_IDENT_MSP3401G = 34017, - V4L2_IDENT_MSP3402G = 34027, - V4L2_IDENT_MSP3405D = 34054, - V4L2_IDENT_MSP3405G = 34057, - V4L2_IDENT_MSP3407D = 34074, - V4L2_IDENT_MSP3407G = 34077, - - V4L2_IDENT_MSP3410B = 34102, - V4L2_IDENT_MSP3410C = 34103, - V4L2_IDENT_MSP3410D = 34104, - V4L2_IDENT_MSP3410G = 34107, - V4L2_IDENT_MSP3411G = 34117, - V4L2_IDENT_MSP3412G = 34127, - V4L2_IDENT_MSP3415D = 34154, - V4L2_IDENT_MSP3415G = 34157, - V4L2_IDENT_MSP3417D = 34174, - V4L2_IDENT_MSP3417G = 34177, - - V4L2_IDENT_MSP3420G = 34207, - V4L2_IDENT_MSP3421G = 34217, - V4L2_IDENT_MSP3422G = 34227, - V4L2_IDENT_MSP3425G = 34257, - V4L2_IDENT_MSP3427G = 34277, - - V4L2_IDENT_MSP3430G = 34307, - V4L2_IDENT_MSP3431G = 34317, - V4L2_IDENT_MSP3435G = 34357, - V4L2_IDENT_MSP3437G = 34377, - - V4L2_IDENT_MSP3440G = 34407, - V4L2_IDENT_MSP3441G = 34417, - V4L2_IDENT_MSP3442G = 34427, - V4L2_IDENT_MSP3445G = 34457, - V4L2_IDENT_MSP3447G = 34477, - - V4L2_IDENT_MSP3450G = 34507, - V4L2_IDENT_MSP3451G = 34517, - V4L2_IDENT_MSP3452G = 34527, - V4L2_IDENT_MSP3455G = 34557, - V4L2_IDENT_MSP3457G = 34577, - - V4L2_IDENT_MSP3460G = 34607, - V4L2_IDENT_MSP3461G = 34617, - V4L2_IDENT_MSP3465G = 34657, - V4L2_IDENT_MSP3467G = 34677, - - /* module msp3400: reserved range 44000-44999 for msp44xx */ - V4L2_IDENT_MSP4400G = 44007, - V4L2_IDENT_MSP4408G = 44087, - V4L2_IDENT_MSP4410G = 44107, - V4L2_IDENT_MSP4418G = 44187, - V4L2_IDENT_MSP4420G = 44207, - V4L2_IDENT_MSP4428G = 44287, - V4L2_IDENT_MSP4440G = 44407, - V4L2_IDENT_MSP4448G = 44487, - V4L2_IDENT_MSP4450G = 44507, - V4L2_IDENT_MSP4458G = 44587, - - /* Micron CMOS sensor chips: 45000-45099 */ - V4L2_IDENT_MT9M001C12ST = 45000, - V4L2_IDENT_MT9M001C12STM = 45005, - V4L2_IDENT_MT9M111 = 45007, - V4L2_IDENT_MT9M112 = 45008, - V4L2_IDENT_MT9V022IX7ATC = 45010, /* No way to detect "normal" I77ATx */ - V4L2_IDENT_MT9V022IX7ATM = 45015, /* and "lead free" IA7ATx chips */ - V4L2_IDENT_MT9T031 = 45020, - V4L2_IDENT_MT9T111 = 45021, - V4L2_IDENT_MT9T112 = 45022, - V4L2_IDENT_MT9V111 = 45031, - V4L2_IDENT_MT9V112 = 45032, - - /* HV7131R CMOS sensor: just ident 46000 */ - V4L2_IDENT_HV7131R = 46000, - - /* Sharp RJ54N1CB0C, 0xCB0C = 51980 */ - V4L2_IDENT_RJ54N1CB0C = 51980, - - /* module m52790: just ident 52790 */ - V4L2_IDENT_M52790 = 52790, - - /* module cs53132a: just ident 53132 */ - V4L2_IDENT_CS53l32A = 53132, - - /* modules upd61151 MPEG2 encoder: just ident 54000 */ - V4L2_IDENT_UPD61161 = 54000, - /* modules upd61152 MPEG2 encoder with AC3: just ident 54001 */ - V4L2_IDENT_UPD61162 = 54001, - - /* module upd64031a: just ident 64031 */ - V4L2_IDENT_UPD64031A = 64031, - - /* module upd64083: just ident 64083 */ - V4L2_IDENT_UPD64083 = 64083, - - /* Don't just add new IDs at the end: KEEP THIS LIST ORDERED BY ID! */ -}; - -#endif diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h index 931652f0e2af..e0b74a430b3a 100644 --- a/include/media/v4l2-ioctl.h +++ b/include/media/v4l2-ioctl.h @@ -247,8 +247,6 @@ struct v4l2_ioctl_ops { int (*vidioc_g_chip_info) (struct file *file, void *fh, struct v4l2_dbg_chip_info *chip); #endif - int (*vidioc_g_chip_ident) (struct file *file, void *fh, - struct v4l2_dbg_chip_ident *chip); int (*vidioc_enum_framesizes) (struct file *file, void *fh, struct v4l2_frmsizeenum *fsize); diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 5298d678d0f3..21fc9e16d7be 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -88,7 +88,6 @@ struct v4l2_decode_vbi_line { /* Core ops: it is highly recommended to implement at least these ops: - g_chip_ident log_status g_register s_register @@ -145,7 +144,6 @@ struct v4l2_subdev_io_pin_config { performed later. It must not sleep. *Called from an IRQ context*. */ struct v4l2_subdev_core_ops { - int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip); int (*log_status)(struct v4l2_subdev *sd); int (*s_io_pin_config)(struct v4l2_subdev *sd, size_t n, struct v4l2_subdev_io_pin_config *pincfg); @@ -660,7 +658,7 @@ void v4l2_subdev_init(struct v4l2_subdev *sd, /* Call an ops of a v4l2_subdev, doing the right checks against NULL pointers. - Example: err = v4l2_subdev_call(sd, core, g_chip_ident, &chip); + Example: err = v4l2_subdev_call(sd, core, s_std, norm); */ #define v4l2_subdev_call(sd, o, f, args...) \ (!(sd) ? -ENODEV : (((sd)->ops->o && (sd)->ops->o->f) ? \ diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 2c5e67a45436..95ef4551edc1 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1787,11 +1787,13 @@ struct v4l2_event_subscription { /* VIDIOC_DBG_G_REGISTER and VIDIOC_DBG_S_REGISTER */ #define V4L2_CHIP_MATCH_BRIDGE 0 /* Match against chip ID on the bridge (0 for the bridge) */ +#define V4L2_CHIP_MATCH_SUBDEV 4 /* Match against subdev index */ + +/* The following four defines are no longer in use */ #define V4L2_CHIP_MATCH_HOST V4L2_CHIP_MATCH_BRIDGE #define V4L2_CHIP_MATCH_I2C_DRIVER 1 /* Match against I2C driver name */ #define V4L2_CHIP_MATCH_I2C_ADDR 2 /* Match against I2C 7-bit address */ #define V4L2_CHIP_MATCH_AC97 3 /* Match against ancillary AC97 chip */ -#define V4L2_CHIP_MATCH_SUBDEV 4 /* Match against subdev index */ struct v4l2_dbg_match { __u32 type; /* Match type */ @@ -1808,13 +1810,6 @@ struct v4l2_dbg_register { __u64 val; } __attribute__ ((packed)); -/* VIDIOC_DBG_G_CHIP_IDENT */ -struct v4l2_dbg_chip_ident { - struct v4l2_dbg_match match; - __u32 ident; /* chip identifier as specified in */ - __u32 revision; /* chip revision, chip specific */ -} __attribute__ ((packed)); - #define V4L2_CHIP_FL_READABLE (1 << 0) #define V4L2_CHIP_FL_WRITABLE (1 << 1) @@ -1915,12 +1910,6 @@ struct v4l2_create_buffers { #define VIDIOC_DBG_S_REGISTER _IOW('V', 79, struct v4l2_dbg_register) #define VIDIOC_DBG_G_REGISTER _IOWR('V', 80, struct v4l2_dbg_register) -/* Experimental, meant for debugging, testing and internal use. - Never use this ioctl in applications! - Note: this ioctl is deprecated in favor of VIDIOC_DBG_G_CHIP_INFO and - will go away in the future. */ -#define VIDIOC_DBG_G_CHIP_IDENT _IOWR('V', 81, struct v4l2_dbg_chip_ident) - #define VIDIOC_S_HW_FREQ_SEEK _IOW('V', 82, struct v4l2_hw_freq_seek) #define VIDIOC_S_DV_TIMINGS _IOWR('V', 87, struct v4l2_dv_timings) -- cgit v1.3-14-g43fede From 166fd7d94afdac040b28c473e45241820ca522a2 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Jun 2013 09:38:02 -0600 Subject: vfio: hugepage support for vfio_iommu_type1 We currently send all mappings to the iommu in PAGE_SIZE chunks, which prevents the iommu from enabling support for larger page sizes. We still need to pin pages, which means we step through them in PAGE_SIZE chunks, but we can batch up contiguous physical memory chunks to allow the iommu the opportunity to use larger pages. The approach here is a bit different that the one currently used for legacy KVM device assignment. Rather than looking at the vma page size and using that as the maximum size to pass to the iommu, we instead simply look at whether the next page is physically contiguous. This means we might ask the iommu to map a 4MB region, while legacy KVM might limit itself to a maximum of 2MB. Splitting our mapping path also allows us to be smarter about locked memory because we can more easily unwind if the user attempts to exceed the limit. Therefore, rather than assuming that a mapping will result in locked memory, we test each page as it is pinned to determine whether it locks RAM vs an mmap'd MMIO region. This should result in better locking granularity and less locked page fudge factors in userspace. The unmap path uses the same algorithm as legacy KVM. We don't want to track the pfn for each mapping ourselves, but we need the pfn in order to unpin pages. We therefore ask the iommu for the iova to physical address translation, ask it to unpin a page, and see how many pages were actually unpinned. iommus supporting large pages will often return something bigger than a page here, which we know will be physically contiguous and we can unpin a batch of pfns. iommus not supporting large mappings won't see an improvement in batching here as they only unmap a page at a time. With this change, we also make a clarification to the API for mapping and unmapping DMA. We can only guarantee unmaps at the same granularity as used for the original mapping. In other words, unmapping a subregion of a previous mapping is not guaranteed and may result in a larger or smaller unmapping than requested. The size field in the unmapping structure is updated to reflect this. Previously this was unmodified on mapping, always returning the the requested unmap size. This is now updated to return the actual unmap size on success, allowing userspace to appropriately track mappings. Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 523 ++++++++++++++++++++++++++-------------- include/uapi/linux/vfio.h | 8 +- 2 files changed, 344 insertions(+), 187 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 0e863b3ddcab..6654a7eb42d3 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -60,7 +60,7 @@ struct vfio_dma { struct rb_node node; dma_addr_t iova; /* Device address */ unsigned long vaddr; /* Process virtual addr */ - long npage; /* Number of pages */ + size_t size; /* Map size (bytes) */ int prot; /* IOMMU_READ/WRITE */ }; @@ -74,8 +74,6 @@ struct vfio_group { * into DMA'ble space using the IOMMU */ -#define NPAGE_TO_SIZE(npage) ((size_t)(npage) << PAGE_SHIFT) - static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu, dma_addr_t start, size_t size) { @@ -86,7 +84,7 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu, if (start + size <= dma->iova) node = node->rb_left; - else if (start >= dma->iova + NPAGE_TO_SIZE(dma->npage)) + else if (start >= dma->iova + dma->size) node = node->rb_right; else return dma; @@ -104,7 +102,7 @@ static void vfio_insert_dma(struct vfio_iommu *iommu, struct vfio_dma *new) parent = *link; dma = rb_entry(parent, struct vfio_dma, node); - if (new->iova + NPAGE_TO_SIZE(new->npage) <= dma->iova) + if (new->iova + new->size <= dma->iova) link = &(*link)->rb_left; else link = &(*link)->rb_right; @@ -144,8 +142,8 @@ static void vfio_lock_acct(long npage) struct vwork *vwork; struct mm_struct *mm; - if (!current->mm) - return; /* process exited */ + if (!current->mm || !npage) + return; /* process exited or nothing to do */ if (down_write_trylock(¤t->mm->mmap_sem)) { current->mm->locked_vm += npage; @@ -217,33 +215,6 @@ static int put_pfn(unsigned long pfn, int prot) return 0; } -/* Unmap DMA region */ -static long __vfio_dma_do_unmap(struct vfio_iommu *iommu, dma_addr_t iova, - long npage, int prot) -{ - long i, unlocked = 0; - - for (i = 0; i < npage; i++, iova += PAGE_SIZE) { - unsigned long pfn; - - pfn = iommu_iova_to_phys(iommu->domain, iova) >> PAGE_SHIFT; - if (pfn) { - iommu_unmap(iommu->domain, iova, PAGE_SIZE); - unlocked += put_pfn(pfn, prot); - } - } - return unlocked; -} - -static void vfio_dma_unmap(struct vfio_iommu *iommu, dma_addr_t iova, - long npage, int prot) -{ - long unlocked; - - unlocked = __vfio_dma_do_unmap(iommu, iova, npage, prot); - vfio_lock_acct(-unlocked); -} - static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) { struct page *page[1]; @@ -270,79 +241,142 @@ static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) return ret; } -/* Map DMA region */ -static int __vfio_dma_map(struct vfio_iommu *iommu, dma_addr_t iova, - unsigned long vaddr, long npage, int prot) +/* + * Attempt to pin pages. We really don't want to track all the pfns and + * the iommu can only map chunks of consecutive pfns anyway, so get the + * first page and all consecutive pages with the same locking. + */ +static long vfio_pin_pages(unsigned long vaddr, long npage, + int prot, unsigned long *pfn_base) { - dma_addr_t start = iova; - long i, locked = 0; - int ret; + unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + bool lock_cap = capable(CAP_IPC_LOCK); + long ret, i; - /* Verify that pages are not already mapped */ - for (i = 0; i < npage; i++, iova += PAGE_SIZE) - if (iommu_iova_to_phys(iommu->domain, iova)) - return -EBUSY; + if (!current->mm) + return -ENODEV; - iova = start; + ret = vaddr_get_pfn(vaddr, prot, pfn_base); + if (ret) + return ret; - if (iommu->cache) - prot |= IOMMU_CACHE; + if (is_invalid_reserved_pfn(*pfn_base)) + return 1; - /* - * XXX We break mappings into pages and use get_user_pages_fast to - * pin the pages in memory. It's been suggested that mlock might - * provide a more efficient mechanism, but nothing prevents the - * user from munlocking the pages, which could then allow the user - * access to random host memory. We also have no guarantee from the - * IOMMU API that the iommu driver can unmap sub-pages of previous - * mappings. This means we might lose an entire range if a single - * page within it is unmapped. Single page mappings are inefficient, - * but provide the most flexibility for now. - */ - for (i = 0; i < npage; i++, iova += PAGE_SIZE, vaddr += PAGE_SIZE) { + if (!lock_cap && current->mm->locked_vm + 1 > limit) { + put_pfn(*pfn_base, prot); + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, + limit << PAGE_SHIFT); + return -ENOMEM; + } + + /* Lock all the consecutive pages from pfn_base */ + for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) { unsigned long pfn = 0; ret = vaddr_get_pfn(vaddr, prot, &pfn); - if (ret) { - __vfio_dma_do_unmap(iommu, start, i, prot); - return ret; + if (ret) + break; + + if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) { + put_pfn(pfn, prot); + break; } + if (!lock_cap && current->mm->locked_vm + i + 1 > limit) { + put_pfn(pfn, prot); + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", + __func__, limit << PAGE_SHIFT); + break; + } + } + + vfio_lock_acct(i); + + return i; +} + +static long vfio_unpin_pages(unsigned long pfn, long npage, + int prot, bool do_accounting) +{ + unsigned long unlocked = 0; + long i; + + for (i = 0; i < npage; i++) + unlocked += put_pfn(pfn++, prot); + + if (do_accounting) + vfio_lock_acct(-unlocked); + + return unlocked; +} + +static int vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, + dma_addr_t iova, size_t *size) +{ + dma_addr_t start = iova, end = iova + *size; + long unlocked = 0; + + while (iova < end) { + size_t unmapped; + phys_addr_t phys; + /* - * Only add actual locked pages to accounting - * XXX We're effectively marking a page locked for every - * IOVA page even though it's possible the user could be - * backing multiple IOVAs with the same vaddr. This over- - * penalizes the user process, but we currently have no - * easy way to do this properly. + * We use the IOMMU to track the physical address. This + * saves us from having a lot more entries in our mapping + * tree. The downside is that we don't track the size + * used to do the mapping. We request unmap of a single + * page, but expect IOMMUs that support large pages to + * unmap a larger chunk. */ - if (!is_invalid_reserved_pfn(pfn)) - locked++; - - ret = iommu_map(iommu->domain, iova, - (phys_addr_t)pfn << PAGE_SHIFT, - PAGE_SIZE, prot); - if (ret) { - /* Back out mappings on error */ - put_pfn(pfn, prot); - __vfio_dma_do_unmap(iommu, start, i, prot); - return ret; + phys = iommu_iova_to_phys(iommu->domain, iova); + if (WARN_ON(!phys)) { + iova += PAGE_SIZE; + continue; } + + unmapped = iommu_unmap(iommu->domain, iova, PAGE_SIZE); + if (!unmapped) + break; + + unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT, + unmapped >> PAGE_SHIFT, + dma->prot, false); + iova += unmapped; } - vfio_lock_acct(locked); + + vfio_lock_acct(-unlocked); + + *size = iova - start; + return 0; } static int vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start, - size_t size, struct vfio_dma *dma) + size_t *size, struct vfio_dma *dma) { + size_t offset, overlap, tmp; struct vfio_dma *split; - long npage_lo, npage_hi; + int ret; + + /* + * Existing dma region is completely covered, unmap all. This is + * the likely case since userspace tends to map and unmap buffers + * in one shot rather than multiple mappings within a buffer. + */ + if (likely(start <= dma->iova && + start + *size >= dma->iova + dma->size)) { + *size = dma->size; + ret = vfio_unmap_unpin(iommu, dma, dma->iova, size); + if (ret) + return ret; + + /* + * Did we remove more than we have? Should never happen + * since a vfio_dma is contiguous in iova and vaddr. + */ + WARN_ON(*size != dma->size); - /* Existing dma region is completely covered, unmap all */ - if (start <= dma->iova && - start + size >= dma->iova + NPAGE_TO_SIZE(dma->npage)) { - vfio_dma_unmap(iommu, dma->iova, dma->npage, dma->prot); vfio_remove_dma(iommu, dma); kfree(dma); return 0; @@ -350,47 +384,79 @@ static int vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start, /* Overlap low address of existing range */ if (start <= dma->iova) { - size_t overlap; + overlap = start + *size - dma->iova; + ret = vfio_unmap_unpin(iommu, dma, dma->iova, &overlap); + if (ret) + return ret; - overlap = start + size - dma->iova; - npage_lo = overlap >> PAGE_SHIFT; + vfio_remove_dma(iommu, dma); - vfio_dma_unmap(iommu, dma->iova, npage_lo, dma->prot); - dma->iova += overlap; - dma->vaddr += overlap; - dma->npage -= npage_lo; + /* + * Check, we may have removed to whole vfio_dma. If not + * fixup and re-insert. + */ + if (overlap < dma->size) { + dma->iova += overlap; + dma->vaddr += overlap; + dma->size -= overlap; + vfio_insert_dma(iommu, dma); + } + *size = overlap; return 0; } /* Overlap high address of existing range */ - if (start + size >= dma->iova + NPAGE_TO_SIZE(dma->npage)) { - size_t overlap; + if (start + *size >= dma->iova + dma->size) { + offset = start - dma->iova; + overlap = dma->size - offset; - overlap = dma->iova + NPAGE_TO_SIZE(dma->npage) - start; - npage_hi = overlap >> PAGE_SHIFT; + ret = vfio_unmap_unpin(iommu, dma, start, &overlap); + if (ret) + return ret; + + /* + * We may have unmapped the entire vfio_dma if the user is + * trying to unmap a sub-region of what was originally + * mapped. If anything left, we can resize in place since + * iova is unchanged. + */ + if (overlap < dma->size) + dma->size -= overlap; + else + vfio_remove_dma(iommu, dma); - vfio_dma_unmap(iommu, start, npage_hi, dma->prot); - dma->npage -= npage_hi; + *size = overlap; return 0; } /* Split existing */ - npage_lo = (start - dma->iova) >> PAGE_SHIFT; - npage_hi = dma->npage - (size >> PAGE_SHIFT) - npage_lo; + offset = start - dma->iova; - split = kzalloc(sizeof *split, GFP_KERNEL); - if (!split) - return -ENOMEM; + ret = vfio_unmap_unpin(iommu, dma, start, size); + if (ret) + return ret; - vfio_dma_unmap(iommu, start, size >> PAGE_SHIFT, dma->prot); + WARN_ON(!*size); + tmp = dma->size; - dma->npage = npage_lo; + /* + * Resize the lower vfio_dma in place, insert new for remaining + * upper segment. + */ + dma->size = offset; + + if (offset + *size < tmp) { + split = kzalloc(sizeof(*split), GFP_KERNEL); + if (!split) + return -ENOMEM; + + split->size = tmp - offset - *size; + split->iova = dma->iova + offset + *size; + split->vaddr = dma->vaddr + offset + *size; + split->prot = dma->prot; + vfio_insert_dma(iommu, split); + } - split->npage = npage_hi; - split->iova = start + size; - split->vaddr = dma->vaddr + NPAGE_TO_SIZE(npage_lo) + size; - split->prot = dma->prot; - vfio_insert_dma(iommu, split); return 0; } @@ -399,6 +465,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, { uint64_t mask; struct vfio_dma *dma; + size_t unmapped = 0, size; int ret = 0; mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1; @@ -408,30 +475,66 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, if (unmap->size & mask) return -EINVAL; - /* XXX We still break these down into PAGE_SIZE */ WARN_ON(mask & PAGE_MASK); mutex_lock(&iommu->lock); - while (!ret && (dma = vfio_find_dma(iommu, - unmap->iova, unmap->size))) - ret = vfio_remove_dma_overlap(iommu, unmap->iova, - unmap->size, dma); + while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) { + size = unmap->size; + ret = vfio_remove_dma_overlap(iommu, unmap->iova, &size, dma); + if (ret) + break; + unmapped += size; + } mutex_unlock(&iommu->lock); + + /* + * We may unmap more than requested, update the unmap struct so + * userspace can know. + */ + unmap->size = unmapped; + + return ret; +} + +/* + * Turns out AMD IOMMU has a page table bug where it won't map large pages + * to a region that previously mapped smaller pages. This should be fixed + * soon, so this is just a temporary workaround to break mappings down into + * PAGE_SIZE. Better to map smaller pages than nothing. + */ +static int map_try_harder(struct vfio_iommu *iommu, dma_addr_t iova, + unsigned long pfn, long npage, int prot) +{ + long i; + int ret; + + for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) { + ret = iommu_map(iommu->domain, iova, + (phys_addr_t)pfn << PAGE_SHIFT, + PAGE_SIZE, prot); + if (ret) + break; + } + + for (; i < npage && i > 0; i--, iova -= PAGE_SIZE) + iommu_unmap(iommu->domain, iova, PAGE_SIZE); + return ret; } static int vfio_dma_do_map(struct vfio_iommu *iommu, struct vfio_iommu_type1_dma_map *map) { - struct vfio_dma *dma; - dma_addr_t iova = map->iova; - unsigned long locked, lock_limit, vaddr = map->vaddr; + dma_addr_t end, iova; + unsigned long vaddr = map->vaddr; size_t size = map->size; + long npage; int ret = 0, prot = 0; uint64_t mask; - long npage; + + end = map->iova + map->size; mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1; @@ -444,92 +547,138 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, if (!prot) return -EINVAL; /* No READ/WRITE? */ + if (iommu->cache) + prot |= IOMMU_CACHE; + if (vaddr & mask) return -EINVAL; - if (iova & mask) + if (map->iova & mask) return -EINVAL; - if (size & mask) + if (!map->size || map->size & mask) return -EINVAL; - /* XXX We still break these down into PAGE_SIZE */ WARN_ON(mask & PAGE_MASK); /* Don't allow IOVA wrap */ - if (iova + size && iova + size < iova) + if (end && end < map->iova) return -EINVAL; /* Don't allow virtual address wrap */ - if (vaddr + size && vaddr + size < vaddr) - return -EINVAL; - - npage = size >> PAGE_SHIFT; - if (!npage) + if (vaddr + map->size && vaddr + map->size < vaddr) return -EINVAL; - dma = kzalloc(sizeof *dma, GFP_KERNEL); - if (!dma) - return -ENOMEM; - mutex_lock(&iommu->lock); - if (vfio_find_dma(iommu, iova, size)) { - ret = -EBUSY; - goto out_lock; + if (vfio_find_dma(iommu, map->iova, map->size)) { + mutex_unlock(&iommu->lock); + return -EEXIST; } - /* account for locked pages */ - locked = current->mm->locked_vm + npage; - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { - pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", - __func__, rlimit(RLIMIT_MEMLOCK)); - ret = -ENOMEM; - goto out_lock; - } + for (iova = map->iova; iova < end; iova += size, vaddr += size) { + struct vfio_dma *dma = NULL; + unsigned long pfn; + long i; + + /* Pin a contiguous chunk of memory */ + npage = vfio_pin_pages(vaddr, (end - iova) >> PAGE_SHIFT, + prot, &pfn); + if (npage <= 0) { + WARN_ON(!npage); + ret = (int)npage; + break; + } - ret = __vfio_dma_map(iommu, iova, vaddr, npage, prot); - if (ret) - goto out_lock; - - dma->npage = npage; - dma->iova = iova; - dma->vaddr = vaddr; - dma->prot = prot; - - /* Check if we abut a region below - nothing below 0 */ - if (iova) { - struct vfio_dma *tmp = vfio_find_dma(iommu, iova - 1, 1); - if (tmp && tmp->prot == prot && - tmp->vaddr + NPAGE_TO_SIZE(tmp->npage) == vaddr) { - vfio_remove_dma(iommu, tmp); - dma->npage += tmp->npage; - dma->iova = iova = tmp->iova; - dma->vaddr = vaddr = tmp->vaddr; - kfree(tmp); - npage = dma->npage; - size = NPAGE_TO_SIZE(npage); + /* Verify pages are not already mapped */ + for (i = 0; i < npage; i++) { + if (iommu_iova_to_phys(iommu->domain, + iova + (i << PAGE_SHIFT))) { + vfio_unpin_pages(pfn, npage, prot, true); + ret = -EBUSY; + break; + } + } + + ret = iommu_map(iommu->domain, iova, + (phys_addr_t)pfn << PAGE_SHIFT, + npage << PAGE_SHIFT, prot); + if (ret) { + if (ret != -EBUSY || + map_try_harder(iommu, iova, pfn, npage, prot)) { + vfio_unpin_pages(pfn, npage, prot, true); + break; + } + } + + size = npage << PAGE_SHIFT; + + /* + * Check if we abut a region below - nothing below 0. + * This is the most likely case when mapping chunks of + * physically contiguous regions within a virtual address + * range. Update the abutting entry in place since iova + * doesn't change. + */ + if (likely(iova)) { + struct vfio_dma *tmp; + tmp = vfio_find_dma(iommu, iova - 1, 1); + if (tmp && tmp->prot == prot && + tmp->vaddr + tmp->size == vaddr) { + tmp->size += size; + + iova = tmp->iova; + size = tmp->size; + vaddr = tmp->vaddr; + dma = tmp; + } + } + + /* Check if we abut a region above - nothing above ~0 + 1 */ + if (likely(iova + size)) { + struct vfio_dma *tmp; + + tmp = vfio_find_dma(iommu, iova + size, 1); + if (tmp && tmp->prot == prot && + tmp->vaddr == vaddr + size) { + vfio_remove_dma(iommu, tmp); + if (dma) + dma->size += tmp->size; + else + size += tmp->size; + kfree(tmp); + } } - } - /* Check if we abut a region above - nothing above ~0 + 1 */ - if (iova + size) { - struct vfio_dma *tmp = vfio_find_dma(iommu, iova + size, 1); - if (tmp && tmp->prot == prot && - tmp->vaddr == vaddr + size) { - vfio_remove_dma(iommu, tmp); - dma->npage += tmp->npage; - kfree(tmp); - npage = dma->npage; - size = NPAGE_TO_SIZE(npage); + if (!dma) { + dma = kzalloc(sizeof(*dma), GFP_KERNEL); + if (!dma) { + iommu_unmap(iommu->domain, iova, size); + vfio_unpin_pages(pfn, npage, prot, true); + ret = -ENOMEM; + break; + } + + dma->size = size; + dma->iova = iova; + dma->vaddr = vaddr; + dma->prot = prot; + vfio_insert_dma(iommu, dma); } } - vfio_insert_dma(iommu, dma); + if (ret) { + struct vfio_dma *tmp; + iova = map->iova; + size = map->size; + while ((tmp = vfio_find_dma(iommu, iova, size))) { + if (vfio_remove_dma_overlap(iommu, iova, &size, tmp)) { + pr_warn("%s: Error rolling back failed map\n", + __func__); + break; + } + } + } -out_lock: mutex_unlock(&iommu->lock); - if (ret) - kfree(dma); return ret; } @@ -651,9 +800,8 @@ static void vfio_iommu_type1_release(void *iommu_data) while ((node = rb_first(&iommu->dma_list))) { struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node); - vfio_dma_unmap(iommu, dma->iova, dma->npage, dma->prot); - vfio_remove_dma(iommu, dma); - kfree(dma); + size_t size = dma->size; + vfio_remove_dma_overlap(iommu, dma->iova, &size, dma); } iommu_domain_free(iommu->domain); @@ -708,6 +856,7 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, } else if (cmd == VFIO_IOMMU_UNMAP_DMA) { struct vfio_iommu_type1_dma_unmap unmap; + long ret; minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); @@ -717,7 +866,11 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, if (unmap.argsz < minsz || unmap.flags) return -EINVAL; - return vfio_dma_do_unmap(iommu, &unmap); + ret = vfio_dma_do_unmap(iommu, &unmap); + if (ret) + return ret; + + return copy_to_user((void __user *)arg, &unmap, minsz); } return -ENOTTY; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 284ff2436829..513600612995 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -361,10 +361,14 @@ struct vfio_iommu_type1_dma_map { #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) /** - * VFIO_IOMMU_UNMAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 14, struct vfio_dma_unmap) + * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14, + * struct vfio_dma_unmap) * * Unmap IO virtual addresses using the provided struct vfio_dma_unmap. - * Caller sets argsz. + * Caller sets argsz. The actual unmapped size is returned in the size + * field. No guarantee is made to the user that arbitrary unmaps of iova + * or size different from those used in the original mapping call will + * succeed. */ struct vfio_iommu_type1_dma_unmap { __u32 argsz; -- cgit v1.3-14-g43fede From 77e2af0312b12dccd5043a7cc9cd49ab6a212996 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Jun 2013 19:38:06 +0200 Subject: net: if_arp: add ARPHRD_NETLINK type This small patch adds the definition of ARPHRD_NETLINK which can for example be used by netlink monitoring devices as device type. So that sockaddr_ll can pick it up and based on that choose the correct packet dissector. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/if_arp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h index 82c7d1bdadeb..d7fea3496f32 100644 --- a/include/uapi/linux/if_arp.h +++ b/include/uapi/linux/if_arp.h @@ -93,6 +93,7 @@ #define ARPHRD_PHONET_PIPE 821 /* PhoNet pipe header */ #define ARPHRD_CAIF 822 /* CAIF media type */ #define ARPHRD_IP6GRE 823 /* GRE over IPv6 */ +#define ARPHRD_NETLINK 824 /* Netlink header */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ -- cgit v1.3-14-g43fede From eba3b5a78799d21dea05118b294524958f0ab592 Mon Sep 17 00:00:00 2001 From: Alexander Frolkin Date: Wed, 19 Jun 2013 10:54:25 +0100 Subject: ipvs: SH fallback and L4 hashing By default the SH scheduler rejects connections that are hashed onto a realserver of weight 0. This patch adds a flag to make SH choose a different realserver in this case, instead of rejecting the connection. The patch also adds a flag to make SH include the source port (TCP, UDP, SCTP) in the hash as well as the source address. This basically allows for deterministic round-robin load balancing (i.e., where any director in a cluster of directors with identical config will send the same packet the same way). The flags are service flags (IP_VS_SVC_F_SCHED*) so that these options can be set per service. They are set using a new option to ipvsadm. Signed-off-by: Alexander Frolkin Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/uapi/linux/ip_vs.h | 6 +++ net/netfilter/ipvs/ip_vs_sh.c | 100 +++++++++++++++++++++++++++++++++++------- 2 files changed, 91 insertions(+), 15 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index a24537725e80..29458223d044 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -20,6 +20,12 @@ #define IP_VS_SVC_F_PERSISTENT 0x0001 /* persistent port */ #define IP_VS_SVC_F_HASHED 0x0002 /* hashed entry */ #define IP_VS_SVC_F_ONEPACKET 0x0004 /* one-packet scheduling */ +#define IP_VS_SVC_F_SCHED1 0x0008 /* scheduler flag 1 */ +#define IP_VS_SVC_F_SCHED2 0x0010 /* scheduler flag 2 */ +#define IP_VS_SVC_F_SCHED3 0x0020 /* scheduler flag 3 */ + +#define IP_VS_SVC_F_SCHED_SH_FALLBACK IP_VS_SVC_F_SCHED1 /* SH fallback */ +#define IP_VS_SVC_F_SCHED_SH_PORT IP_VS_SVC_F_SCHED2 /* SH use port */ /* * Destination Server Flags diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index e0d5d1653566..f16c027df15b 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -48,6 +48,10 @@ #include +#include +#include +#include + /* * IPVS SH bucket @@ -71,10 +75,19 @@ struct ip_vs_sh_state { struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; }; +/* Helper function to determine if server is unavailable */ +static inline bool is_unavailable(struct ip_vs_dest *dest) +{ + return atomic_read(&dest->weight) <= 0 || + dest->flags & IP_VS_DEST_F_OVERLOAD; +} + /* * Returns hash value for IPVS SH entry */ -static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr) +static inline unsigned int +ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr, + __be16 port, unsigned int offset) { __be32 addr_fold = addr->ip; @@ -83,7 +96,8 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad addr_fold = addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif - return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK; + return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) & + IP_VS_SH_TAB_MASK; } @@ -91,12 +105,42 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr) +ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s, + const union nf_inet_addr *addr, __be16 port) { - return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest); + unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0); + struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest); + + return (!dest || is_unavailable(dest)) ? NULL : dest; } +/* As ip_vs_sh_get, but with fallback if selected server is unavailable */ +static inline struct ip_vs_dest * +ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, + const union nf_inet_addr *addr, __be16 port) +{ + unsigned int offset; + unsigned int hash; + struct ip_vs_dest *dest; + + for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) { + hash = ip_vs_sh_hashkey(svc->af, addr, port, offset); + dest = rcu_dereference(s->buckets[hash].dest); + if (!dest) + break; + if (is_unavailable(dest)) + IP_VS_DBG_BUF(6, "SH: selected unavailable server " + "%s:%d (offset %d)", + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), offset); + else + return dest; + } + + return NULL; +} + /* * Assign all the hash buckets of the specified table with the service. */ @@ -213,13 +257,33 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, } -/* - * If the dest flags is set with IP_VS_DEST_F_OVERLOAD, - * consider that the server is overloaded here. - */ -static inline int is_overloaded(struct ip_vs_dest *dest) +/* Helper function to get port number */ +static inline __be16 +ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) { - return dest->flags & IP_VS_DEST_F_OVERLOAD; + __be16 port; + struct tcphdr _tcph, *th; + struct udphdr _udph, *uh; + sctp_sctphdr_t _sctph, *sh; + + switch (iph->protocol) { + case IPPROTO_TCP: + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); + port = th->source; + break; + case IPPROTO_UDP: + uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); + port = uh->source; + break; + case IPPROTO_SCTP: + sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); + port = sh->source; + break; + default: + port = 0; + } + + return port; } @@ -232,15 +296,21 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, { struct ip_vs_dest *dest; struct ip_vs_sh_state *s; + __be16 port = 0; IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); + if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT) + port = ip_vs_sh_get_port(skb, iph); + s = (struct ip_vs_sh_state *) svc->sched_data; - dest = ip_vs_sh_get(svc->af, s, &iph->saddr); - if (!dest - || !(dest->flags & IP_VS_DEST_F_AVAILABLE) - || atomic_read(&dest->weight) <= 0 - || is_overloaded(dest)) { + + if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) + dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port); + else + dest = ip_vs_sh_get(svc, s, &iph->saddr, port); + + if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); return NULL; } -- cgit v1.3-14-g43fede From 5a1d3e9f18ceb8d791bdc9a78d8ee10ddc80a6e8 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 21 Jun 2013 02:05:34 -0300 Subject: [media] v4l2-controls.h: fix copy-and-paste error in comment The comment for the FM_RX class was copied from the DV class unchanged. Fixed. Also made the FM_TX comment consistent with the others. Signed-off-by: Hans Verkuil Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 69bd5bb0d5af..e90a88a8708f 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -53,13 +53,13 @@ #define V4L2_CTRL_CLASS_USER 0x00980000 /* Old-style 'user' controls */ #define V4L2_CTRL_CLASS_MPEG 0x00990000 /* MPEG-compression controls */ #define V4L2_CTRL_CLASS_CAMERA 0x009a0000 /* Camera class controls */ -#define V4L2_CTRL_CLASS_FM_TX 0x009b0000 /* FM Modulator control class */ +#define V4L2_CTRL_CLASS_FM_TX 0x009b0000 /* FM Modulator controls */ #define V4L2_CTRL_CLASS_FLASH 0x009c0000 /* Camera flash controls */ #define V4L2_CTRL_CLASS_JPEG 0x009d0000 /* JPEG-compression controls */ #define V4L2_CTRL_CLASS_IMAGE_SOURCE 0x009e0000 /* Image source controls */ #define V4L2_CTRL_CLASS_IMAGE_PROC 0x009f0000 /* Image processing controls */ #define V4L2_CTRL_CLASS_DV 0x00a00000 /* Digital Video controls */ -#define V4L2_CTRL_CLASS_FM_RX 0x00a10000 /* Digital Video controls */ +#define V4L2_CTRL_CLASS_FM_RX 0x00a10000 /* FM Receiver controls */ /* User-class control IDs */ -- cgit v1.3-14-g43fede From 496e4ae7dc944faa1721bfda7e9d834d5611a874 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 29 Jun 2013 14:15:47 +0200 Subject: netfilter: nf_queue: add NFQA_SKB_CSUM_NOTVERIFIED info flag The common case is that TCP/IP checksums have already been verified, e.g. by hardware (rx checksum offload), or conntrack. Userspace can use this flag to determine when the checksum has not been validated yet. If the flag is set, this doesn't necessarily mean that the packet has an invalid checksum, e.g. if NIC doesn't support rx checksum. Userspace that sucessfully enabled NFQA_CFG_F_GSO queue feature flag can infer that IP/TCP checksum has already been validated if either the SKB_INFO attribute is not present or the NFQA_SKB_CSUM_NOTVERIFIED flag is unset. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_queue.h | 2 ++ net/netfilter/nfnetlink_queue_core.c | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index a2308ae5a73d..3a9b92147339 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -105,5 +105,7 @@ enum nfqnl_attr_config { #define NFQA_SKB_CSUMNOTREADY (1 << 0) /* packet is GSO (i.e., exceeds device mtu) */ #define NFQA_SKB_GSO (1 << 1) +/* csum not validated (incoming device doesn't support hw checksum, etc.) */ +#define NFQA_SKB_CSUM_NOTVERIFIED (1 << 2) #endif /* _NFNETLINK_QUEUE_H */ diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 299a48ae5dc9..971ea145ab3e 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -280,12 +280,17 @@ nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) skb_shinfo(to)->nr_frags = j; } -static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet) +static int +nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, + bool csum_verify) { __u32 flags = 0; if (packet->ip_summed == CHECKSUM_PARTIAL) flags = NFQA_SKB_CSUMNOTREADY; + else if (csum_verify) + flags = NFQA_SKB_CSUM_NOTVERIFIED; + if (skb_is_gso(packet)) flags |= NFQA_SKB_GSO; @@ -310,6 +315,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, struct net_device *outdev; struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); + bool csum_verify; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) @@ -327,6 +333,12 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entskb->tstamp.tv64) size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); + if (entry->hook <= NF_INET_FORWARD || + (entry->hook == NF_INET_POST_ROUTING && entskb->sk == NULL)) + csum_verify = !skb_csum_unnecessary(entskb); + else + csum_verify = false; + outdev = entry->outdev; switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { @@ -476,7 +488,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) goto nla_put_failure; - if (nfqnl_put_packet_info(skb, entskb)) + if (nfqnl_put_packet_info(skb, entskb, csum_verify)) goto nla_put_failure; if (data_len) { -- cgit v1.3-14-g43fede From d753601a2a5ff67bbc96ce6512a16305bfd293e7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 18 Jun 2013 16:55:41 +0000 Subject: MIPS: BCM63XX: recognize Cable Modem firmware format Add the firmware header format which is used by Broadcom Cable Modem SoCs such as the BCM3368 SoC. We export the bcm_hcs firmware format structure because it is used by user-land tools to create firmware images for these SoCs and will later be used by a corresponding MTD parser. Signed-off-by: Florian Fainelli Cc: linux-mips@linux-mips.org Cc: cernekee@gmail.com Cc: jogo@openwrt.org Patchwork: https://patchwork.linux-mips.org/patch/5496/ Signed-off-by: Ralf Baechle --- arch/mips/bcm63xx/boards/board_bcm963xx.c | 14 ++++++++++++-- include/uapi/linux/Kbuild | 1 + include/uapi/linux/bcm933xx_hcs.h | 24 ++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 include/uapi/linux/bcm933xx_hcs.h (limited to 'include/uapi/linux') diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c index a9505c4867e8..46eabb9aff51 100644 --- a/arch/mips/bcm63xx/boards/board_bcm963xx.c +++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c @@ -28,8 +28,12 @@ #include #include +#include + #define PFX "board_bcm963xx: " +#define HCS_OFFSET_128K 0x20000 + static struct board_info board; /* @@ -722,8 +726,9 @@ void __init board_prom_init(void) unsigned int i; u8 *boot_addr, *cfe; char cfe_version[32]; - char *board_name; + char *board_name = NULL; u32 val; + struct bcm_hcs *hcs; /* read base address of boot chip select (0) * 6328/6362 do not have MPI but boot from a fixed address @@ -747,7 +752,12 @@ void __init board_prom_init(void) bcm63xx_nvram_init(boot_addr + BCM963XX_NVRAM_OFFSET); - board_name = bcm63xx_nvram_get_name(); + if (BCMCPU_IS_3368()) { + hcs = (struct bcm_hcs *)boot_addr; + board_name = hcs->filename; + } else { + board_name = bcm63xx_nvram_get_name(); + } /* find board by name */ for (i = 0; i < ARRAY_SIZE(bcm963xx_boards); i++) { if (strncmp(board_name, bcm963xx_boards[i]->name, 16)) diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index ab5d4992e568..ba1c11ab0d11 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -62,6 +62,7 @@ header-y += auxvec.h header-y += ax25.h header-y += b1lli.h header-y += baycom.h +header-y += bcm933xx_hcs.h header-y += bfs_fs.h header-y += binfmts.h header-y += blkpg.h diff --git a/include/uapi/linux/bcm933xx_hcs.h b/include/uapi/linux/bcm933xx_hcs.h new file mode 100644 index 000000000000..d22821831549 --- /dev/null +++ b/include/uapi/linux/bcm933xx_hcs.h @@ -0,0 +1,24 @@ +/* + * Broadcom Cable Modem firmware format + */ + +#ifndef __BCM933XX_HCS_H +#define __BCM933XX_HCS_H + +#include + +struct bcm_hcs { + __u16 magic; + __u16 control; + __u16 rev_maj; + __u16 rev_min; + __u32 build_date; + __u32 filelen; + __u32 ldaddress; + char filename[64]; + __u16 hcs; + __u16 her_znaet_chto; + __u32 crc; +}; + +#endif /* __BCM933XX_HCS */ -- cgit v1.3-14-g43fede From b1a5a34bd0b8767ea689e68f8ea513e9710b671e Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 29 Jun 2013 00:15:51 +0800 Subject: net: Swap ver and type in pppoe_hdr Ver and type in pppoe_hdr should be swapped as defined by RFC2516 section-4. Signed-off-by: David S. Miller --- include/uapi/linux/if_pppox.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h index 0b46fd57c8f6..e36a4aecd311 100644 --- a/include/uapi/linux/if_pppox.h +++ b/include/uapi/linux/if_pppox.h @@ -135,11 +135,11 @@ struct pppoe_tag { struct pppoe_hdr { #if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 ver : 4; __u8 type : 4; + __u8 ver : 4; #elif defined(__BIG_ENDIAN_BITFIELD) - __u8 type : 4; __u8 ver : 4; + __u8 type : 4; #else #error "Please fix " #endif -- cgit v1.3-14-g43fede From 62525a00b87cc967bce9779d63fcc84fb9199130 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 8 Jul 2013 10:33:37 +0930 Subject: virtio: VIRTIO_F_ANY_LAYOUT feature Also known as the "no really, I read the spec" bit. Signed-off-by: Rusty Russell --- include/uapi/linux/virtio_config.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index b7cda390fd00..3ce768c6910d 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -51,4 +51,7 @@ * suppressed them? */ #define VIRTIO_F_NOTIFY_ON_EMPTY 24 +/* Can the device handle any descriptor layout? */ +#define VIRTIO_F_ANY_LAYOUT 27 + #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ -- cgit v1.3-14-g43fede From 83d5e5b0af907d46d241a86d9e44003b3f0accbd Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 10 Jul 2013 23:41:18 +0100 Subject: dm: optimize use SRCU and RCU This patch removes "io_lock" and "map_lock" in struct mapped_device and "holders" in struct dm_table and replaces these mechanisms with sleepable-rcu. Previously, the code would call "dm_get_live_table" and "dm_table_put" to get and release table. Now, the code is changed to call "dm_get_live_table" and "dm_put_live_table". dm_get_live_table locks sleepable-rcu and dm_put_live_table unlocks it. dm_get_live_table_fast/dm_put_live_table_fast can be used instead of dm_get_live_table/dm_put_live_table. These *_fast functions use non-sleepable RCU, so the caller must not block between them. If the code changes active or inactive dm table, it must call dm_sync_table before destroying the old table. Signed-off-by: Mikulas Patocka Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 122 +++++++++++++++++++++----------- drivers/md/dm-table.c | 35 --------- drivers/md/dm.c | 160 ++++++++++++++++++++++++------------------ include/linux/device-mapper.h | 6 +- include/uapi/linux/dm-ioctl.h | 4 +- 5 files changed, 180 insertions(+), 147 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 12f04868e899..f1b758675ec7 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -36,6 +36,14 @@ struct hash_cell { struct dm_table *new_map; }; +/* + * A dummy definition to make RCU happy. + * struct dm_table should never be dereferenced in this file. + */ +struct dm_table { + int undefined__; +}; + struct vers_iter { size_t param_size; struct dm_target_versions *vers, *old_vers; @@ -242,9 +250,10 @@ static int dm_hash_insert(const char *name, const char *uuid, struct mapped_devi return -EBUSY; } -static void __hash_remove(struct hash_cell *hc) +static struct dm_table *__hash_remove(struct hash_cell *hc) { struct dm_table *table; + int srcu_idx; /* remove from the dev hash */ list_del(&hc->uuid_list); @@ -253,16 +262,18 @@ static void __hash_remove(struct hash_cell *hc) dm_set_mdptr(hc->md, NULL); mutex_unlock(&dm_hash_cells_mutex); - table = dm_get_live_table(hc->md); - if (table) { + table = dm_get_live_table(hc->md, &srcu_idx); + if (table) dm_table_event(table); - dm_table_put(table); - } + dm_put_live_table(hc->md, srcu_idx); + table = NULL; if (hc->new_map) - dm_table_destroy(hc->new_map); + table = hc->new_map; dm_put(hc->md); free_cell(hc); + + return table; } static void dm_hash_remove_all(int keep_open_devices) @@ -270,6 +281,7 @@ static void dm_hash_remove_all(int keep_open_devices) int i, dev_skipped; struct hash_cell *hc; struct mapped_device *md; + struct dm_table *t; retry: dev_skipped = 0; @@ -287,10 +299,14 @@ retry: continue; } - __hash_remove(hc); + t = __hash_remove(hc); up_write(&_hash_lock); + if (t) { + dm_sync_table(md); + dm_table_destroy(t); + } dm_put(md); if (likely(keep_open_devices)) dm_destroy(md); @@ -356,6 +372,7 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, struct dm_table *table; struct mapped_device *md; unsigned change_uuid = (param->flags & DM_UUID_FLAG) ? 1 : 0; + int srcu_idx; /* * duplicate new. @@ -418,11 +435,10 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, /* * Wake up any dm event waiters. */ - table = dm_get_live_table(hc->md); - if (table) { + table = dm_get_live_table(hc->md, &srcu_idx); + if (table) dm_table_event(table); - dm_table_put(table); - } + dm_put_live_table(hc->md, srcu_idx); if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, param->event_nr)) param->flags |= DM_UEVENT_GENERATED_FLAG; @@ -620,11 +636,14 @@ static int check_name(const char *name) * _hash_lock without first calling dm_table_put, because dm_table_destroy * waits for this dm_table_put and could be called under this lock. */ -static struct dm_table *dm_get_inactive_table(struct mapped_device *md) +static struct dm_table *dm_get_inactive_table(struct mapped_device *md, int *srcu_idx) { struct hash_cell *hc; struct dm_table *table = NULL; + /* increment rcu count, we don't care about the table pointer */ + dm_get_live_table(md, srcu_idx); + down_read(&_hash_lock); hc = dm_get_mdptr(md); if (!hc || hc->md != md) { @@ -633,8 +652,6 @@ static struct dm_table *dm_get_inactive_table(struct mapped_device *md) } table = hc->new_map; - if (table) - dm_table_get(table); out: up_read(&_hash_lock); @@ -643,10 +660,11 @@ out: } static struct dm_table *dm_get_live_or_inactive_table(struct mapped_device *md, - struct dm_ioctl *param) + struct dm_ioctl *param, + int *srcu_idx) { return (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) ? - dm_get_inactive_table(md) : dm_get_live_table(md); + dm_get_inactive_table(md, srcu_idx) : dm_get_live_table(md, srcu_idx); } /* @@ -657,6 +675,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param) { struct gendisk *disk = dm_disk(md); struct dm_table *table; + int srcu_idx; param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG | DM_ACTIVE_PRESENT_FLAG); @@ -676,26 +695,27 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param) param->event_nr = dm_get_event_nr(md); param->target_count = 0; - table = dm_get_live_table(md); + table = dm_get_live_table(md, &srcu_idx); if (table) { if (!(param->flags & DM_QUERY_INACTIVE_TABLE_FLAG)) { if (get_disk_ro(disk)) param->flags |= DM_READONLY_FLAG; param->target_count = dm_table_get_num_targets(table); } - dm_table_put(table); param->flags |= DM_ACTIVE_PRESENT_FLAG; } + dm_put_live_table(md, srcu_idx); if (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) { - table = dm_get_inactive_table(md); + int srcu_idx; + table = dm_get_inactive_table(md, &srcu_idx); if (table) { if (!(dm_table_get_mode(table) & FMODE_WRITE)) param->flags |= DM_READONLY_FLAG; param->target_count = dm_table_get_num_targets(table); - dm_table_put(table); } + dm_put_live_table(md, srcu_idx); } } @@ -796,6 +816,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) struct hash_cell *hc; struct mapped_device *md; int r; + struct dm_table *t; down_write(&_hash_lock); hc = __find_device_hash_cell(param); @@ -819,9 +840,14 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) return r; } - __hash_remove(hc); + t = __hash_remove(hc); up_write(&_hash_lock); + if (t) { + dm_sync_table(md); + dm_table_destroy(t); + } + if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr)) param->flags |= DM_UEVENT_GENERATED_FLAG; @@ -986,6 +1012,7 @@ static int do_resume(struct dm_ioctl *param) old_map = dm_swap_table(md, new_map); if (IS_ERR(old_map)) { + dm_sync_table(md); dm_table_destroy(new_map); dm_put(md); return PTR_ERR(old_map); @@ -1003,6 +1030,10 @@ static int do_resume(struct dm_ioctl *param) param->flags |= DM_UEVENT_GENERATED_FLAG; } + /* + * Since dm_swap_table synchronizes RCU, nobody should be in + * read-side critical section already. + */ if (old_map) dm_table_destroy(old_map); @@ -1125,6 +1156,7 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size) int r = 0; struct mapped_device *md; struct dm_table *table; + int srcu_idx; md = find_device(param); if (!md) @@ -1145,11 +1177,10 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size) */ __dev_status(md, param); - table = dm_get_live_or_inactive_table(md, param); - if (table) { + table = dm_get_live_or_inactive_table(md, param, &srcu_idx); + if (table) retrieve_status(table, param, param_size); - dm_table_put(table); - } + dm_put_live_table(md, srcu_idx); out: dm_put(md); @@ -1221,7 +1252,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size) { int r; struct hash_cell *hc; - struct dm_table *t; + struct dm_table *t, *old_map = NULL; struct mapped_device *md; struct target_type *immutable_target_type; @@ -1277,14 +1308,14 @@ static int table_load(struct dm_ioctl *param, size_t param_size) hc = dm_get_mdptr(md); if (!hc || hc->md != md) { DMWARN("device has been removed from the dev hash table."); - dm_table_destroy(t); up_write(&_hash_lock); + dm_table_destroy(t); r = -ENXIO; goto out; } if (hc->new_map) - dm_table_destroy(hc->new_map); + old_map = hc->new_map; hc->new_map = t; up_write(&_hash_lock); @@ -1292,6 +1323,11 @@ static int table_load(struct dm_ioctl *param, size_t param_size) __dev_status(md, param); out: + if (old_map) { + dm_sync_table(md); + dm_table_destroy(old_map); + } + dm_put(md); return r; @@ -1301,6 +1337,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size) { struct hash_cell *hc; struct mapped_device *md; + struct dm_table *old_map = NULL; down_write(&_hash_lock); @@ -1312,7 +1349,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size) } if (hc->new_map) { - dm_table_destroy(hc->new_map); + old_map = hc->new_map; hc->new_map = NULL; } @@ -1321,6 +1358,10 @@ static int table_clear(struct dm_ioctl *param, size_t param_size) __dev_status(hc->md, param); md = hc->md; up_write(&_hash_lock); + if (old_map) { + dm_sync_table(md); + dm_table_destroy(old_map); + } dm_put(md); return 0; @@ -1370,6 +1411,7 @@ static int table_deps(struct dm_ioctl *param, size_t param_size) { struct mapped_device *md; struct dm_table *table; + int srcu_idx; md = find_device(param); if (!md) @@ -1377,11 +1419,10 @@ static int table_deps(struct dm_ioctl *param, size_t param_size) __dev_status(md, param); - table = dm_get_live_or_inactive_table(md, param); - if (table) { + table = dm_get_live_or_inactive_table(md, param, &srcu_idx); + if (table) retrieve_deps(table, param, param_size); - dm_table_put(table); - } + dm_put_live_table(md, srcu_idx); dm_put(md); @@ -1396,6 +1437,7 @@ static int table_status(struct dm_ioctl *param, size_t param_size) { struct mapped_device *md; struct dm_table *table; + int srcu_idx; md = find_device(param); if (!md) @@ -1403,11 +1445,10 @@ static int table_status(struct dm_ioctl *param, size_t param_size) __dev_status(md, param); - table = dm_get_live_or_inactive_table(md, param); - if (table) { + table = dm_get_live_or_inactive_table(md, param, &srcu_idx); + if (table) retrieve_status(table, param, param_size); - dm_table_put(table); - } + dm_put_live_table(md, srcu_idx); dm_put(md); @@ -1443,6 +1484,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size) struct dm_target_msg *tmsg = (void *) param + param->data_start; size_t maxlen; char *result = get_result_buffer(param, param_size, &maxlen); + int srcu_idx; md = find_device(param); if (!md) @@ -1470,9 +1512,9 @@ static int target_message(struct dm_ioctl *param, size_t param_size) if (r <= 1) goto out_argv; - table = dm_get_live_table(md); + table = dm_get_live_table(md, &srcu_idx); if (!table) - goto out_argv; + goto out_table; if (dm_deleting_md(md)) { r = -ENXIO; @@ -1491,7 +1533,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size) } out_table: - dm_table_put(table); + dm_put_live_table(md, srcu_idx); out_argv: kfree(argv); out: diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 1ff252ab7d46..f221812b7dbc 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -26,22 +26,8 @@ #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) -/* - * The table has always exactly one reference from either mapped_device->map - * or hash_cell->new_map. This reference is not counted in table->holders. - * A pair of dm_create_table/dm_destroy_table functions is used for table - * creation/destruction. - * - * Temporary references from the other code increase table->holders. A pair - * of dm_table_get/dm_table_put functions is used to manipulate it. - * - * When the table is about to be destroyed, we wait for table->holders to - * drop to zero. - */ - struct dm_table { struct mapped_device *md; - atomic_t holders; unsigned type; /* btree table */ @@ -208,7 +194,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode, INIT_LIST_HEAD(&t->devices); INIT_LIST_HEAD(&t->target_callbacks); - atomic_set(&t->holders, 0); if (!num_targets) num_targets = KEYS_PER_NODE; @@ -246,10 +231,6 @@ void dm_table_destroy(struct dm_table *t) if (!t) return; - while (atomic_read(&t->holders)) - msleep(1); - smp_mb(); - /* free the indexes */ if (t->depth >= 2) vfree(t->index[t->depth - 2]); @@ -274,22 +255,6 @@ void dm_table_destroy(struct dm_table *t) kfree(t); } -void dm_table_get(struct dm_table *t) -{ - atomic_inc(&t->holders); -} -EXPORT_SYMBOL(dm_table_get); - -void dm_table_put(struct dm_table *t) -{ - if (!t) - return; - - smp_mb__before_atomic_dec(); - atomic_dec(&t->holders); -} -EXPORT_SYMBOL(dm_table_put); - /* * Checks to see if we need to extend highs or targets. */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 33f20103d8d5..ecff83f5b53a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -116,13 +116,20 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); #define DMF_NOFLUSH_SUSPENDING 5 #define DMF_MERGE_IS_OPTIONAL 6 +/* + * A dummy definition to make RCU happy. + * struct dm_table should never be dereferenced in this file. + */ +struct dm_table { + int undefined__; +}; + /* * Work processed by per-device workqueue. */ struct mapped_device { - struct rw_semaphore io_lock; + struct srcu_struct io_barrier; struct mutex suspend_lock; - rwlock_t map_lock; atomic_t holders; atomic_t open_count; @@ -156,6 +163,8 @@ struct mapped_device { /* * The current mapping. + * Use dm_get_live_table{_fast} or take suspend_lock for + * dereference. */ struct dm_table *map; @@ -386,12 +395,14 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct mapped_device *md = bdev->bd_disk->private_data; + int srcu_idx; struct dm_table *map; struct dm_target *tgt; int r = -ENOTTY; retry: - map = dm_get_live_table(md); + map = dm_get_live_table(md, &srcu_idx); + if (!map || !dm_table_get_size(map)) goto out; @@ -410,7 +421,7 @@ retry: r = tgt->type->ioctl(tgt, cmd, arg); out: - dm_table_put(map); + dm_put_live_table(md, srcu_idx); if (r == -ENOTCONN) { msleep(10); @@ -509,20 +520,39 @@ static void queue_io(struct mapped_device *md, struct bio *bio) /* * Everyone (including functions in this file), should use this * function to access the md->map field, and make sure they call - * dm_table_put() when finished. + * dm_put_live_table() when finished. */ -struct dm_table *dm_get_live_table(struct mapped_device *md) +struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier) { - struct dm_table *t; - unsigned long flags; + *srcu_idx = srcu_read_lock(&md->io_barrier); + + return srcu_dereference(md->map, &md->io_barrier); +} - read_lock_irqsave(&md->map_lock, flags); - t = md->map; - if (t) - dm_table_get(t); - read_unlock_irqrestore(&md->map_lock, flags); +void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier) +{ + srcu_read_unlock(&md->io_barrier, srcu_idx); +} + +void dm_sync_table(struct mapped_device *md) +{ + synchronize_srcu(&md->io_barrier); + synchronize_rcu_expedited(); +} + +/* + * A fast alternative to dm_get_live_table/dm_put_live_table. + * The caller must not block between these two functions. + */ +static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU) +{ + rcu_read_lock(); + return rcu_dereference(md->map); +} - return t; +static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU) +{ + rcu_read_unlock(); } /* @@ -1356,17 +1386,18 @@ static int __split_and_process_non_flush(struct clone_info *ci) /* * Entry point to split a bio into clones and submit them to the targets. */ -static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) +static void __split_and_process_bio(struct mapped_device *md, + struct dm_table *map, struct bio *bio) { struct clone_info ci; int error = 0; - ci.map = dm_get_live_table(md); - if (unlikely(!ci.map)) { + if (unlikely(!map)) { bio_io_error(bio); return; } + ci.map = map; ci.md = md; ci.io = alloc_io(md); ci.io->error = 0; @@ -1393,7 +1424,6 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) /* drop the extra reference count */ dec_pending(ci.io, error); - dm_table_put(ci.map); } /*----------------------------------------------------------------- * CRUD END @@ -1404,7 +1434,7 @@ static int dm_merge_bvec(struct request_queue *q, struct bio_vec *biovec) { struct mapped_device *md = q->queuedata; - struct dm_table *map = dm_get_live_table(md); + struct dm_table *map = dm_get_live_table_fast(md); struct dm_target *ti; sector_t max_sectors; int max_size = 0; @@ -1414,7 +1444,7 @@ static int dm_merge_bvec(struct request_queue *q, ti = dm_table_find_target(map, bvm->bi_sector); if (!dm_target_is_valid(ti)) - goto out_table; + goto out; /* * Find maximum amount of I/O that won't need splitting @@ -1443,10 +1473,8 @@ static int dm_merge_bvec(struct request_queue *q, max_size = 0; -out_table: - dm_table_put(map); - out: + dm_put_live_table_fast(md); /* * Always allow an entire first page */ @@ -1465,8 +1493,10 @@ static void _dm_request(struct request_queue *q, struct bio *bio) int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; int cpu; + int srcu_idx; + struct dm_table *map; - down_read(&md->io_lock); + map = dm_get_live_table(md, &srcu_idx); cpu = part_stat_lock(); part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); @@ -1475,7 +1505,7 @@ static void _dm_request(struct request_queue *q, struct bio *bio) /* if we're suspended, we have to queue this io for later */ if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { - up_read(&md->io_lock); + dm_put_live_table(md, srcu_idx); if (bio_rw(bio) != READA) queue_io(md, bio); @@ -1484,8 +1514,8 @@ static void _dm_request(struct request_queue *q, struct bio *bio) return; } - __split_and_process_bio(md, bio); - up_read(&md->io_lock); + __split_and_process_bio(md, map, bio); + dm_put_live_table(md, srcu_idx); return; } @@ -1671,7 +1701,8 @@ static struct request *dm_start_request(struct mapped_device *md, struct request static void dm_request_fn(struct request_queue *q) { struct mapped_device *md = q->queuedata; - struct dm_table *map = dm_get_live_table(md); + int srcu_idx; + struct dm_table *map = dm_get_live_table(md, &srcu_idx); struct dm_target *ti; struct request *rq, *clone; sector_t pos; @@ -1726,7 +1757,7 @@ requeued: delay_and_out: blk_delay_queue(q, HZ / 10); out: - dm_table_put(map); + dm_put_live_table(md, srcu_idx); } int dm_underlying_device_busy(struct request_queue *q) @@ -1739,14 +1770,14 @@ static int dm_lld_busy(struct request_queue *q) { int r; struct mapped_device *md = q->queuedata; - struct dm_table *map = dm_get_live_table(md); + struct dm_table *map = dm_get_live_table_fast(md); if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) r = 1; else r = dm_table_any_busy_target(map); - dm_table_put(map); + dm_put_live_table_fast(md); return r; } @@ -1758,7 +1789,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits) struct dm_table *map; if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { - map = dm_get_live_table(md); + map = dm_get_live_table_fast(md); if (map) { /* * Request-based dm cares about only own queue for @@ -1769,9 +1800,8 @@ static int dm_any_congested(void *congested_data, int bdi_bits) bdi_bits; else r = dm_table_any_congested(map, bdi_bits); - - dm_table_put(map); } + dm_put_live_table_fast(md); } return r; @@ -1876,12 +1906,14 @@ static struct mapped_device *alloc_dev(int minor) if (r < 0) goto bad_minor; + r = init_srcu_struct(&md->io_barrier); + if (r < 0) + goto bad_io_barrier; + md->type = DM_TYPE_NONE; - init_rwsem(&md->io_lock); mutex_init(&md->suspend_lock); mutex_init(&md->type_lock); spin_lock_init(&md->deferred_lock); - rwlock_init(&md->map_lock); atomic_set(&md->holders, 1); atomic_set(&md->open_count, 0); atomic_set(&md->event_nr, 0); @@ -1944,6 +1976,8 @@ bad_thread: bad_disk: blk_cleanup_queue(md->queue); bad_queue: + cleanup_srcu_struct(&md->io_barrier); +bad_io_barrier: free_minor(minor); bad_minor: module_put(THIS_MODULE); @@ -1967,6 +2001,7 @@ static void free_dev(struct mapped_device *md) bioset_free(md->bs); blk_integrity_unregister(md->disk); del_gendisk(md->disk); + cleanup_srcu_struct(&md->io_barrier); free_minor(minor); spin_lock(&_minor_lock); @@ -2109,7 +2144,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, struct dm_table *old_map; struct request_queue *q = md->queue; sector_t size; - unsigned long flags; int merge_is_optional; size = dm_table_get_size(t); @@ -2138,9 +2172,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, merge_is_optional = dm_table_merge_is_optional(t); - write_lock_irqsave(&md->map_lock, flags); old_map = md->map; - md->map = t; + rcu_assign_pointer(md->map, t); md->immutable_target_type = dm_table_get_immutable_target_type(t); dm_table_set_restrictions(t, q, limits); @@ -2148,7 +2181,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); else clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); - write_unlock_irqrestore(&md->map_lock, flags); + dm_sync_table(md); return old_map; } @@ -2159,15 +2192,13 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, static struct dm_table *__unbind(struct mapped_device *md) { struct dm_table *map = md->map; - unsigned long flags; if (!map) return NULL; dm_table_event_callback(map, NULL, NULL); - write_lock_irqsave(&md->map_lock, flags); - md->map = NULL; - write_unlock_irqrestore(&md->map_lock, flags); + rcu_assign_pointer(md->map, NULL); + dm_sync_table(md); return map; } @@ -2319,11 +2350,12 @@ EXPORT_SYMBOL_GPL(dm_device_name); static void __dm_destroy(struct mapped_device *md, bool wait) { struct dm_table *map; + int srcu_idx; might_sleep(); spin_lock(&_minor_lock); - map = dm_get_live_table(md); + map = dm_get_live_table(md, &srcu_idx); idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); @@ -2333,6 +2365,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait) dm_table_postsuspend_targets(map); } + /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ + dm_put_live_table(md, srcu_idx); + /* * Rare, but there may be I/O requests still going to complete, * for example. Wait for all references to disappear. @@ -2347,7 +2382,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait) dm_device_name(md), atomic_read(&md->holders)); dm_sysfs_exit(md); - dm_table_put(map); dm_table_destroy(__unbind(md)); free_dev(md); } @@ -2404,8 +2438,10 @@ static void dm_wq_work(struct work_struct *work) struct mapped_device *md = container_of(work, struct mapped_device, work); struct bio *c; + int srcu_idx; + struct dm_table *map; - down_read(&md->io_lock); + map = dm_get_live_table(md, &srcu_idx); while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { spin_lock_irq(&md->deferred_lock); @@ -2415,17 +2451,13 @@ static void dm_wq_work(struct work_struct *work) if (!c) break; - up_read(&md->io_lock); - if (dm_request_based(md)) generic_make_request(c); else - __split_and_process_bio(md, c); - - down_read(&md->io_lock); + __split_and_process_bio(md, map, c); } - up_read(&md->io_lock); + dm_put_live_table(md, srcu_idx); } static void dm_queue_flush(struct mapped_device *md) @@ -2457,10 +2489,10 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) * reappear. */ if (dm_table_has_no_data_devices(table)) { - live_map = dm_get_live_table(md); + live_map = dm_get_live_table_fast(md); if (live_map) limits = md->queue->limits; - dm_table_put(live_map); + dm_put_live_table_fast(md); } if (!live_map) { @@ -2540,7 +2572,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) goto out_unlock; } - map = dm_get_live_table(md); + map = md->map; /* * DMF_NOFLUSH_SUSPENDING must be set before presuspend. @@ -2561,7 +2593,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) if (!noflush && do_lockfs) { r = lock_fs(md); if (r) - goto out; + goto out_unlock; } /* @@ -2576,9 +2608,8 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) * (dm_wq_work), we set BMF_BLOCK_IO_FOR_SUSPEND and call * flush_workqueue(md->wq). */ - down_write(&md->io_lock); set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); - up_write(&md->io_lock); + synchronize_srcu(&md->io_barrier); /* * Stop md->queue before flushing md->wq in case request-based @@ -2596,10 +2627,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) */ r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); - down_write(&md->io_lock); if (noflush) clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); - up_write(&md->io_lock); + synchronize_srcu(&md->io_barrier); /* were we interrupted ? */ if (r < 0) { @@ -2609,7 +2639,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) start_queue(md->queue); unlock_fs(md); - goto out; /* pushback list is already flushed, so skip flush */ + goto out_unlock; /* pushback list is already flushed, so skip flush */ } /* @@ -2622,9 +2652,6 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) dm_table_postsuspend_targets(map); -out: - dm_table_put(map); - out_unlock: mutex_unlock(&md->suspend_lock); return r; @@ -2639,7 +2666,7 @@ int dm_resume(struct mapped_device *md) if (!dm_suspended_md(md)) goto out; - map = dm_get_live_table(md); + map = md->map; if (!map || !dm_table_get_size(map)) goto out; @@ -2663,7 +2690,6 @@ int dm_resume(struct mapped_device *md) r = 0; out: - dm_table_put(map); mutex_unlock(&md->suspend_lock); return r; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 3cd32478f2fd..e151d4c9298d 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -446,9 +446,9 @@ int __must_check dm_set_target_max_io_len(struct dm_target *ti, sector_t len); /* * Table reference counting. */ -struct dm_table *dm_get_live_table(struct mapped_device *md); -void dm_table_get(struct dm_table *t); -void dm_table_put(struct dm_table *t); +struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx); +void dm_put_live_table(struct mapped_device *md, int srcu_idx); +void dm_sync_table(struct mapped_device *md); /* * Queries diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 7e75b6fd8d45..afd0cbd52edb 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 24 +#define DM_VERSION_MINOR 25 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2013-01-15)" +#define DM_VERSION_EXTRA "-ioctl (2013-06-26)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.3-14-g43fede