aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/ndisc.c13
-rw-r--r--net/8021q/vlan_dev.c2
-rw-r--r--net/Kconfig1
-rw-r--r--net/Makefile1
-rw-r--r--net/atm/clip.c2
-rw-r--r--net/batman-adv/Makefile5
-rw-r--r--net/batman-adv/bat_algo.c140
-rw-r--r--net/batman-adv/bat_algo.h35
-rw-r--r--net/batman-adv/bat_iv_ogm.c40
-rw-r--r--net/batman-adv/bat_iv_ogm.h25
-rw-r--r--net/batman-adv/bat_v.c29
-rw-r--r--net/batman-adv/bat_v.h52
-rw-r--r--net/batman-adv/bat_v_elp.c3
-rw-r--r--net/batman-adv/bat_v_elp.h4
-rw-r--r--net/batman-adv/bat_v_ogm.c5
-rw-r--r--net/batman-adv/bat_v_ogm.h4
-rw-r--r--net/batman-adv/bitarray.c2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c1
-rw-r--r--net/batman-adv/debugfs.c217
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/batman-adv/fragmentation.c41
-rw-r--r--net/batman-adv/fragmentation.h6
-rw-r--r--net/batman-adv/gateway_client.c1
-rw-r--r--net/batman-adv/gateway_common.c4
-rw-r--r--net/batman-adv/hard-interface.c21
-rw-r--r--net/batman-adv/icmp_socket.c1
-rw-r--r--net/batman-adv/log.c231
-rw-r--r--net/batman-adv/log.h111
-rw-r--r--net/batman-adv/main.c707
-rw-r--r--net/batman-adv/main.h121
-rw-r--r--net/batman-adv/multicast.c2
-rw-r--r--net/batman-adv/netlink.c424
-rw-r--r--net/batman-adv/netlink.h32
-rw-r--r--net/batman-adv/network-coding.c2
-rw-r--r--net/batman-adv/originator.c51
-rw-r--r--net/batman-adv/packet.h54
-rw-r--r--net/batman-adv/routing.c43
-rw-r--r--net/batman-adv/send.c39
-rw-r--r--net/batman-adv/soft-interface.c3
-rw-r--r--net/batman-adv/sysfs.c7
-rw-r--r--net/batman-adv/tp_meter.c1507
-rw-r--r--net/batman-adv/tp_meter.h34
-rw-r--r--net/batman-adv/translation-table.c8
-rw-r--r--net/batman-adv/tvlv.c632
-rw-r--r--net/batman-adv/tvlv.h61
-rw-r--r--net/batman-adv/types.h226
-rw-r--r--net/bluetooth/af_bluetooth.c5
-rw-r--r--net/bluetooth/hci_conn.c2
-rw-r--r--net/bluetooth/hci_core.c52
-rw-r--r--net/bluetooth/hci_debugfs.c35
-rw-r--r--net/bluetooth/hci_event.c18
-rw-r--r--net/bluetooth/hci_sock.c7
-rw-r--r--net/bluetooth/hci_sysfs.c99
-rw-r--r--net/bluetooth/l2cap_core.c2
-rw-r--r--net/bluetooth/l2cap_sock.c2
-rw-r--r--net/bluetooth/mgmt.c18
-rw-r--r--net/bluetooth/smp.c67
-rw-r--r--net/bridge/br_device.c24
-rw-r--r--net/bridge/br_forward.c202
-rw-r--r--net/bridge/br_input.c64
-rw-r--r--net/bridge/br_multicast.c48
-rw-r--r--net/bridge/br_netfilter_hooks.c2
-rw-r--r--net/bridge/br_private.h34
-rw-r--r--net/bridge/netfilter/ebt_802_3.c6
-rw-r--r--net/bridge/netfilter/ebt_arp.c43
-rw-r--r--net/bridge/netfilter/ebt_ip.c28
-rw-r--r--net/bridge/netfilter/ebt_ip6.c41
-rw-r--r--net/bridge/netfilter/ebt_stp.c97
-rw-r--r--net/bridge/netfilter/ebtables.c32
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c8
-rw-r--r--net/core/dev.c127
-rw-r--r--net/core/devlink.c4
-rw-r--r--net/core/drop_monitor.c3
-rw-r--r--net/core/filter.c141
-rw-r--r--net/core/flow_dissector.c43
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/pktgen.c42
-rw-r--r--net/core/rtnetlink.c68
-rw-r--r--net/core/skbuff.c18
-rw-r--r--net/decnet/dn_fib.c21
-rw-r--r--net/dsa/dsa.c6
-rw-r--r--net/dsa/dsa2.c9
-rw-r--r--net/dsa/slave.c41
-rw-r--r--net/ieee802154/6lowpan/core.c5
-rw-r--r--net/ieee802154/6lowpan/rx.c2
-rw-r--r--net/ieee802154/core.c70
-rw-r--r--net/ieee802154/core.h2
-rw-r--r--net/ieee802154/nl802154.c54
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/devinet.c12
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_output.c10
-rw-r--r--net/ipv4/ip_tunnel_core.c9
-rw-r--r--net/ipv4/ipip.c137
-rw-r--r--net/ipv4/ipmr.c15
-rw-r--r--net/ipv4/netfilter/arp_tables.c41
-rw-r--r--net/ipv4/netfilter/ip_tables.c20
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c4
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c3
-rw-r--r--net/ipv4/tcp_timer.c81
-rw-r--r--net/ipv4/tunnel4.c72
-rw-r--r--net/ipv6/addrconf.c4
-rw-r--r--net/ipv6/ip6_fib.c1
-rw-r--r--net/ipv6/ip6mr.c26
-rw-r--r--net/ipv6/netfilter/ip6_tables.c16
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c4
-rw-r--r--net/ipv6/sit.c93
-rw-r--r--net/mac80211/agg-rx.c18
-rw-r--r--net/mac80211/cfg.c1
-rw-r--r--net/mac80211/ieee80211_i.h1
-rw-r--r--net/mac80211/mesh.c20
-rw-r--r--net/mac80211/mesh_plink.c16
-rw-r--r--net/mac80211/rx.c7
-rw-r--r--net/mac80211/scan.c42
-rw-r--r--net/mac80211/spectmgmt.c45
-rw-r--r--net/mac80211/tdls.c1
-rw-r--r--net/mac80211/tx.c10
-rw-r--r--net/mpls/af_mpls.c6
-rw-r--r--net/ncsi/Kconfig12
-rw-r--r--net/ncsi/Makefile4
-rw-r--r--net/ncsi/internal.h328
-rw-r--r--net/ncsi/ncsi-aen.c193
-rw-r--r--net/ncsi/ncsi-cmd.c367
-rw-r--r--net/ncsi/ncsi-manage.c1199
-rw-r--r--net/ncsi/ncsi-pkt.h415
-rw-r--r--net/ncsi/ncsi-rsp.c1035
-rw-r--r--net/netfilter/Kconfig10
-rw-r--r--net/netfilter/nf_conntrack_core.c76
-rw-r--r--net/netfilter/nf_conntrack_helper.c61
-rw-r--r--net/netfilter/nf_conntrack_standalone.c36
-rw-r--r--net/netfilter/nf_log.c33
-rw-r--r--net/netfilter/nf_tables_api.c366
-rw-r--r--net/netfilter/nfnetlink_log.c9
-rw-r--r--net/netfilter/nft_dynset.c7
-rw-r--r--net/netfilter/nft_hash.c6
-rw-r--r--net/netfilter/nft_log.c21
-rw-r--r--net/netfilter/nft_lookup.c43
-rw-r--r--net/netfilter/nft_meta.c9
-rw-r--r--net/netfilter/nft_rbtree.c2
-rw-r--r--net/netfilter/x_tables.c3
-rw-r--r--net/netfilter/xt_NFLOG.c3
-rw-r--r--net/netfilter/xt_TRACE.c25
-rw-r--r--net/netfilter/xt_owner.c41
-rw-r--r--net/netfilter/xt_tcpudp.c7
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/rds/bind.c6
-rw-r--r--net/rds/connection.c17
-rw-r--r--net/rds/message.c1
-rw-r--r--net/rds/rds.h25
-rw-r--r--net/rds/recv.c75
-rw-r--r--net/rds/send.c71
-rw-r--r--net/rds/tcp.c36
-rw-r--r--net/rds/tcp.h1
-rw-r--r--net/rds/tcp_connect.c7
-rw-r--r--net/rds/tcp_listen.c65
-rw-r--r--net/rds/tcp_send.c18
-rw-r--r--net/rds/threads.c2
-rw-r--r--net/rxrpc/Makefile1
-rw-r--r--net/rxrpc/af_rxrpc.c24
-rw-r--r--net/rxrpc/ar-internal.h156
-rw-r--r--net/rxrpc/call_accept.c38
-rw-r--r--net/rxrpc/call_event.c14
-rw-r--r--net/rxrpc/call_object.c273
-rw-r--r--net/rxrpc/conn_client.c282
-rw-r--r--net/rxrpc/conn_event.c44
-rw-r--r--net/rxrpc/conn_object.c636
-rw-r--r--net/rxrpc/conn_service.c230
-rw-r--r--net/rxrpc/input.c71
-rw-r--r--net/rxrpc/insecure.c7
-rw-r--r--net/rxrpc/local_object.c19
-rw-r--r--net/rxrpc/peer_object.c2
-rw-r--r--net/rxrpc/proc.c23
-rw-r--r--net/rxrpc/rxkad.c191
-rw-r--r--net/rxrpc/utils.c37
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_skbedit.c26
-rw-r--r--net/sched/sch_hfsc.c10
-rw-r--r--net/sctp/associola.c1
-rw-r--r--net/sctp/chunk.c25
-rw-r--r--net/sctp/endpointola.c1
-rw-r--r--net/sctp/input.c12
-rw-r--r--net/sctp/inqueue.c9
-rw-r--r--net/sctp/ipv6.c9
-rw-r--r--net/sctp/offload.c23
-rw-r--r--net/sctp/output.c20
-rw-r--r--net/sctp/outqueue.c99
-rw-r--r--net/sctp/protocol.c1
-rw-r--r--net/sctp/sm_make_chunk.c47
-rw-r--r--net/sctp/sm_statefuns.c9
-rw-r--r--net/sctp/socket.c282
-rw-r--r--net/sctp/ulpevent.c14
-rw-r--r--net/switchdev/switchdev.c5
-rw-r--r--net/tipc/netlink_compat.c2
-rw-r--r--net/wireless/core.c4
-rw-r--r--net/wireless/core.h12
-rw-r--r--net/wireless/nl80211.c173
-rw-r--r--net/wireless/scan.c18
-rw-r--r--net/wireless/trace.h33
199 files changed, 11249 insertions, 3547 deletions
diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c
index ae1d4199aa4c..86450b7e2899 100644
--- a/net/6lowpan/ndisc.c
+++ b/net/6lowpan/ndisc.c
@@ -47,6 +47,9 @@ static int lowpan_ndisc_parse_options(const struct net_device *dev,
struct nd_opt_hdr *nd_opt,
struct ndisc_options *ndopts)
{
+ if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+ return 0;
+
switch (nd_opt->nd_opt_type) {
case ND_OPT_SOURCE_LL_ADDR:
case ND_OPT_TARGET_LL_ADDR:
@@ -94,10 +97,13 @@ static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags,
}
write_lock_bh(&n->lock);
- if (lladdr_short)
+ if (lladdr_short) {
ieee802154_be16_to_le16(&neigh->short_addr, lladdr_short);
- else
+ if (!lowpan_802154_is_valid_src_short_addr(neigh->short_addr))
+ neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+ } else {
neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+ }
write_unlock_bh(&n->lock);
}
@@ -135,8 +141,9 @@ static int lowpan_ndisc_opt_addr_space(const struct net_device *dev,
read_unlock_bh(&neigh->lock);
addr_space += __ndisc_opt_addr_space(IEEE802154_SHORT_ADDR_LEN, 0);
*ha = ha_buf;
+ } else {
+ read_unlock_bh(&neigh->lock);
}
- read_unlock_bh(&neigh->lock);
break;
case NDISC_NEIGHBOUR_ADVERTISEMENT:
case NDISC_NEIGHBOUR_SOLICITATION:
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 86ae75b77390..c8f422c90856 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -790,6 +790,8 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup,
#endif
.ndo_fix_features = vlan_dev_fix_features,
+ .ndo_neigh_construct = netdev_default_l2upper_neigh_construct,
+ .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy,
.ndo_fdb_add = switchdev_port_fdb_add,
.ndo_fdb_del = switchdev_port_fdb_del,
.ndo_fdb_dump = switchdev_port_fdb_dump,
diff --git a/net/Kconfig b/net/Kconfig
index ff40562a782c..c2cdbce629bd 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -237,6 +237,7 @@ source "net/hsr/Kconfig"
source "net/switchdev/Kconfig"
source "net/l3mdev/Kconfig"
source "net/qrtr/Kconfig"
+source "net/ncsi/Kconfig"
config RPS
bool
diff --git a/net/Makefile b/net/Makefile
index bdd14553a774..9bd20bb86cc6 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -79,3 +79,4 @@ ifneq ($(CONFIG_NET_L3_MASTER_DEV),)
obj-y += l3mdev/
endif
obj-$(CONFIG_QRTR) += qrtr/
+obj-$(CONFIG_NET_NCSI) += ncsi/
diff --git a/net/atm/clip.c b/net/atm/clip.c
index e07f551a863c..53b4ac09e7b7 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -286,7 +286,7 @@ static const struct neigh_ops clip_neigh_ops = {
.connected_output = neigh_direct_output,
};
-static int clip_constructor(struct neighbour *neigh)
+static int clip_constructor(struct net_device *dev, struct neighbour *neigh)
{
struct atmarp_entry *entry = neighbour_priv(neigh);
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 797cf2fc88c1..a83fc6c58d19 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -17,6 +17,7 @@
#
obj-$(CONFIG_BATMAN_ADV) += batman-adv.o
+batman-adv-y += bat_algo.o
batman-adv-y += bat_iv_ogm.o
batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v.o
batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_elp.o
@@ -31,12 +32,16 @@ batman-adv-y += gateway_common.o
batman-adv-y += hard-interface.o
batman-adv-y += hash.o
batman-adv-y += icmp_socket.o
+batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o
batman-adv-y += main.o
batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
+batman-adv-y += netlink.o
batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
batman-adv-y += originator.o
batman-adv-y += routing.o
batman-adv-y += send.o
batman-adv-y += soft-interface.o
batman-adv-y += sysfs.o
+batman-adv-y += tp_meter.o
batman-adv-y += translation-table.o
+batman-adv-y += tvlv.o
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
new file mode 100644
index 000000000000..81dbbf569bd4
--- /dev/null
+++ b/net/batman-adv/bat_algo.c
@@ -0,0 +1,140 @@
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/moduleparam.h>
+#include <linux/printk.h>
+#include <linux/seq_file.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+
+#include "bat_algo.h"
+
+char batadv_routing_algo[20] = "BATMAN_IV";
+static struct hlist_head batadv_algo_list;
+
+/**
+ * batadv_algo_init - Initialize batman-adv algorithm management data structures
+ */
+void batadv_algo_init(void)
+{
+ INIT_HLIST_HEAD(&batadv_algo_list);
+}
+
+static struct batadv_algo_ops *batadv_algo_get(char *name)
+{
+ struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp;
+
+ hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) {
+ if (strcmp(bat_algo_ops_tmp->name, name) != 0)
+ continue;
+
+ bat_algo_ops = bat_algo_ops_tmp;
+ break;
+ }
+
+ return bat_algo_ops;
+}
+
+int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
+{
+ struct batadv_algo_ops *bat_algo_ops_tmp;
+
+ bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name);
+ if (bat_algo_ops_tmp) {
+ pr_info("Trying to register already registered routing algorithm: %s\n",
+ bat_algo_ops->name);
+ return -EEXIST;
+ }
+
+ /* all algorithms must implement all ops (for now) */
+ if (!bat_algo_ops->iface.enable ||
+ !bat_algo_ops->iface.disable ||
+ !bat_algo_ops->iface.update_mac ||
+ !bat_algo_ops->iface.primary_set ||
+ !bat_algo_ops->neigh.cmp ||
+ !bat_algo_ops->neigh.is_similar_or_better) {
+ pr_info("Routing algo '%s' does not implement required ops\n",
+ bat_algo_ops->name);
+ return -EINVAL;
+ }
+
+ INIT_HLIST_NODE(&bat_algo_ops->list);
+ hlist_add_head(&bat_algo_ops->list, &batadv_algo_list);
+
+ return 0;
+}
+
+int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
+{
+ struct batadv_algo_ops *bat_algo_ops;
+
+ bat_algo_ops = batadv_algo_get(name);
+ if (!bat_algo_ops)
+ return -EINVAL;
+
+ bat_priv->algo_ops = bat_algo_ops;
+
+ return 0;
+}
+
+int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
+{
+ struct batadv_algo_ops *bat_algo_ops;
+
+ seq_puts(seq, "Available routing algorithms:\n");
+
+ hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
+ seq_printf(seq, " * %s\n", bat_algo_ops->name);
+ }
+
+ return 0;
+}
+
+static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
+{
+ struct batadv_algo_ops *bat_algo_ops;
+ char *algo_name = (char *)val;
+ size_t name_len = strlen(algo_name);
+
+ if (name_len > 0 && algo_name[name_len - 1] == '\n')
+ algo_name[name_len - 1] = '\0';
+
+ bat_algo_ops = batadv_algo_get(algo_name);
+ if (!bat_algo_ops) {
+ pr_err("Routing algorithm '%s' is not supported\n", algo_name);
+ return -EINVAL;
+ }
+
+ return param_set_copystring(algo_name, kp);
+}
+
+static const struct kernel_param_ops batadv_param_ops_ra = {
+ .set = batadv_param_set_ra,
+ .get = param_get_string,
+};
+
+static struct kparam_string batadv_param_string_ra = {
+ .maxlen = sizeof(batadv_routing_algo),
+ .string = batadv_routing_algo,
+};
+
+module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
+ 0644);
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 36542962de7d..860d773dd8fa 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -20,35 +20,16 @@
#include "main.h"
-int batadv_iv_init(void);
+#include <linux/types.h>
-#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+struct seq_file;
-int batadv_v_init(void);
-void batadv_v_hardif_init(struct batadv_hard_iface *hardif);
-int batadv_v_mesh_init(struct batadv_priv *bat_priv);
-void batadv_v_mesh_free(struct batadv_priv *bat_priv);
+extern char batadv_routing_algo[];
+extern struct list_head batadv_hardif_list;
-#else
-
-static inline int batadv_v_init(void)
-{
- return 0;
-}
-
-static inline void batadv_v_hardif_init(struct batadv_hard_iface *hardif)
-{
-}
-
-static inline int batadv_v_mesh_init(struct batadv_priv *bat_priv)
-{
- return 0;
-}
-
-static inline void batadv_v_mesh_free(struct batadv_priv *bat_priv)
-{
-}
-
-#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+void batadv_algo_init(void);
+int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
+int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
+int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
#endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 4815db978c27..19b0abd6c640 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -15,7 +15,7 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "bat_algo.h"
+#include "bat_iv_ogm.h"
#include "main.h"
#include <linux/atomic.h>
@@ -31,8 +31,8 @@
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
-#include <linux/list.h>
#include <linux/kref.h>
+#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/pkt_sched.h>
@@ -49,15 +49,18 @@
#include <linux/types.h>
#include <linux/workqueue.h>
+#include "bat_algo.h"
#include "bitarray.h"
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "network-coding.h"
#include "originator.h"
#include "packet.h"
#include "routing.h"
#include "send.h"
#include "translation-table.h"
+#include "tvlv.h"
static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work);
@@ -1850,8 +1853,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
/* did we receive a B.A.T.M.A.N. IV OGM packet on an interface
* that does not have B.A.T.M.A.N. IV enabled ?
*/
- if (bat_priv->bat_algo_ops->bat_iface_enable !=
- batadv_iv_ogm_iface_enable)
+ if (bat_priv->algo_ops->iface.enable != batadv_iv_ogm_iface_enable)
return NET_RX_DROP;
batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX);
@@ -2117,18 +2119,24 @@ static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
.name = "BATMAN_IV",
- .bat_iface_activate = batadv_iv_iface_activate,
- .bat_iface_enable = batadv_iv_ogm_iface_enable,
- .bat_iface_disable = batadv_iv_ogm_iface_disable,
- .bat_iface_update_mac = batadv_iv_ogm_iface_update_mac,
- .bat_primary_iface_set = batadv_iv_ogm_primary_iface_set,
- .bat_neigh_cmp = batadv_iv_ogm_neigh_cmp,
- .bat_neigh_is_similar_or_better = batadv_iv_ogm_neigh_is_sob,
- .bat_neigh_print = batadv_iv_neigh_print,
- .bat_orig_print = batadv_iv_ogm_orig_print,
- .bat_orig_free = batadv_iv_ogm_orig_free,
- .bat_orig_add_if = batadv_iv_ogm_orig_add_if,
- .bat_orig_del_if = batadv_iv_ogm_orig_del_if,
+ .iface = {
+ .activate = batadv_iv_iface_activate,
+ .enable = batadv_iv_ogm_iface_enable,
+ .disable = batadv_iv_ogm_iface_disable,
+ .update_mac = batadv_iv_ogm_iface_update_mac,
+ .primary_set = batadv_iv_ogm_primary_iface_set,
+ },
+ .neigh = {
+ .cmp = batadv_iv_ogm_neigh_cmp,
+ .is_similar_or_better = batadv_iv_ogm_neigh_is_sob,
+ .print = batadv_iv_neigh_print,
+ },
+ .orig = {
+ .print = batadv_iv_ogm_orig_print,
+ .free = batadv_iv_ogm_orig_free,
+ .add_if = batadv_iv_ogm_orig_add_if,
+ .del_if = batadv_iv_ogm_orig_del_if,
+ },
};
int __init batadv_iv_init(void)
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
new file mode 100644
index 000000000000..b9f3550faaf7
--- /dev/null
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -0,0 +1,25 @@
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _BATMAN_ADV_BATADV_IV_OGM_H_
+#define _BATMAN_ADV_BATADV_IV_OGM_H_
+
+#include "main.h"
+
+int batadv_iv_init(void);
+
+#endif /* _BATMAN_ADV_BATADV_IV_OGM_H_ */
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index c2fea812fb48..0366cbf5e444 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -15,7 +15,7 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "bat_algo.h"
+#include "bat_v.h"
#include "main.h"
#include <linux/atomic.h>
@@ -31,6 +31,7 @@
#include <linux/types.h>
#include <linux/workqueue.h>
+#include "bat_algo.h"
#include "bat_v_elp.h"
#include "bat_v_ogm.h"
#include "hard-interface.h"
@@ -321,16 +322,22 @@ err_ifinfo1:
static struct batadv_algo_ops batadv_batman_v __read_mostly = {
.name = "BATMAN_V",
- .bat_iface_activate = batadv_v_iface_activate,
- .bat_iface_enable = batadv_v_iface_enable,
- .bat_iface_disable = batadv_v_iface_disable,
- .bat_iface_update_mac = batadv_v_iface_update_mac,
- .bat_primary_iface_set = batadv_v_primary_iface_set,
- .bat_hardif_neigh_init = batadv_v_hardif_neigh_init,
- .bat_orig_print = batadv_v_orig_print,
- .bat_neigh_cmp = batadv_v_neigh_cmp,
- .bat_neigh_is_similar_or_better = batadv_v_neigh_is_sob,
- .bat_neigh_print = batadv_v_neigh_print,
+ .iface = {
+ .activate = batadv_v_iface_activate,
+ .enable = batadv_v_iface_enable,
+ .disable = batadv_v_iface_disable,
+ .update_mac = batadv_v_iface_update_mac,
+ .primary_set = batadv_v_primary_iface_set,
+ },
+ .neigh = {
+ .hardif_init = batadv_v_hardif_neigh_init,
+ .cmp = batadv_v_neigh_cmp,
+ .is_similar_or_better = batadv_v_neigh_is_sob,
+ .print = batadv_v_neigh_print,
+ },
+ .orig = {
+ .print = batadv_v_orig_print,
+ },
};
/**
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
new file mode 100644
index 000000000000..83b77639729e
--- /dev/null
+++ b/net/batman-adv/bat_v.h
@@ -0,0 +1,52 @@
+/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Linus Lüssing
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_BAT_V_H_
+#define _NET_BATMAN_ADV_BAT_V_H_
+
+#include "main.h"
+
+#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+
+int batadv_v_init(void);
+void batadv_v_hardif_init(struct batadv_hard_iface *hardif);
+int batadv_v_mesh_init(struct batadv_priv *bat_priv);
+void batadv_v_mesh_free(struct batadv_priv *bat_priv);
+
+#else
+
+static inline int batadv_v_init(void)
+{
+ return 0;
+}
+
+static inline void batadv_v_hardif_init(struct batadv_hard_iface *hardif)
+{
+}
+
+static inline int batadv_v_mesh_init(struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+static inline void batadv_v_mesh_free(struct batadv_priv *bat_priv)
+{
+}
+
+#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+
+#endif /* _NET_BATMAN_ADV_BAT_V_H_ */
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index cf0262becd08..7d170010beb9 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -43,6 +43,7 @@
#include "bat_algo.h"
#include "bat_v_ogm.h"
#include "hard-interface.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "routing.h"
@@ -503,7 +504,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
/* did we receive a B.A.T.M.A.N. V ELP packet on an interface
* that does not have B.A.T.M.A.N. V ELP enabled ?
*/
- if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0)
+ if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0)
return NET_RX_DROP;
elp_packet = (struct batadv_elp_packet *)skb->data;
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index cc130b2d05e5..be17c0b1369e 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -15,11 +15,11 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
-
#ifndef _NET_BATMAN_ADV_BAT_V_ELP_H_
#define _NET_BATMAN_ADV_BAT_V_ELP_H_
+#include "main.h"
+
struct sk_buff;
struct work_struct;
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 23ea9bfb9f67..6fbba4eb0617 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -39,13 +39,16 @@
#include <linux/types.h>
#include <linux/workqueue.h>
+#include "bat_algo.h"
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "routing.h"
#include "send.h"
#include "translation-table.h"
+#include "tvlv.h"
/**
* batadv_v_ogm_orig_get - retrieve and possibly create an originator node
@@ -751,7 +754,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
/* did we receive a OGM2 packet on an interface that does not have
* B.A.T.M.A.N. V enabled ?
*/
- if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0)
+ if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0)
return NET_RX_DROP;
if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index d849c75ada0e..4c4d45caa422 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -18,10 +18,10 @@
#ifndef _BATMAN_ADV_BATADV_V_OGM_H_
#define _BATMAN_ADV_BATADV_V_OGM_H_
+#include "main.h"
+
#include <linux/types.h>
-struct batadv_hard_iface;
-struct batadv_priv;
struct sk_buff;
int batadv_v_ogm_init(struct batadv_priv *bat_priv);
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index a0c7913837a5..032271421a20 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -20,6 +20,8 @@
#include <linux/bitmap.h>
+#include "log.h"
+
/* shift the packet array by n places. */
static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n)
{
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 748a9ead7ce5..e4f7494fb974 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -48,6 +48,7 @@
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "sysfs.h"
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index f187a8ff2184..1d68b6e63b96 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -18,36 +18,26 @@
#include "debugfs.h"
#include "main.h"
-#include <linux/compiler.h>
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/export.h>
-#include <linux/fcntl.h>
#include <linux/fs.h>
-#include <linux/jiffies.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/netdevice.h>
-#include <linux/poll.h>
#include <linux/printk.h>
#include <linux/sched.h> /* for linux/wait.h */
#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/stddef.h>
#include <linux/stringify.h>
#include <linux/sysfs.h>
-#include <linux/types.h>
-#include <linux/uaccess.h>
-#include <linux/wait.h>
-#include <stdarg.h>
+#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
#include "gateway_client.h"
#include "icmp_socket.h"
+#include "log.h"
#include "multicast.h"
#include "network-coding.h"
#include "originator.h"
@@ -55,209 +45,6 @@
static struct dentry *batadv_debugfs;
-#ifdef CONFIG_BATMAN_ADV_DEBUG
-#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1)
-
-static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN;
-
-static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log,
- size_t idx)
-{
- return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK];
-}
-
-static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log,
- char c)
-{
- char *char_addr;
-
- char_addr = batadv_log_char_addr(debug_log, debug_log->log_end);
- *char_addr = c;
- debug_log->log_end++;
-
- if (debug_log->log_end - debug_log->log_start > batadv_log_buff_len)
- debug_log->log_start = debug_log->log_end - batadv_log_buff_len;
-}
-
-__printf(2, 3)
-static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log,
- const char *fmt, ...)
-{
- va_list args;
- static char debug_log_buf[256];
- char *p;
-
- if (!debug_log)
- return 0;
-
- spin_lock_bh(&debug_log->lock);
- va_start(args, fmt);
- vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args);
- va_end(args);
-
- for (p = debug_log_buf; *p != 0; p++)
- batadv_emit_log_char(debug_log, *p);
-
- spin_unlock_bh(&debug_log->lock);
-
- wake_up(&debug_log->queue_wait);
-
- return 0;
-}
-
-int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
-{
- va_list args;
- char tmp_log_buf[256];
-
- va_start(args, fmt);
- vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args);
- batadv_fdebug_log(bat_priv->debug_log, "[%10u] %s",
- jiffies_to_msecs(jiffies), tmp_log_buf);
- va_end(args);
-
- return 0;
-}
-
-static int batadv_log_open(struct inode *inode, struct file *file)
-{
- if (!try_module_get(THIS_MODULE))
- return -EBUSY;
-
- nonseekable_open(inode, file);
- file->private_data = inode->i_private;
- return 0;
-}
-
-static int batadv_log_release(struct inode *inode, struct file *file)
-{
- module_put(THIS_MODULE);
- return 0;
-}
-
-static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log)
-{
- return !(debug_log->log_start - debug_log->log_end);
-}
-
-static ssize_t batadv_log_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct batadv_priv *bat_priv = file->private_data;
- struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
- int error, i = 0;
- char *char_addr;
- char c;
-
- if ((file->f_flags & O_NONBLOCK) && batadv_log_empty(debug_log))
- return -EAGAIN;
-
- if (!buf)
- return -EINVAL;
-
- if (count == 0)
- return 0;
-
- if (!access_ok(VERIFY_WRITE, buf, count))
- return -EFAULT;
-
- error = wait_event_interruptible(debug_log->queue_wait,
- (!batadv_log_empty(debug_log)));
-
- if (error)
- return error;
-
- spin_lock_bh(&debug_log->lock);
-
- while ((!error) && (i < count) &&
- (debug_log->log_start != debug_log->log_end)) {
- char_addr = batadv_log_char_addr(debug_log,
- debug_log->log_start);
- c = *char_addr;
-
- debug_log->log_start++;
-
- spin_unlock_bh(&debug_log->lock);
-
- error = __put_user(c, buf);
-
- spin_lock_bh(&debug_log->lock);
-
- buf++;
- i++;
- }
-
- spin_unlock_bh(&debug_log->lock);
-
- if (!error)
- return i;
-
- return error;
-}
-
-static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
-{
- struct batadv_priv *bat_priv = file->private_data;
- struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
-
- poll_wait(file, &debug_log->queue_wait, wait);
-
- if (!batadv_log_empty(debug_log))
- return POLLIN | POLLRDNORM;
-
- return 0;
-}
-
-static const struct file_operations batadv_log_fops = {
- .open = batadv_log_open,
- .release = batadv_log_release,
- .read = batadv_log_read,
- .poll = batadv_log_poll,
- .llseek = no_llseek,
-};
-
-static int batadv_debug_log_setup(struct batadv_priv *bat_priv)
-{
- struct dentry *d;
-
- if (!bat_priv->debug_dir)
- goto err;
-
- bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC);
- if (!bat_priv->debug_log)
- goto err;
-
- spin_lock_init(&bat_priv->debug_log->lock);
- init_waitqueue_head(&bat_priv->debug_log->queue_wait);
-
- d = debugfs_create_file("log", S_IFREG | S_IRUSR,
- bat_priv->debug_dir, bat_priv,
- &batadv_log_fops);
- if (!d)
- goto err;
-
- return 0;
-
-err:
- return -ENOMEM;
-}
-
-static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
-{
- kfree(bat_priv->debug_log);
- bat_priv->debug_log = NULL;
-}
-#else /* CONFIG_BATMAN_ADV_DEBUG */
-static int batadv_debug_log_setup(struct batadv_priv *bat_priv)
-{
- return 0;
-}
-
-static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
-{
-}
-#endif
-
static int batadv_algorithms_open(struct inode *inode, struct file *file)
{
return single_open(file, batadv_algo_seq_print_text, NULL);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 278800a99c69..fa7646532a13 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -45,9 +45,11 @@
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "originator.h"
#include "send.h"
#include "translation-table.h"
+#include "tvlv.h"
static void batadv_dat_purge(struct work_struct *work);
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 9f41a0a0d6ab..0934730fb7ff 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -433,11 +433,12 @@ err:
* @orig_node: final destination of the created fragments
* @neigh_node: next-hop of the created fragments
*
- * Return: true on success, false otherwise.
+ * Return: the netdev tx status or -1 in case of error.
+ * When -1 is returned the skb is not consumed.
*/
-bool batadv_frag_send_packet(struct sk_buff *skb,
- struct batadv_orig_node *orig_node,
- struct batadv_neigh_node *neigh_node)
+int batadv_frag_send_packet(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ struct batadv_neigh_node *neigh_node)
{
struct batadv_priv *bat_priv;
struct batadv_hard_iface *primary_if = NULL;
@@ -446,7 +447,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
unsigned int header_size = sizeof(frag_header);
unsigned int max_fragment_size, max_packet_size;
- bool ret = false;
+ int ret = -1;
/* To avoid merge and refragmentation at next-hops we never send
* fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
@@ -457,12 +458,12 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
/* Don't even try to fragment, if we need more than 16 fragments */
if (skb->len > max_packet_size)
- goto out_err;
+ goto out;
bat_priv = orig_node->bat_priv;
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
- goto out_err;
+ goto out;
/* Create one header to be copied to all fragments */
frag_header.packet_type = BATADV_UNICAST_FRAG;
@@ -488,23 +489,33 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
while (skb->len > max_fragment_size) {
skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
if (!skb_fragment)
- goto out_err;
+ goto out;
batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
skb_fragment->len + ETH_HLEN);
- batadv_send_unicast_skb(skb_fragment, neigh_node);
+ ret = batadv_send_unicast_skb(skb_fragment, neigh_node);
+ if (ret != NET_XMIT_SUCCESS) {
+ /* return -1 so that the caller can free the original
+ * skb
+ */
+ ret = -1;
+ goto out;
+ }
+
frag_header.no++;
/* The initial check in this function should cover this case */
- if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)
- goto out_err;
+ if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) {
+ ret = -1;
+ goto out;
+ }
}
/* Make room for the fragment header. */
if (batadv_skb_head_push(skb, header_size) < 0 ||
pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0)
- goto out_err;
+ goto out;
memcpy(skb->data, &frag_header, header_size);
@@ -512,11 +523,9 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
skb->len + ETH_HLEN);
- batadv_send_unicast_skb(skb, neigh_node);
+ ret = batadv_send_unicast_skb(skb, neigh_node);
- ret = true;
-
-out_err:
+out:
if (primary_if)
batadv_hardif_put(primary_if);
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 9ff77c7ef7c7..3202fe329e63 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -34,9 +34,9 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
struct batadv_orig_node *orig_node_src);
bool batadv_frag_skb_buffer(struct sk_buff **skb,
struct batadv_orig_node *orig_node);
-bool batadv_frag_send_packet(struct sk_buff *skb,
- struct batadv_orig_node *orig_node,
- struct batadv_neigh_node *neigh_node);
+int batadv_frag_send_packet(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ struct batadv_neigh_node *neigh_node);
/**
* batadv_frag_check_entry - check if a list of fragments has timed out
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 18c3715e5e27..63a805d3f96e 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -42,6 +42,7 @@
#include "gateway_common.h"
#include "hard-interface.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "routing.h"
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 3c269457776e..d7bc6a87bcc9 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -19,8 +19,8 @@
#include "main.h"
#include <linux/atomic.h>
-#include <linux/errno.h>
#include <linux/byteorder/generic.h>
+#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/netdevice.h>
@@ -28,7 +28,9 @@
#include <linux/string.h>
#include "gateway_client.h"
+#include "log.h"
#include "packet.h"
+#include "tvlv.h"
/**
* batadv_parse_throughput - parse supplied string buffer to extract throughput
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 3696929e5692..1f9080840566 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -23,9 +23,9 @@
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/if.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
-#include <linux/if.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
@@ -37,11 +37,12 @@
#include <linux/spinlock.h>
#include <linux/workqueue.h>
-#include "bat_algo.h"
+#include "bat_v.h"
#include "bridge_loop_avoidance.h"
#include "debugfs.h"
#include "distributed-arp-table.h"
#include "gateway_client.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "send.h"
@@ -246,7 +247,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
if (!new_hard_iface)
goto out;
- bat_priv->bat_algo_ops->bat_primary_iface_set(new_hard_iface);
+ bat_priv->algo_ops->iface.primary_set(new_hard_iface);
batadv_primary_if_update_addr(bat_priv, curr_hard_iface);
out:
@@ -393,7 +394,7 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
bat_priv = netdev_priv(hard_iface->soft_iface);
- bat_priv->bat_algo_ops->bat_iface_update_mac(hard_iface);
+ bat_priv->algo_ops->iface.update_mac(hard_iface);
hard_iface->if_status = BATADV_IF_TO_BE_ACTIVATED;
/* the first active interface becomes our primary interface or
@@ -408,8 +409,8 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
batadv_update_min_mtu(hard_iface->soft_iface);
- if (bat_priv->bat_algo_ops->bat_iface_activate)
- bat_priv->bat_algo_ops->bat_iface_activate(hard_iface);
+ if (bat_priv->algo_ops->iface.activate)
+ bat_priv->algo_ops->iface.activate(hard_iface);
out:
if (primary_if)
@@ -507,7 +508,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
if (ret)
goto err_dev;
- ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface);
+ ret = bat_priv->algo_ops->iface.enable(hard_iface);
if (ret < 0)
goto err_upper;
@@ -516,7 +517,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->if_status = BATADV_IF_INACTIVE;
ret = batadv_orig_hash_add_if(hard_iface, bat_priv->num_ifaces);
if (ret < 0) {
- bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
+ bat_priv->algo_ops->iface.disable(hard_iface);
bat_priv->num_ifaces--;
hard_iface->if_status = BATADV_IF_NOT_IN_USE;
goto err_upper;
@@ -597,7 +598,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
batadv_hardif_put(new_if);
}
- bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
+ bat_priv->algo_ops->iface.disable(hard_iface);
hard_iface->if_status = BATADV_IF_NOT_IN_USE;
/* delete all references to this hard_iface */
@@ -782,7 +783,7 @@ static int batadv_hard_if_event(struct notifier_block *this,
batadv_check_known_mac_addr(hard_iface->net_dev);
bat_priv = netdev_priv(hard_iface->soft_iface);
- bat_priv->bat_algo_ops->bat_iface_update_mac(hard_iface);
+ bat_priv->algo_ops->iface.update_mac(hard_iface);
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 777aea10cd8f..378cc1119d66 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -45,6 +45,7 @@
#include <linux/wait.h>
#include "hard-interface.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "send.h"
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
new file mode 100644
index 000000000000..56dc532f7a2c
--- /dev/null
+++ b/net/batman-adv/log.c
@@ -0,0 +1,231 @@
+/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "log.h"
+#include "main.h"
+
+#include <linux/compiler.h>
+#include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/fcntl.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/sched.h> /* for linux/wait.h */
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+#include <stdarg.h>
+
+#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1)
+
+static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN;
+
+static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log,
+ size_t idx)
+{
+ return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK];
+}
+
+static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log,
+ char c)
+{
+ char *char_addr;
+
+ char_addr = batadv_log_char_addr(debug_log, debug_log->log_end);
+ *char_addr = c;
+ debug_log->log_end++;
+
+ if (debug_log->log_end - debug_log->log_start > batadv_log_buff_len)
+ debug_log->log_start = debug_log->log_end - batadv_log_buff_len;
+}
+
+__printf(2, 3)
+static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log,
+ const char *fmt, ...)
+{
+ va_list args;
+ static char debug_log_buf[256];
+ char *p;
+
+ if (!debug_log)
+ return 0;
+
+ spin_lock_bh(&debug_log->lock);
+ va_start(args, fmt);
+ vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args);
+ va_end(args);
+
+ for (p = debug_log_buf; *p != 0; p++)
+ batadv_emit_log_char(debug_log, *p);
+
+ spin_unlock_bh(&debug_log->lock);
+
+ wake_up(&debug_log->queue_wait);
+
+ return 0;
+}
+
+int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
+{
+ va_list args;
+ char tmp_log_buf[256];
+
+ va_start(args, fmt);
+ vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args);
+ batadv_fdebug_log(bat_priv->debug_log, "[%10u] %s",
+ jiffies_to_msecs(jiffies), tmp_log_buf);
+ va_end(args);
+
+ return 0;
+}
+
+static int batadv_log_open(struct inode *inode, struct file *file)
+{
+ if (!try_module_get(THIS_MODULE))
+ return -EBUSY;
+
+ nonseekable_open(inode, file);
+ file->private_data = inode->i_private;
+ return 0;
+}
+
+static int batadv_log_release(struct inode *inode, struct file *file)
+{
+ module_put(THIS_MODULE);
+ return 0;
+}
+
+static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log)
+{
+ return !(debug_log->log_start - debug_log->log_end);
+}
+
+static ssize_t batadv_log_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct batadv_priv *bat_priv = file->private_data;
+ struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
+ int error, i = 0;
+ char *char_addr;
+ char c;
+
+ if ((file->f_flags & O_NONBLOCK) && batadv_log_empty(debug_log))
+ return -EAGAIN;
+
+ if (!buf)
+ return -EINVAL;
+
+ if (count == 0)
+ return 0;
+
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ return -EFAULT;
+
+ error = wait_event_interruptible(debug_log->queue_wait,
+ (!batadv_log_empty(debug_log)));
+
+ if (error)
+ return error;
+
+ spin_lock_bh(&debug_log->lock);
+
+ while ((!error) && (i < count) &&
+ (debug_log->log_start != debug_log->log_end)) {
+ char_addr = batadv_log_char_addr(debug_log,
+ debug_log->log_start);
+ c = *char_addr;
+
+ debug_log->log_start++;
+
+ spin_unlock_bh(&debug_log->lock);
+
+ error = __put_user(c, buf);
+
+ spin_lock_bh(&debug_log->lock);
+
+ buf++;
+ i++;
+ }
+
+ spin_unlock_bh(&debug_log->lock);
+
+ if (!error)
+ return i;
+
+ return error;
+}
+
+static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
+{
+ struct batadv_priv *bat_priv = file->private_data;
+ struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
+
+ poll_wait(file, &debug_log->queue_wait, wait);
+
+ if (!batadv_log_empty(debug_log))
+ return POLLIN | POLLRDNORM;
+
+ return 0;
+}
+
+static const struct file_operations batadv_log_fops = {
+ .open = batadv_log_open,
+ .release = batadv_log_release,
+ .read = batadv_log_read,
+ .poll = batadv_log_poll,
+ .llseek = no_llseek,
+};
+
+int batadv_debug_log_setup(struct batadv_priv *bat_priv)
+{
+ struct dentry *d;
+
+ if (!bat_priv->debug_dir)
+ goto err;
+
+ bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC);
+ if (!bat_priv->debug_log)
+ goto err;
+
+ spin_lock_init(&bat_priv->debug_log->lock);
+ init_waitqueue_head(&bat_priv->debug_log->queue_wait);
+
+ d = debugfs_create_file("log", S_IFREG | S_IRUSR,
+ bat_priv->debug_dir, bat_priv,
+ &batadv_log_fops);
+ if (!d)
+ goto err;
+
+ return 0;
+
+err:
+ return -ENOMEM;
+}
+
+void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
+{
+ kfree(bat_priv->debug_log);
+ bat_priv->debug_log = NULL;
+}
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
new file mode 100644
index 000000000000..e0e1a88c3e58
--- /dev/null
+++ b/net/batman-adv/log.h
@@ -0,0 +1,111 @@
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_LOG_H_
+#define _NET_BATMAN_ADV_LOG_H_
+
+#include "main.h"
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/printk.h>
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+
+int batadv_debug_log_setup(struct batadv_priv *bat_priv);
+void batadv_debug_log_cleanup(struct batadv_priv *bat_priv);
+
+#else
+
+static inline int batadv_debug_log_setup(struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+static inline void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
+{
+}
+
+#endif
+
+/**
+ * enum batadv_dbg_level - available log levels
+ * @BATADV_DBG_BATMAN: OGM and TQ computations related messages
+ * @BATADV_DBG_ROUTES: route added / changed / deleted
+ * @BATADV_DBG_TT: translation table messages
+ * @BATADV_DBG_BLA: bridge loop avoidance messages
+ * @BATADV_DBG_DAT: ARP snooping and DAT related messages
+ * @BATADV_DBG_NC: network coding related messages
+ * @BATADV_DBG_MCAST: multicast related messages
+ * @BATADV_DBG_TP_METER: throughput meter messages
+ * @BATADV_DBG_ALL: the union of all the above log levels
+ */
+enum batadv_dbg_level {
+ BATADV_DBG_BATMAN = BIT(0),
+ BATADV_DBG_ROUTES = BIT(1),
+ BATADV_DBG_TT = BIT(2),
+ BATADV_DBG_BLA = BIT(3),
+ BATADV_DBG_DAT = BIT(4),
+ BATADV_DBG_NC = BIT(5),
+ BATADV_DBG_MCAST = BIT(6),
+ BATADV_DBG_TP_METER = BIT(7),
+ BATADV_DBG_ALL = 127,
+};
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
+__printf(2, 3);
+
+/* possibly ratelimited debug output */
+#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
+ do { \
+ if (atomic_read(&bat_priv->log_level) & type && \
+ (!ratelimited || net_ratelimit())) \
+ batadv_debug_log(bat_priv, fmt, ## arg);\
+ } \
+ while (0)
+#else /* !CONFIG_BATMAN_ADV_DEBUG */
+__printf(4, 5)
+static inline void _batadv_dbg(int type __always_unused,
+ struct batadv_priv *bat_priv __always_unused,
+ int ratelimited __always_unused,
+ const char *fmt __always_unused, ...)
+{
+}
+#endif
+
+#define batadv_dbg(type, bat_priv, arg...) \
+ _batadv_dbg(type, bat_priv, 0, ## arg)
+#define batadv_dbg_ratelimited(type, bat_priv, arg...) \
+ _batadv_dbg(type, bat_priv, 1, ## arg)
+
+#define batadv_info(net_dev, fmt, arg...) \
+ do { \
+ struct net_device *_netdev = (net_dev); \
+ struct batadv_priv *_batpriv = netdev_priv(_netdev); \
+ batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \
+ pr_info("%s: " fmt, _netdev->name, ## arg); \
+ } while (0)
+#define batadv_err(net_dev, fmt, arg...) \
+ do { \
+ struct net_device *_netdev = (net_dev); \
+ struct batadv_priv *_batpriv = netdev_priv(_netdev); \
+ batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \
+ pr_err("%s: " fmt, _netdev->name, ## arg); \
+ } while (0)
+
+#endif /* _NET_BATMAN_ADV_LOG_H_ */
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 627d14ececaf..fe4c5e29f96b 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -31,16 +31,13 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
-#include <linux/lockdep.h>
#include <linux/module.h>
-#include <linux/moduleparam.h>
#include <linux/netdevice.h>
-#include <linux/pkt_sched.h>
+#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
#include <linux/skbuff.h>
-#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/string.h>
@@ -49,6 +46,8 @@
#include <net/rtnetlink.h>
#include "bat_algo.h"
+#include "bat_iv_ogm.h"
+#include "bat_v.h"
#include "bridge_loop_avoidance.h"
#include "debugfs.h"
#include "distributed-arp-table.h"
@@ -56,13 +55,16 @@
#include "gateway_common.h"
#include "hard-interface.h"
#include "icmp_socket.h"
+#include "log.h"
#include "multicast.h"
+#include "netlink.h"
#include "network-coding.h"
#include "originator.h"
#include "packet.h"
#include "routing.h"
#include "send.h"
#include "soft-interface.h"
+#include "tp_meter.h"
#include "translation-table.h"
/* List manipulations on hardif_list have to be rtnl_lock()'ed,
@@ -71,8 +73,6 @@
struct list_head batadv_hardif_list;
static int (*batadv_rx_handler[256])(struct sk_buff *,
struct batadv_hard_iface *);
-char batadv_routing_algo[20] = "BATMAN_IV";
-static struct hlist_head batadv_algo_list;
unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
@@ -83,13 +83,14 @@ static void batadv_recv_handler_init(void);
static int __init batadv_init(void)
{
INIT_LIST_HEAD(&batadv_hardif_list);
- INIT_HLIST_HEAD(&batadv_algo_list);
+ batadv_algo_init();
batadv_recv_handler_init();
batadv_v_init();
batadv_iv_init();
batadv_nc_init();
+ batadv_tp_meter_init();
batadv_event_workqueue = create_singlethread_workqueue("bat_events");
@@ -101,6 +102,7 @@ static int __init batadv_init(void)
register_netdevice_notifier(&batadv_hard_if_notifier);
rtnl_link_register(&batadv_link_ops);
+ batadv_netlink_register();
pr_info("B.A.T.M.A.N. advanced %s (compatibility version %i) loaded\n",
BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION);
@@ -111,6 +113,7 @@ static int __init batadv_init(void)
static void __exit batadv_exit(void)
{
batadv_debugfs_destroy();
+ batadv_netlink_unregister();
rtnl_link_unregister(&batadv_link_ops);
unregister_netdevice_notifier(&batadv_hard_if_notifier);
batadv_hardif_remove_interfaces();
@@ -141,6 +144,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
spin_lock_init(&bat_priv->tvlv.container_list_lock);
spin_lock_init(&bat_priv->tvlv.handler_list_lock);
spin_lock_init(&bat_priv->softif_vlan_list_lock);
+ spin_lock_init(&bat_priv->tp_list_lock);
INIT_HLIST_HEAD(&bat_priv->forw_bat_list);
INIT_HLIST_HEAD(&bat_priv->forw_bcast_list);
@@ -159,6 +163,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_HLIST_HEAD(&bat_priv->tvlv.container_list);
INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list);
INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
+ INIT_HLIST_HEAD(&bat_priv->tp_list);
ret = batadv_v_mesh_init(bat_priv);
if (ret < 0)
@@ -538,76 +543,6 @@ void batadv_recv_handler_unregister(u8 packet_type)
batadv_rx_handler[packet_type] = batadv_recv_unhandled_packet;
}
-static struct batadv_algo_ops *batadv_algo_get(char *name)
-{
- struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp;
-
- hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) {
- if (strcmp(bat_algo_ops_tmp->name, name) != 0)
- continue;
-
- bat_algo_ops = bat_algo_ops_tmp;
- break;
- }
-
- return bat_algo_ops;
-}
-
-int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
-{
- struct batadv_algo_ops *bat_algo_ops_tmp;
-
- bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name);
- if (bat_algo_ops_tmp) {
- pr_info("Trying to register already registered routing algorithm: %s\n",
- bat_algo_ops->name);
- return -EEXIST;
- }
-
- /* all algorithms must implement all ops (for now) */
- if (!bat_algo_ops->bat_iface_enable ||
- !bat_algo_ops->bat_iface_disable ||
- !bat_algo_ops->bat_iface_update_mac ||
- !bat_algo_ops->bat_primary_iface_set ||
- !bat_algo_ops->bat_neigh_cmp ||
- !bat_algo_ops->bat_neigh_is_similar_or_better) {
- pr_info("Routing algo '%s' does not implement required ops\n",
- bat_algo_ops->name);
- return -EINVAL;
- }
-
- INIT_HLIST_NODE(&bat_algo_ops->list);
- hlist_add_head(&bat_algo_ops->list, &batadv_algo_list);
-
- return 0;
-}
-
-int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
-{
- struct batadv_algo_ops *bat_algo_ops;
-
- bat_algo_ops = batadv_algo_get(name);
- if (!bat_algo_ops)
- return -EINVAL;
-
- bat_priv->bat_algo_ops = bat_algo_ops;
-
- return 0;
-}
-
-int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
-{
- struct batadv_algo_ops *bat_algo_ops;
-
- seq_puts(seq, "Available routing algorithms:\n");
-
- hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
- seq_printf(seq, " * %s\n", bat_algo_ops->name);
- }
-
- return 0;
-}
-
/**
* batadv_skb_crc32 - calculate CRC32 of the whole packet and skip bytes in
* the header
@@ -642,594 +577,6 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
}
/**
- * batadv_tvlv_handler_release - release tvlv handler from lists and queue for
- * free after rcu grace period
- * @ref: kref pointer of the tvlv
- */
-static void batadv_tvlv_handler_release(struct kref *ref)
-{
- struct batadv_tvlv_handler *tvlv_handler;
-
- tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount);
- kfree_rcu(tvlv_handler, rcu);
-}
-
-/**
- * batadv_tvlv_handler_put - decrement the tvlv container refcounter and
- * possibly release it
- * @tvlv_handler: the tvlv handler to free
- */
-static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
-{
- kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
-}
-
-/**
- * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list
- * based on the provided type and version (both need to match)
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv handler type to look for
- * @version: tvlv handler version to look for
- *
- * Return: tvlv handler if found or NULL otherwise.
- */
-static struct batadv_tvlv_handler *
-batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
-{
- struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tvlv_handler_tmp,
- &bat_priv->tvlv.handler_list, list) {
- if (tvlv_handler_tmp->type != type)
- continue;
-
- if (tvlv_handler_tmp->version != version)
- continue;
-
- if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount))
- continue;
-
- tvlv_handler = tvlv_handler_tmp;
- break;
- }
- rcu_read_unlock();
-
- return tvlv_handler;
-}
-
-/**
- * batadv_tvlv_container_release - release tvlv from lists and free
- * @ref: kref pointer of the tvlv
- */
-static void batadv_tvlv_container_release(struct kref *ref)
-{
- struct batadv_tvlv_container *tvlv;
-
- tvlv = container_of(ref, struct batadv_tvlv_container, refcount);
- kfree(tvlv);
-}
-
-/**
- * batadv_tvlv_container_put - decrement the tvlv container refcounter and
- * possibly release it
- * @tvlv: the tvlv container to free
- */
-static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
-{
- kref_put(&tvlv->refcount, batadv_tvlv_container_release);
-}
-
-/**
- * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container
- * list based on the provided type and version (both need to match)
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv container type to look for
- * @version: tvlv container version to look for
- *
- * Has to be called with the appropriate locks being acquired
- * (tvlv.container_list_lock).
- *
- * Return: tvlv container if found or NULL otherwise.
- */
-static struct batadv_tvlv_container *
-batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
-{
- struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
-
- lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
-
- hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) {
- if (tvlv_tmp->tvlv_hdr.type != type)
- continue;
-
- if (tvlv_tmp->tvlv_hdr.version != version)
- continue;
-
- kref_get(&tvlv_tmp->refcount);
- tvlv = tvlv_tmp;
- break;
- }
-
- return tvlv;
-}
-
-/**
- * batadv_tvlv_container_list_size - calculate the size of the tvlv container
- * list entries
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Has to be called with the appropriate locks being acquired
- * (tvlv.container_list_lock).
- *
- * Return: size of all currently registered tvlv containers in bytes.
- */
-static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
-{
- struct batadv_tvlv_container *tvlv;
- u16 tvlv_len = 0;
-
- lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
-
- hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
- tvlv_len += sizeof(struct batadv_tvlv_hdr);
- tvlv_len += ntohs(tvlv->tvlv_hdr.len);
- }
-
- return tvlv_len;
-}
-
-/**
- * batadv_tvlv_container_remove - remove tvlv container from the tvlv container
- * list
- * @bat_priv: the bat priv with all the soft interface information
- * @tvlv: the to be removed tvlv container
- *
- * Has to be called with the appropriate locks being acquired
- * (tvlv.container_list_lock).
- */
-static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
- struct batadv_tvlv_container *tvlv)
-{
- lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
-
- if (!tvlv)
- return;
-
- hlist_del(&tvlv->list);
-
- /* first call to decrement the counter, second call to free */
- batadv_tvlv_container_put(tvlv);
- batadv_tvlv_container_put(tvlv);
-}
-
-/**
- * batadv_tvlv_container_unregister - unregister tvlv container based on the
- * provided type and version (both need to match)
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv container type to unregister
- * @version: tvlv container type to unregister
- */
-void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
- u8 type, u8 version)
-{
- struct batadv_tvlv_container *tvlv;
-
- spin_lock_bh(&bat_priv->tvlv.container_list_lock);
- tvlv = batadv_tvlv_container_get(bat_priv, type, version);
- batadv_tvlv_container_remove(bat_priv, tvlv);
- spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
-}
-
-/**
- * batadv_tvlv_container_register - register tvlv type, version and content
- * to be propagated with each (primary interface) OGM
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv container type
- * @version: tvlv container version
- * @tvlv_value: tvlv container content
- * @tvlv_value_len: tvlv container content length
- *
- * If a container of the same type and version was already registered the new
- * content is going to replace the old one.
- */
-void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
- u8 type, u8 version,
- void *tvlv_value, u16 tvlv_value_len)
-{
- struct batadv_tvlv_container *tvlv_old, *tvlv_new;
-
- if (!tvlv_value)
- tvlv_value_len = 0;
-
- tvlv_new = kzalloc(sizeof(*tvlv_new) + tvlv_value_len, GFP_ATOMIC);
- if (!tvlv_new)
- return;
-
- tvlv_new->tvlv_hdr.version = version;
- tvlv_new->tvlv_hdr.type = type;
- tvlv_new->tvlv_hdr.len = htons(tvlv_value_len);
-
- memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len));
- INIT_HLIST_NODE(&tvlv_new->list);
- kref_init(&tvlv_new->refcount);
-
- spin_lock_bh(&bat_priv->tvlv.container_list_lock);
- tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
- batadv_tvlv_container_remove(bat_priv, tvlv_old);
- hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list);
- spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
-}
-
-/**
- * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate
- * requested packet size
- * @packet_buff: packet buffer
- * @packet_buff_len: packet buffer size
- * @min_packet_len: requested packet minimum size
- * @additional_packet_len: requested additional packet size on top of minimum
- * size
- *
- * Return: true of the packet buffer could be changed to the requested size,
- * false otherwise.
- */
-static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
- int *packet_buff_len,
- int min_packet_len,
- int additional_packet_len)
-{
- unsigned char *new_buff;
-
- new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC);
-
- /* keep old buffer if kmalloc should fail */
- if (!new_buff)
- return false;
-
- memcpy(new_buff, *packet_buff, min_packet_len);
- kfree(*packet_buff);
- *packet_buff = new_buff;
- *packet_buff_len = min_packet_len + additional_packet_len;
-
- return true;
-}
-
-/**
- * batadv_tvlv_container_ogm_append - append tvlv container content to given
- * OGM packet buffer
- * @bat_priv: the bat priv with all the soft interface information
- * @packet_buff: ogm packet buffer
- * @packet_buff_len: ogm packet buffer size including ogm header and tvlv
- * content
- * @packet_min_len: ogm header size to be preserved for the OGM itself
- *
- * The ogm packet might be enlarged or shrunk depending on the current size
- * and the size of the to-be-appended tvlv containers.
- *
- * Return: size of all appended tvlv containers in bytes.
- */
-u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
- unsigned char **packet_buff,
- int *packet_buff_len, int packet_min_len)
-{
- struct batadv_tvlv_container *tvlv;
- struct batadv_tvlv_hdr *tvlv_hdr;
- u16 tvlv_value_len;
- void *tvlv_value;
- bool ret;
-
- spin_lock_bh(&bat_priv->tvlv.container_list_lock);
- tvlv_value_len = batadv_tvlv_container_list_size(bat_priv);
-
- ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len,
- packet_min_len, tvlv_value_len);
-
- if (!ret)
- goto end;
-
- if (!tvlv_value_len)
- goto end;
-
- tvlv_value = (*packet_buff) + packet_min_len;
-
- hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
- tvlv_hdr = tvlv_value;
- tvlv_hdr->type = tvlv->tvlv_hdr.type;
- tvlv_hdr->version = tvlv->tvlv_hdr.version;
- tvlv_hdr->len = tvlv->tvlv_hdr.len;
- tvlv_value = tvlv_hdr + 1;
- memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len));
- tvlv_value = (u8 *)tvlv_value + ntohs(tvlv->tvlv_hdr.len);
- }
-
-end:
- spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
- return tvlv_value_len;
-}
-
-/**
- * batadv_tvlv_call_handler - parse the given tvlv buffer to call the
- * appropriate handlers
- * @bat_priv: the bat priv with all the soft interface information
- * @tvlv_handler: tvlv callback function handling the tvlv content
- * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
- * @orig_node: orig node emitting the ogm packet
- * @src: source mac address of the unicast packet
- * @dst: destination mac address of the unicast packet
- * @tvlv_value: tvlv content
- * @tvlv_value_len: tvlv content length
- *
- * Return: success if handler was not found or the return value of the handler
- * callback.
- */
-static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
- struct batadv_tvlv_handler *tvlv_handler,
- bool ogm_source,
- struct batadv_orig_node *orig_node,
- u8 *src, u8 *dst,
- void *tvlv_value, u16 tvlv_value_len)
-{
- if (!tvlv_handler)
- return NET_RX_SUCCESS;
-
- if (ogm_source) {
- if (!tvlv_handler->ogm_handler)
- return NET_RX_SUCCESS;
-
- if (!orig_node)
- return NET_RX_SUCCESS;
-
- tvlv_handler->ogm_handler(bat_priv, orig_node,
- BATADV_NO_FLAGS,
- tvlv_value, tvlv_value_len);
- tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED;
- } else {
- if (!src)
- return NET_RX_SUCCESS;
-
- if (!dst)
- return NET_RX_SUCCESS;
-
- if (!tvlv_handler->unicast_handler)
- return NET_RX_SUCCESS;
-
- return tvlv_handler->unicast_handler(bat_priv, src,
- dst, tvlv_value,
- tvlv_value_len);
- }
-
- return NET_RX_SUCCESS;
-}
-
-/**
- * batadv_tvlv_containers_process - parse the given tvlv buffer to call the
- * appropriate handlers
- * @bat_priv: the bat priv with all the soft interface information
- * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
- * @orig_node: orig node emitting the ogm packet
- * @src: source mac address of the unicast packet
- * @dst: destination mac address of the unicast packet
- * @tvlv_value: tvlv content
- * @tvlv_value_len: tvlv content length
- *
- * Return: success when processing an OGM or the return value of all called
- * handler callbacks.
- */
-int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
- bool ogm_source,
- struct batadv_orig_node *orig_node,
- u8 *src, u8 *dst,
- void *tvlv_value, u16 tvlv_value_len)
-{
- struct batadv_tvlv_handler *tvlv_handler;
- struct batadv_tvlv_hdr *tvlv_hdr;
- u16 tvlv_value_cont_len;
- u8 cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND;
- int ret = NET_RX_SUCCESS;
-
- while (tvlv_value_len >= sizeof(*tvlv_hdr)) {
- tvlv_hdr = tvlv_value;
- tvlv_value_cont_len = ntohs(tvlv_hdr->len);
- tvlv_value = tvlv_hdr + 1;
- tvlv_value_len -= sizeof(*tvlv_hdr);
-
- if (tvlv_value_cont_len > tvlv_value_len)
- break;
-
- tvlv_handler = batadv_tvlv_handler_get(bat_priv,
- tvlv_hdr->type,
- tvlv_hdr->version);
-
- ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler,
- ogm_source, orig_node,
- src, dst, tvlv_value,
- tvlv_value_cont_len);
- if (tvlv_handler)
- batadv_tvlv_handler_put(tvlv_handler);
- tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
- tvlv_value_len -= tvlv_value_cont_len;
- }
-
- if (!ogm_source)
- return ret;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tvlv_handler,
- &bat_priv->tvlv.handler_list, list) {
- if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) &&
- !(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED))
- tvlv_handler->ogm_handler(bat_priv, orig_node,
- cifnotfound, NULL, 0);
-
- tvlv_handler->flags &= ~BATADV_TVLV_HANDLER_OGM_CALLED;
- }
- rcu_read_unlock();
-
- return NET_RX_SUCCESS;
-}
-
-/**
- * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate
- * handlers
- * @bat_priv: the bat priv with all the soft interface information
- * @batadv_ogm_packet: ogm packet containing the tvlv containers
- * @orig_node: orig node emitting the ogm packet
- */
-void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
- struct batadv_ogm_packet *batadv_ogm_packet,
- struct batadv_orig_node *orig_node)
-{
- void *tvlv_value;
- u16 tvlv_value_len;
-
- if (!batadv_ogm_packet)
- return;
-
- tvlv_value_len = ntohs(batadv_ogm_packet->tvlv_len);
- if (!tvlv_value_len)
- return;
-
- tvlv_value = batadv_ogm_packet + 1;
-
- batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL,
- tvlv_value, tvlv_value_len);
-}
-
-/**
- * batadv_tvlv_handler_register - register tvlv handler based on the provided
- * type and version (both need to match) for ogm tvlv payload and/or unicast
- * payload
- * @bat_priv: the bat priv with all the soft interface information
- * @optr: ogm tvlv handler callback function. This function receives the orig
- * node, flags and the tvlv content as argument to process.
- * @uptr: unicast tvlv handler callback function. This function receives the
- * source & destination of the unicast packet as well as the tvlv content
- * to process.
- * @type: tvlv handler type to be registered
- * @version: tvlv handler version to be registered
- * @flags: flags to enable or disable TVLV API behavior
- */
-void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
- void (*optr)(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig,
- u8 flags,
- void *tvlv_value,
- u16 tvlv_value_len),
- int (*uptr)(struct batadv_priv *bat_priv,
- u8 *src, u8 *dst,
- void *tvlv_value,
- u16 tvlv_value_len),
- u8 type, u8 version, u8 flags)
-{
- struct batadv_tvlv_handler *tvlv_handler;
-
- tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
- if (tvlv_handler) {
- batadv_tvlv_handler_put(tvlv_handler);
- return;
- }
-
- tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC);
- if (!tvlv_handler)
- return;
-
- tvlv_handler->ogm_handler = optr;
- tvlv_handler->unicast_handler = uptr;
- tvlv_handler->type = type;
- tvlv_handler->version = version;
- tvlv_handler->flags = flags;
- kref_init(&tvlv_handler->refcount);
- INIT_HLIST_NODE(&tvlv_handler->list);
-
- spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
- hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list);
- spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
-}
-
-/**
- * batadv_tvlv_handler_unregister - unregister tvlv handler based on the
- * provided type and version (both need to match)
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv handler type to be unregistered
- * @version: tvlv handler version to be unregistered
- */
-void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
- u8 type, u8 version)
-{
- struct batadv_tvlv_handler *tvlv_handler;
-
- tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
- if (!tvlv_handler)
- return;
-
- batadv_tvlv_handler_put(tvlv_handler);
- spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
- hlist_del_rcu(&tvlv_handler->list);
- spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
- batadv_tvlv_handler_put(tvlv_handler);
-}
-
-/**
- * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the
- * specified host
- * @bat_priv: the bat priv with all the soft interface information
- * @src: source mac address of the unicast packet
- * @dst: destination mac address of the unicast packet
- * @type: tvlv type
- * @version: tvlv version
- * @tvlv_value: tvlv content
- * @tvlv_value_len: tvlv content length
- */
-void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
- u8 *dst, u8 type, u8 version,
- void *tvlv_value, u16 tvlv_value_len)
-{
- struct batadv_unicast_tvlv_packet *unicast_tvlv_packet;
- struct batadv_tvlv_hdr *tvlv_hdr;
- struct batadv_orig_node *orig_node;
- struct sk_buff *skb;
- unsigned char *tvlv_buff;
- unsigned int tvlv_len;
- ssize_t hdr_len = sizeof(*unicast_tvlv_packet);
-
- orig_node = batadv_orig_hash_find(bat_priv, dst);
- if (!orig_node)
- return;
-
- tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len;
-
- skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + hdr_len + tvlv_len);
- if (!skb)
- goto out;
-
- skb->priority = TC_PRIO_CONTROL;
- skb_reserve(skb, ETH_HLEN);
- tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len);
- unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff;
- unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV;
- unicast_tvlv_packet->version = BATADV_COMPAT_VERSION;
- unicast_tvlv_packet->ttl = BATADV_TTL;
- unicast_tvlv_packet->reserved = 0;
- unicast_tvlv_packet->tvlv_len = htons(tvlv_len);
- unicast_tvlv_packet->align = 0;
- ether_addr_copy(unicast_tvlv_packet->src, src);
- ether_addr_copy(unicast_tvlv_packet->dst, dst);
-
- tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1);
- tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff;
- tvlv_hdr->version = version;
- tvlv_hdr->type = type;
- tvlv_hdr->len = htons(tvlv_value_len);
- tvlv_buff += sizeof(*tvlv_hdr);
- memcpy(tvlv_buff, tvlv_value, tvlv_value_len);
-
- if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP)
- kfree_skb(skb);
-out:
- batadv_orig_node_put(orig_node);
-}
-
-/**
* batadv_get_vid - extract the VLAN identifier from skb if any
* @skb: the buffer containing the packet
* @header_len: length of the batman header preceding the ethernet header
@@ -1282,36 +629,6 @@ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid)
return ap_isolation_enabled;
}
-static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
-{
- struct batadv_algo_ops *bat_algo_ops;
- char *algo_name = (char *)val;
- size_t name_len = strlen(algo_name);
-
- if (name_len > 0 && algo_name[name_len - 1] == '\n')
- algo_name[name_len - 1] = '\0';
-
- bat_algo_ops = batadv_algo_get(algo_name);
- if (!bat_algo_ops) {
- pr_err("Routing algorithm '%s' is not supported\n", algo_name);
- return -EINVAL;
- }
-
- return param_set_copystring(algo_name, kp);
-}
-
-static const struct kernel_param_ops batadv_param_ops_ra = {
- .set = batadv_param_set_ra,
- .get = param_get_string,
-};
-
-static struct kparam_string batadv_param_string_ra = {
- .maxlen = sizeof(batadv_routing_algo),
- .string = batadv_routing_algo,
-};
-
-module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
- 0644);
module_init(batadv_init);
module_exit(batadv_exit);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index cd83e2824f70..06a860845434 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -100,6 +100,9 @@
#define BATADV_NUM_BCASTS_WIRELESS 3
#define BATADV_NUM_BCASTS_MAX 3
+/* length of the single packet used by the TP meter */
+#define BATADV_TP_PACKET_LEN ETH_DATA_LEN
+
/* msecs after which an ARP_REQUEST is sent in broadcast as fallback */
#define ARP_REQ_DELAY 250
/* numbers of originator to contact for any PUT/GET DHT operation */
@@ -131,6 +134,11 @@
#define BATADV_NC_NODE_TIMEOUT 10000 /* Milliseconds */
+/**
+ * BATADV_TP_MAX_NUM - maximum number of simultaneously active tp sessions
+ */
+#define BATADV_TP_MAX_NUM 5
+
enum batadv_mesh_state {
BATADV_MESH_INACTIVE,
BATADV_MESH_ACTIVE,
@@ -175,29 +183,26 @@ enum batadv_uev_type {
/* Kernel headers */
-#include <linux/atomic.h>
#include <linux/bitops.h> /* for packet.h */
#include <linux/compiler.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h> /* for packet.h */
-#include <linux/netdevice.h>
-#include <linux/printk.h>
-#include <linux/types.h>
-#include <linux/percpu.h>
-#include <linux/jiffies.h>
#include <linux/if_vlan.h>
+#include <linux/jiffies.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
#include "types.h"
-struct batadv_ogm_packet;
+struct net_device;
+struct packet_type;
struct seq_file;
struct sk_buff;
#define BATADV_PRINT_VID(vid) ((vid & BATADV_VLAN_HAS_TAG) ? \
(int)(vid & VLAN_VID_MASK) : -1)
-extern char batadv_routing_algo[];
extern struct list_head batadv_hardif_list;
extern unsigned char batadv_broadcast_addr[];
@@ -218,76 +223,9 @@ batadv_recv_handler_register(u8 packet_type,
int (*recv_handler)(struct sk_buff *,
struct batadv_hard_iface *));
void batadv_recv_handler_unregister(u8 packet_type);
-int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
-int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
-int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
__be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr);
/**
- * enum batadv_dbg_level - available log levels
- * @BATADV_DBG_BATMAN: OGM and TQ computations related messages
- * @BATADV_DBG_ROUTES: route added / changed / deleted
- * @BATADV_DBG_TT: translation table messages
- * @BATADV_DBG_BLA: bridge loop avoidance messages
- * @BATADV_DBG_DAT: ARP snooping and DAT related messages
- * @BATADV_DBG_NC: network coding related messages
- * @BATADV_DBG_MCAST: multicast related messages
- * @BATADV_DBG_ALL: the union of all the above log levels
- */
-enum batadv_dbg_level {
- BATADV_DBG_BATMAN = BIT(0),
- BATADV_DBG_ROUTES = BIT(1),
- BATADV_DBG_TT = BIT(2),
- BATADV_DBG_BLA = BIT(3),
- BATADV_DBG_DAT = BIT(4),
- BATADV_DBG_NC = BIT(5),
- BATADV_DBG_MCAST = BIT(6),
- BATADV_DBG_ALL = 127,
-};
-
-#ifdef CONFIG_BATMAN_ADV_DEBUG
-int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
-__printf(2, 3);
-
-/* possibly ratelimited debug output */
-#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
- do { \
- if (atomic_read(&bat_priv->log_level) & type && \
- (!ratelimited || net_ratelimit())) \
- batadv_debug_log(bat_priv, fmt, ## arg);\
- } \
- while (0)
-#else /* !CONFIG_BATMAN_ADV_DEBUG */
-__printf(4, 5)
-static inline void _batadv_dbg(int type __always_unused,
- struct batadv_priv *bat_priv __always_unused,
- int ratelimited __always_unused,
- const char *fmt __always_unused, ...)
-{
-}
-#endif
-
-#define batadv_dbg(type, bat_priv, arg...) \
- _batadv_dbg(type, bat_priv, 0, ## arg)
-#define batadv_dbg_ratelimited(type, bat_priv, arg...) \
- _batadv_dbg(type, bat_priv, 1, ## arg)
-
-#define batadv_info(net_dev, fmt, arg...) \
- do { \
- struct net_device *_netdev = (net_dev); \
- struct batadv_priv *_batpriv = netdev_priv(_netdev); \
- batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \
- pr_info("%s: " fmt, _netdev->name, ## arg); \
- } while (0)
-#define batadv_err(net_dev, fmt, arg...) \
- do { \
- struct net_device *_netdev = (net_dev); \
- struct batadv_priv *_batpriv = netdev_priv(_netdev); \
- batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \
- pr_err("%s: " fmt, _netdev->name, ## arg); \
- } while (0)
-
-/**
* batadv_compare_eth - Compare two not u16 aligned Ethernet addresses
* @data1: Pointer to a six-byte array containing the Ethernet address
* @data2: Pointer other six-byte array containing the Ethernet address
@@ -372,39 +310,6 @@ static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
*/
#define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0]))
-void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
- u8 type, u8 version,
- void *tvlv_value, u16 tvlv_value_len);
-u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
- unsigned char **packet_buff,
- int *packet_buff_len, int packet_min_len);
-void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
- struct batadv_ogm_packet *batadv_ogm_packet,
- struct batadv_orig_node *orig_node);
-void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
- u8 type, u8 version);
-
-void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
- void (*optr)(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig,
- u8 flags,
- void *tvlv_value,
- u16 tvlv_value_len),
- int (*uptr)(struct batadv_priv *bat_priv,
- u8 *src, u8 *dst,
- void *tvlv_value,
- u16 tvlv_value_len),
- u8 type, u8 version, u8 flags);
-void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
- u8 type, u8 version);
-int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
- bool ogm_source,
- struct batadv_orig_node *orig_node,
- u8 *src, u8 *dst,
- void *tvlv_buff, u16 tvlv_buff_len);
-void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
- u8 *dst, u8 type, u8 version,
- void *tvlv_value, u16 tvlv_value_len);
unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len);
bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid);
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index d3222db60fd0..cc915073a753 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -55,8 +55,10 @@
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "packet.h"
#include "translation-table.h"
+#include "tvlv.h"
/**
* batadv_mcast_get_bridge - get the bridge on top of the softif if it exists
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
new file mode 100644
index 000000000000..231f8eaf075b
--- /dev/null
+++ b/net/batman-adv/netlink.c
@@ -0,0 +1,424 @@
+/* Copyright (C) 2016 B.A.T.M.A.N. contributors:
+ *
+ * Matthias Schiffer
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "netlink.h"
+#include "main.h"
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/genetlink.h>
+#include <linux/if_ether.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/printk.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
+
+#include "hard-interface.h"
+#include "soft-interface.h"
+#include "tp_meter.h"
+
+struct sk_buff;
+
+static struct genl_family batadv_netlink_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = BATADV_NL_NAME,
+ .version = 1,
+ .maxattr = BATADV_ATTR_MAX,
+};
+
+/* multicast groups */
+enum batadv_netlink_multicast_groups {
+ BATADV_NL_MCGRP_TPMETER,
+};
+
+static struct genl_multicast_group batadv_netlink_mcgrps[] = {
+ [BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER },
+};
+
+static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
+ [BATADV_ATTR_VERSION] = { .type = NLA_STRING },
+ [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 },
+ [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
+ [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
+ [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
+};
+
+/**
+ * batadv_netlink_mesh_info_put - fill in generic information about mesh
+ * interface
+ * @msg: netlink message to be sent back
+ * @soft_iface: interface for which the data should be taken
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
+{
+ struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+ struct batadv_hard_iface *primary_if = NULL;
+ struct net_device *hard_iface;
+ int ret = -ENOBUFS;
+
+ if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) ||
+ nla_put_string(msg, BATADV_ATTR_ALGO_NAME,
+ bat_priv->algo_ops->name) ||
+ nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) ||
+ nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) ||
+ nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN,
+ soft_iface->dev_addr))
+ goto out;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
+ hard_iface = primary_if->net_dev;
+
+ if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+ hard_iface->ifindex) ||
+ nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+ hard_iface->name) ||
+ nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
+ hard_iface->dev_addr))
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+
+ return ret;
+}
+
+/**
+ * batadv_netlink_get_mesh_info - handle incoming BATADV_CMD_GET_MESH_INFO
+ * netlink request
+ * @skb: received netlink message
+ * @info: receiver information
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct net_device *soft_iface;
+ struct sk_buff *msg = NULL;
+ void *msg_head;
+ int ifindex;
+ int ret;
+
+ if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
+ return -EINVAL;
+
+ ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &batadv_netlink_family, 0,
+ BATADV_CMD_GET_MESH_INFO);
+ if (!msg_head) {
+ ret = -ENOBUFS;
+ goto out;
+ }
+
+ ret = batadv_netlink_mesh_info_put(msg, soft_iface);
+
+ out:
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ if (ret) {
+ if (msg)
+ nlmsg_free(msg);
+ return ret;
+ }
+
+ genlmsg_end(msg, msg_head);
+ return genlmsg_reply(msg, info);
+}
+
+/**
+ * batadv_netlink_tp_meter_put - Fill information of started tp_meter session
+ * @msg: netlink message to be sent back
+ * @cookie: tp meter session cookie
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie)
+{
+ if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie))
+ return -ENOBUFS;
+
+ return 0;
+}
+
+/**
+ * batadv_netlink_tpmeter_notify - send tp_meter result via netlink to client
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: destination of tp_meter session
+ * @result: reason for tp meter session stop
+ * @test_time: total time ot the tp_meter session
+ * @total_bytes: bytes acked to the receiver
+ * @cookie: cookie of tp_meter session
+ *
+ * Return: 0 on success, < 0 on error
+ */
+int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
+ u8 result, u32 test_time, u64 total_bytes,
+ u32 cookie)
+{
+ struct sk_buff *msg;
+ void *hdr;
+ int ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(msg, 0, 0, &batadv_netlink_family, 0,
+ BATADV_CMD_TP_METER);
+ if (!hdr) {
+ ret = -ENOBUFS;
+ goto err_genlmsg;
+ }
+
+ if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, BATADV_ATTR_TPMETER_TEST_TIME, test_time))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, BATADV_ATTR_TPMETER_BYTES, total_bytes,
+ BATADV_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, BATADV_ATTR_TPMETER_RESULT, result))
+ goto nla_put_failure;
+
+ if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, dst))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast_netns(&batadv_netlink_family,
+ dev_net(bat_priv->soft_iface), msg, 0,
+ BATADV_NL_MCGRP_TPMETER, GFP_KERNEL);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ ret = -EMSGSIZE;
+
+err_genlmsg:
+ nlmsg_free(msg);
+ return ret;
+}
+
+/**
+ * batadv_netlink_tp_meter_start - Start a new tp_meter session
+ * @skb: received netlink message
+ * @info: receiver information
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct net_device *soft_iface;
+ struct batadv_priv *bat_priv;
+ struct sk_buff *msg = NULL;
+ u32 test_length;
+ void *msg_head;
+ int ifindex;
+ u32 cookie;
+ u8 *dst;
+ int ret;
+
+ if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
+ return -EINVAL;
+
+ if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME])
+ return -EINVAL;
+
+ ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
+ if (!ifindex)
+ return -EINVAL;
+
+ dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
+
+ test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]);
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &batadv_netlink_family, 0,
+ BATADV_CMD_TP_METER);
+ if (!msg_head) {
+ ret = -ENOBUFS;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+ batadv_tp_start(bat_priv, dst, test_length, &cookie);
+
+ ret = batadv_netlink_tp_meter_put(msg, cookie);
+
+ out:
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ if (ret) {
+ if (msg)
+ nlmsg_free(msg);
+ return ret;
+ }
+
+ genlmsg_end(msg, msg_head);
+ return genlmsg_reply(msg, info);
+}
+
+/**
+ * batadv_netlink_tp_meter_start - Cancel a running tp_meter session
+ * @skb: received netlink message
+ * @info: receiver information
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct net_device *soft_iface;
+ struct batadv_priv *bat_priv;
+ int ifindex;
+ u8 *dst;
+ int ret = 0;
+
+ if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
+ return -EINVAL;
+
+ ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
+ if (!ifindex)
+ return -EINVAL;
+
+ dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+ batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL);
+
+out:
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ return ret;
+}
+
+static struct genl_ops batadv_netlink_ops[] = {
+ {
+ .cmd = BATADV_CMD_GET_MESH_INFO,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .doit = batadv_netlink_get_mesh_info,
+ },
+ {
+ .cmd = BATADV_CMD_TP_METER,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .doit = batadv_netlink_tp_meter_start,
+ },
+ {
+ .cmd = BATADV_CMD_TP_METER_CANCEL,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .doit = batadv_netlink_tp_meter_cancel,
+ },
+};
+
+/**
+ * batadv_netlink_register - register batadv genl netlink family
+ */
+void __init batadv_netlink_register(void)
+{
+ int ret;
+
+ ret = genl_register_family_with_ops_groups(&batadv_netlink_family,
+ batadv_netlink_ops,
+ batadv_netlink_mcgrps);
+ if (ret)
+ pr_warn("unable to register netlink family");
+}
+
+/**
+ * batadv_netlink_unregister - unregister batadv genl netlink family
+ */
+void batadv_netlink_unregister(void)
+{
+ genl_unregister_family(&batadv_netlink_family);
+}
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
new file mode 100644
index 000000000000..945653ab58c6
--- /dev/null
+++ b/net/batman-adv/netlink.h
@@ -0,0 +1,32 @@
+/* Copyright (C) 2016 B.A.T.M.A.N. contributors:
+ *
+ * Matthias Schiffer
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_NETLINK_H_
+#define _NET_BATMAN_ADV_NETLINK_H_
+
+#include "main.h"
+
+#include <linux/types.h>
+
+void batadv_netlink_register(void);
+void batadv_netlink_unregister(void);
+
+int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
+ u8 result, u32 test_time, u64 total_bytes,
+ u32 cookie);
+
+#endif /* _NET_BATMAN_ADV_NETLINK_H_ */
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 678f06865312..293ef4ffd4e1 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -51,10 +51,12 @@
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "routing.h"
#include "send.h"
+#include "tvlv.h"
static struct lock_class_key batadv_nc_coding_hash_lock_class_key;
static struct lock_class_key batadv_nc_decoding_hash_lock_class_key;
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 076d258c92e1..7d1e5421f6bc 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -34,11 +34,13 @@
#include <linux/spinlock.h>
#include <linux/workqueue.h>
+#include "bat_algo.h"
#include "distributed-arp-table.h"
#include "fragmentation.h"
#include "gateway_client.h"
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "multicast.h"
#include "network-coding.h"
#include "routing.h"
@@ -532,8 +534,8 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
kref_init(&hardif_neigh->refcount);
- if (bat_priv->bat_algo_ops->bat_hardif_neigh_init)
- bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh);
+ if (bat_priv->algo_ops->neigh.hardif_init)
+ bat_priv->algo_ops->neigh.hardif_init(hardif_neigh);
hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list);
@@ -704,17 +706,17 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n",
BATADV_SOURCE_VERSION, primary_if->net_dev->name,
primary_if->net_dev->dev_addr, net_dev->name,
- bat_priv->bat_algo_ops->name);
+ bat_priv->algo_ops->name);
batadv_hardif_put(primary_if);
- if (!bat_priv->bat_algo_ops->bat_neigh_print) {
+ if (!bat_priv->algo_ops->neigh.print) {
seq_puts(seq,
"No printing function for this routing protocol\n");
return 0;
}
- bat_priv->bat_algo_ops->bat_neigh_print(bat_priv, seq);
+ bat_priv->algo_ops->neigh.print(bat_priv, seq);
return 0;
}
@@ -765,8 +767,8 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
batadv_frag_purge_orig(orig_node, NULL);
- if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
- orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
+ if (orig_node->bat_priv->algo_ops->orig.free)
+ orig_node->bat_priv->algo_ops->orig.free(orig_node);
kfree(orig_node->tt_buff);
kfree(orig_node);
@@ -1095,12 +1097,12 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
struct batadv_hard_iface *if_outgoing)
{
struct batadv_neigh_node *best = NULL, *neigh;
- struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+ struct batadv_algo_ops *bao = bat_priv->algo_ops;
rcu_read_lock();
hlist_for_each_entry_rcu(neigh, &orig_node->neigh_list, list) {
- if (best && (bao->bat_neigh_cmp(neigh, if_outgoing,
- best, if_outgoing) <= 0))
+ if (best && (bao->neigh.cmp(neigh, if_outgoing, best,
+ if_outgoing) <= 0))
continue;
if (!kref_get_unless_zero(&neigh->refcount))
@@ -1252,18 +1254,17 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n",
BATADV_SOURCE_VERSION, primary_if->net_dev->name,
primary_if->net_dev->dev_addr, net_dev->name,
- bat_priv->bat_algo_ops->name);
+ bat_priv->algo_ops->name);
batadv_hardif_put(primary_if);
- if (!bat_priv->bat_algo_ops->bat_orig_print) {
+ if (!bat_priv->algo_ops->orig.print) {
seq_puts(seq,
"No printing function for this routing protocol\n");
return 0;
}
- bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq,
- BATADV_IF_DEFAULT);
+ bat_priv->algo_ops->orig.print(bat_priv, seq, BATADV_IF_DEFAULT);
return 0;
}
@@ -1290,7 +1291,7 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
}
bat_priv = netdev_priv(hard_iface->soft_iface);
- if (!bat_priv->bat_algo_ops->bat_orig_print) {
+ if (!bat_priv->algo_ops->orig.print) {
seq_puts(seq,
"No printing function for this routing protocol\n");
goto out;
@@ -1304,9 +1305,9 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq, "[B.A.T.M.A.N. adv %s, IF/MAC: %s/%pM (%s %s)]\n",
BATADV_SOURCE_VERSION, hard_iface->net_dev->name,
hard_iface->net_dev->dev_addr,
- hard_iface->soft_iface->name, bat_priv->bat_algo_ops->name);
+ hard_iface->soft_iface->name, bat_priv->algo_ops->name);
- bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, hard_iface);
+ bat_priv->algo_ops->orig.print(bat_priv, seq, hard_iface);
out:
if (hard_iface)
@@ -1318,7 +1319,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
int max_if_num)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+ struct batadv_algo_ops *bao = bat_priv->algo_ops;
struct batadv_hashtable *hash = bat_priv->orig_hash;
struct hlist_head *head;
struct batadv_orig_node *orig_node;
@@ -1334,9 +1335,8 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
rcu_read_lock();
hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
ret = 0;
- if (bao->bat_orig_add_if)
- ret = bao->bat_orig_add_if(orig_node,
- max_if_num);
+ if (bao->orig.add_if)
+ ret = bao->orig.add_if(orig_node, max_if_num);
if (ret == -ENOMEM)
goto err;
}
@@ -1358,7 +1358,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
struct hlist_head *head;
struct batadv_hard_iface *hard_iface_tmp;
struct batadv_orig_node *orig_node;
- struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+ struct batadv_algo_ops *bao = bat_priv->algo_ops;
u32 i;
int ret;
@@ -1371,10 +1371,9 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
rcu_read_lock();
hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
ret = 0;
- if (bao->bat_orig_del_if)
- ret = bao->bat_orig_del_if(orig_node,
- max_if_num,
- hard_iface->if_num);
+ if (bao->orig.del_if)
+ ret = bao->orig.del_if(orig_node, max_if_num,
+ hard_iface->if_num);
if (ret == -ENOMEM)
goto err;
}
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 71567794df17..6b011ff64dd8 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -21,6 +21,8 @@
#include <asm/byteorder.h>
#include <linux/types.h>
+#define batadv_tp_is_error(n) ((u8)n > 127 ? 1 : 0)
+
/**
* enum batadv_packettype - types for batman-adv encapsulated packets
* @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV
@@ -93,6 +95,7 @@ enum batadv_icmp_packettype {
BATADV_ECHO_REQUEST = 8,
BATADV_TTL_EXCEEDED = 11,
BATADV_PARAMETER_PROBLEM = 12,
+ BATADV_TP = 15,
};
/**
@@ -285,6 +288,16 @@ struct batadv_elp_packet {
#define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet)
/**
+ * enum batadv_icmp_user_cmd_type - types for batman-adv icmp cmd modes
+ * @BATADV_TP_START: start a throughput meter run
+ * @BATADV_TP_STOP: stop a throughput meter run
+ */
+enum batadv_icmp_user_cmd_type {
+ BATADV_TP_START = 0,
+ BATADV_TP_STOP = 2,
+};
+
+/**
* struct batadv_icmp_header - common members among all the ICMP packets
* @packet_type: batman-adv packet type, part of the general header
* @version: batman-adv protocol version, part of the genereal header
@@ -334,6 +347,47 @@ struct batadv_icmp_packet {
__be16 seqno;
};
+/**
+ * struct batadv_icmp_tp_packet - ICMP TP Meter packet
+ * @packet_type: batman-adv packet type, part of the general header
+ * @version: batman-adv protocol version, part of the genereal header
+ * @ttl: time to live for this packet, part of the genereal header
+ * @msg_type: ICMP packet type
+ * @dst: address of the destination node
+ * @orig: address of the source node
+ * @uid: local ICMP socket identifier
+ * @subtype: TP packet subtype (see batadv_icmp_tp_subtype)
+ * @session: TP session identifier
+ * @seqno: the TP sequence number
+ * @timestamp: time when the packet has been sent. This value is filled in a
+ * TP_MSG and echoed back in the next TP_ACK so that the sender can compute the
+ * RTT. Since it is read only by the host which wrote it, there is no need to
+ * store it using network order
+ */
+struct batadv_icmp_tp_packet {
+ u8 packet_type;
+ u8 version;
+ u8 ttl;
+ u8 msg_type; /* see ICMP message types above */
+ u8 dst[ETH_ALEN];
+ u8 orig[ETH_ALEN];
+ u8 uid;
+ u8 subtype;
+ u8 session[2];
+ __be32 seqno;
+ __be32 timestamp;
+};
+
+/**
+ * enum batadv_icmp_tp_subtype - ICMP TP Meter packet subtypes
+ * @BATADV_TP_MSG: Msg from sender to receiver
+ * @BATADV_TP_ACK: acknowledgment from receiver to sender
+ */
+enum batadv_icmp_tp_subtype {
+ BATADV_TP_MSG = 0,
+ BATADV_TP_ACK,
+};
+
#define BATADV_RR_LEN 16
/**
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 319a58820197..af8e11933928 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -40,12 +40,15 @@
#include "fragmentation.h"
#include "hard-interface.h"
#include "icmp_socket.h"
+#include "log.h"
#include "network-coding.h"
#include "originator.h"
#include "packet.h"
#include "send.h"
#include "soft-interface.h"
+#include "tp_meter.h"
#include "translation-table.h"
+#include "tvlv.h"
static int batadv_route_unicast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
@@ -268,10 +271,19 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
icmph->ttl = BATADV_TTL;
res = batadv_send_skb_to_orig(skb, orig_node, NULL);
- if (res != NET_XMIT_DROP)
- ret = NET_RX_SUCCESS;
+ if (res == -1)
+ goto out;
+
+ ret = NET_RX_SUCCESS;
break;
+ case BATADV_TP:
+ if (!pskb_may_pull(skb, sizeof(struct batadv_icmp_tp_packet)))
+ goto out;
+
+ batadv_tp_meter_recv(bat_priv, skb);
+ ret = NET_RX_SUCCESS;
+ goto out;
default:
/* drop unknown type */
goto out;
@@ -290,7 +302,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if = NULL;
struct batadv_orig_node *orig_node = NULL;
struct batadv_icmp_packet *icmp_packet;
- int ret = NET_RX_DROP;
+ int res, ret = NET_RX_DROP;
icmp_packet = (struct batadv_icmp_packet *)skb->data;
@@ -321,7 +333,8 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
icmp_packet->msg_type = BATADV_TTL_EXCEEDED;
icmp_packet->ttl = BATADV_TTL;
- if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
+ res = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ if (res != -1)
ret = NET_RX_SUCCESS;
out:
@@ -341,7 +354,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
struct ethhdr *ethhdr;
struct batadv_orig_node *orig_node = NULL;
int hdr_size = sizeof(struct batadv_icmp_header);
- int ret = NET_RX_DROP;
+ int res, ret = NET_RX_DROP;
/* drop packet if it has not necessary minimum size */
if (unlikely(!pskb_may_pull(skb, hdr_size)))
@@ -408,7 +421,8 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
icmph->ttl--;
/* route it */
- if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP)
+ res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
+ if (res != -1)
ret = NET_RX_SUCCESS;
out:
@@ -469,7 +483,7 @@ batadv_find_router(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
struct batadv_hard_iface *recv_if)
{
- struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+ struct batadv_algo_ops *bao = bat_priv->algo_ops;
struct batadv_neigh_node *first_candidate_router = NULL;
struct batadv_neigh_node *next_candidate_router = NULL;
struct batadv_neigh_node *router, *cand_router = NULL;
@@ -523,9 +537,9 @@ batadv_find_router(struct batadv_priv *bat_priv,
/* alternative candidate should be good enough to be
* considered
*/
- if (!bao->bat_neigh_is_similar_or_better(cand_router,
- cand->if_outgoing,
- router, recv_if))
+ if (!bao->neigh.is_similar_or_better(cand_router,
+ cand->if_outgoing, router,
+ recv_if))
goto next;
/* don't use the same router twice */
@@ -645,6 +659,8 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
len = skb->len;
res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
+ if (res == -1)
+ goto out;
/* translate transmit result into receive result */
if (res == NET_XMIT_SUCCESS) {
@@ -652,13 +668,10 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
len + ETH_HLEN);
-
- ret = NET_RX_SUCCESS;
- } else if (res == -EINPROGRESS) {
- /* skb was buffered and consumed */
- ret = NET_RX_SUCCESS;
}
+ ret = NET_RX_SUCCESS;
+
out:
if (orig_node)
batadv_orig_node_put(orig_node);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 59e695b5cfbd..3a10d87b4b76 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -20,10 +20,11 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/fs.h>
-#include <linux/if_ether.h>
#include <linux/if.h>
+#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/kref.h>
@@ -42,6 +43,7 @@
#include "fragmentation.h"
#include "gateway_client.h"
#include "hard-interface.h"
+#include "log.h"
#include "network-coding.h"
#include "originator.h"
#include "routing.h"
@@ -71,6 +73,7 @@ int batadv_send_skb_packet(struct sk_buff *skb,
{
struct batadv_priv *bat_priv;
struct ethhdr *ethhdr;
+ int ret;
bat_priv = netdev_priv(hard_iface->soft_iface);
@@ -108,8 +111,15 @@ int batadv_send_skb_packet(struct sk_buff *skb,
/* dev_queue_xmit() returns a negative result on error. However on
* congestion and traffic shaping, it drops and returns NET_XMIT_DROP
* (which is > 0). This will not be treated as an error.
+ *
+ * a negative value cannot be returned because it could be interepreted
+ * as not consumed skb by callers of batadv_send_skb_to_orig.
*/
- return dev_queue_xmit(skb);
+ ret = dev_queue_xmit(skb);
+ if (ret < 0)
+ ret = NET_XMIT_DROP;
+
+ return ret;
send_skb_err:
kfree_skb(skb);
return NET_XMIT_DROP;
@@ -155,8 +165,11 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
* host, NULL can be passed as recv_if and no interface alternating is
* attempted.
*
- * Return: NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or
- * -EINPROGRESS if the skb is buffered for later transmit.
+ * Return: -1 on failure (and the skb is not consumed), -EINPROGRESS if the
+ * skb is buffered for later transmit or the NET_XMIT status returned by the
+ * lower routine if the packet has been passed down.
+ *
+ * If the returning value is not -1 the skb has been consumed.
*/
int batadv_send_skb_to_orig(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
@@ -164,7 +177,7 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
{
struct batadv_priv *bat_priv = orig_node->bat_priv;
struct batadv_neigh_node *neigh_node;
- int ret = NET_XMIT_DROP;
+ int ret = -1;
/* batadv_find_router() increases neigh_nodes refcount if found. */
neigh_node = batadv_find_router(bat_priv, orig_node, recv_if);
@@ -177,8 +190,7 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
if (atomic_read(&bat_priv->fragmentation) &&
skb->len > neigh_node->if_incoming->net_dev->mtu) {
/* Fragment and send packet. */
- if (batadv_frag_send_packet(skb, orig_node, neigh_node))
- ret = NET_XMIT_SUCCESS;
+ ret = batadv_frag_send_packet(skb, orig_node, neigh_node);
goto out;
}
@@ -187,12 +199,10 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
* (i.e. being forwarded). If the packet originates from this node or if
* network coding fails, then send the packet as usual.
*/
- if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) {
+ if (recv_if && batadv_nc_skb_forward(skb, neigh_node))
ret = -EINPROGRESS;
- } else {
- batadv_send_unicast_skb(skb, neigh_node);
- ret = NET_XMIT_SUCCESS;
- }
+ else
+ ret = batadv_send_unicast_skb(skb, neigh_node);
out:
if (neigh_node)
@@ -318,7 +328,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
{
struct batadv_unicast_packet *unicast_packet;
struct ethhdr *ethhdr;
- int ret = NET_XMIT_DROP;
+ int res, ret = NET_XMIT_DROP;
if (!orig_node)
goto out;
@@ -355,7 +365,8 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid))
unicast_packet->ttvn = unicast_packet->ttvn - 1;
- if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
+ res = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ if (res != -1)
ret = NET_XMIT_SUCCESS;
out:
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 4ba894ca8b2a..7527c0652dd5 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -48,6 +48,7 @@
#include <linux/types.h>
#include <linux/workqueue.h>
+#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
#include "debugfs.h"
#include "distributed-arp-table.h"
@@ -841,6 +842,8 @@ static int batadv_softif_init_late(struct net_device *dev)
#ifdef CONFIG_BATMAN_ADV_BLA
atomic_set(&bat_priv->bla.num_requests, 0);
#endif
+ atomic_set(&bat_priv->tp_num, 0);
+
bat_priv->tt.last_changeset = NULL;
bat_priv->tt.last_changeset_len = 0;
bat_priv->isolation_mark = 0;
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 233abcf33c03..fe9ca94ddee2 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -25,8 +25,8 @@
#include <linux/fs.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
-#include <linux/kref.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
#include <linux/rculist.h>
@@ -38,11 +38,12 @@
#include <linux/string.h>
#include <linux/stringify.h>
+#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
#include "gateway_client.h"
#include "gateway_common.h"
-#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
+#include "log.h"
#include "network-coding.h"
#include "packet.h"
#include "soft-interface.h"
@@ -411,7 +412,7 @@ static ssize_t batadv_show_bat_algo(struct kobject *kobj,
{
struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
- return sprintf(buff, "%s\n", bat_priv->bat_algo_ops->name);
+ return sprintf(buff, "%s\n", bat_priv->algo_ops->name);
}
static void batadv_post_gw_reselect(struct net_device *net_dev)
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
new file mode 100644
index 000000000000..2333777f919d
--- /dev/null
+++ b/net/batman-adv/tp_meter.c
@@ -0,0 +1,1507 @@
+/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors:
+ *
+ * Edo Monticelli, Antonio Quartulli
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "tp_meter.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/byteorder/generic.h>
+#include <linux/cache.h>
+#include <linux/compiler.h>
+#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/param.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <uapi/linux/batman_adv.h>
+
+#include "hard-interface.h"
+#include "log.h"
+#include "netlink.h"
+#include "originator.h"
+#include "packet.h"
+#include "send.h"
+
+/**
+ * BATADV_TP_DEF_TEST_LENGTH - Default test length if not specified by the user
+ * in milliseconds
+ */
+#define BATADV_TP_DEF_TEST_LENGTH 10000
+
+/**
+ * BATADV_TP_AWND - Advertised window by the receiver (in bytes)
+ */
+#define BATADV_TP_AWND 0x20000000
+
+/**
+ * BATADV_TP_RECV_TIMEOUT - Receiver activity timeout. If the receiver does not
+ * get anything for such amount of milliseconds, the connection is killed
+ */
+#define BATADV_TP_RECV_TIMEOUT 1000
+
+/**
+ * BATADV_TP_MAX_RTO - Maximum sender timeout. If the sender RTO gets beyond
+ * such amound of milliseconds, the receiver is considered unreachable and the
+ * connection is killed
+ */
+#define BATADV_TP_MAX_RTO 30000
+
+/**
+ * BATADV_TP_FIRST_SEQ - First seqno of each session. The number is rather high
+ * in order to immediately trigger a wrap around (test purposes)
+ */
+#define BATADV_TP_FIRST_SEQ ((u32)-1 - 2000)
+
+/**
+ * BATADV_TP_PLEN - length of the payload (data after the batadv_unicast header)
+ * to simulate
+ */
+#define BATADV_TP_PLEN (BATADV_TP_PACKET_LEN - ETH_HLEN - \
+ sizeof(struct batadv_unicast_packet))
+
+static u8 batadv_tp_prerandom[4096] __read_mostly;
+
+/**
+ * batadv_tp_session_cookie - generate session cookie based on session ids
+ * @session: TP session identifier
+ * @icmp_uid: icmp pseudo uid of the tp session
+ *
+ * Return: 32 bit tp_meter session cookie
+ */
+static u32 batadv_tp_session_cookie(const u8 session[2], u8 icmp_uid)
+{
+ u32 cookie;
+
+ cookie = icmp_uid << 16;
+ cookie |= session[0] << 8;
+ cookie |= session[1];
+
+ return cookie;
+}
+
+/**
+ * batadv_tp_cwnd - compute the new cwnd size
+ * @base: base cwnd size value
+ * @increment: the value to add to base to get the new size
+ * @min: minumim cwnd value (usually MSS)
+ *
+ * Return the new cwnd size and ensures it does not exceed the Advertised
+ * Receiver Window size. It is wrap around safe.
+ * For details refer to Section 3.1 of RFC5681
+ *
+ * Return: new congestion window size in bytes
+ */
+static u32 batadv_tp_cwnd(u32 base, u32 increment, u32 min)
+{
+ u32 new_size = base + increment;
+
+ /* check for wrap-around */
+ if (new_size < base)
+ new_size = (u32)ULONG_MAX;
+
+ new_size = min_t(u32, new_size, BATADV_TP_AWND);
+
+ return max_t(u32, new_size, min);
+}
+
+/**
+ * batadv_tp_updated_cwnd - update the Congestion Windows
+ * @tp_vars: the private data of the current TP meter session
+ * @mss: maximum segment size of transmission
+ *
+ * 1) if the session is in Slow Start, the CWND has to be increased by 1
+ * MSS every unique received ACK
+ * 2) if the session is in Congestion Avoidance, the CWND has to be
+ * increased by MSS * MSS / CWND for every unique received ACK
+ */
+static void batadv_tp_update_cwnd(struct batadv_tp_vars *tp_vars, u32 mss)
+{
+ spin_lock_bh(&tp_vars->cwnd_lock);
+
+ /* slow start... */
+ if (tp_vars->cwnd <= tp_vars->ss_threshold) {
+ tp_vars->dec_cwnd = 0;
+ tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd, mss, mss);
+ spin_unlock_bh(&tp_vars->cwnd_lock);
+ return;
+ }
+
+ /* increment CWND at least of 1 (section 3.1 of RFC5681) */
+ tp_vars->dec_cwnd += max_t(u32, 1U << 3,
+ ((mss * mss) << 6) / (tp_vars->cwnd << 3));
+ if (tp_vars->dec_cwnd < (mss << 3)) {
+ spin_unlock_bh(&tp_vars->cwnd_lock);
+ return;
+ }
+
+ tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd, mss, mss);
+ tp_vars->dec_cwnd = 0;
+
+ spin_unlock_bh(&tp_vars->cwnd_lock);
+}
+
+/**
+ * batadv_tp_update_rto - calculate new retransmission timeout
+ * @tp_vars: the private data of the current TP meter session
+ * @new_rtt: new roundtrip time in msec
+ */
+static void batadv_tp_update_rto(struct batadv_tp_vars *tp_vars,
+ u32 new_rtt)
+{
+ long m = new_rtt;
+
+ /* RTT update
+ * Details in Section 2.2 and 2.3 of RFC6298
+ *
+ * It's tricky to understand. Don't lose hair please.
+ * Inspired by tcp_rtt_estimator() tcp_input.c
+ */
+ if (tp_vars->srtt != 0) {
+ m -= (tp_vars->srtt >> 3); /* m is now error in rtt est */
+ tp_vars->srtt += m; /* rtt = 7/8 srtt + 1/8 new */
+ if (m < 0)
+ m = -m;
+
+ m -= (tp_vars->rttvar >> 2);
+ tp_vars->rttvar += m; /* mdev ~= 3/4 rttvar + 1/4 new */
+ } else {
+ /* first measure getting in */
+ tp_vars->srtt = m << 3; /* take the measured time to be srtt */
+ tp_vars->rttvar = m << 1; /* new_rtt / 2 */
+ }
+
+ /* rto = srtt + 4 * rttvar.
+ * rttvar is scaled by 4, therefore doesn't need to be multiplied
+ */
+ tp_vars->rto = (tp_vars->srtt >> 3) + tp_vars->rttvar;
+}
+
+/**
+ * batadv_tp_batctl_notify - send client status result to client
+ * @reason: reason for tp meter session stop
+ * @dst: destination of tp_meter session
+ * @bat_priv: the bat priv with all the soft interface information
+ * @start_time: start of transmission in jiffies
+ * @total_sent: bytes acked to the receiver
+ * @cookie: cookie of tp_meter session
+ */
+static void batadv_tp_batctl_notify(enum batadv_tp_meter_reason reason,
+ const u8 *dst, struct batadv_priv *bat_priv,
+ unsigned long start_time, u64 total_sent,
+ u32 cookie)
+{
+ u32 test_time;
+ u8 result;
+ u32 total_bytes;
+
+ if (!batadv_tp_is_error(reason)) {
+ result = BATADV_TP_REASON_COMPLETE;
+ test_time = jiffies_to_msecs(jiffies - start_time);
+ total_bytes = total_sent;
+ } else {
+ result = reason;
+ test_time = 0;
+ total_bytes = 0;
+ }
+
+ batadv_netlink_tpmeter_notify(bat_priv, dst, result, test_time,
+ total_bytes, cookie);
+}
+
+/**
+ * batadv_tp_batctl_error_notify - send client error result to client
+ * @reason: reason for tp meter session stop
+ * @dst: destination of tp_meter session
+ * @bat_priv: the bat priv with all the soft interface information
+ * @cookie: cookie of tp_meter session
+ */
+static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason,
+ const u8 *dst,
+ struct batadv_priv *bat_priv,
+ u32 cookie)
+{
+ batadv_tp_batctl_notify(reason, dst, bat_priv, 0, 0, cookie);
+}
+
+/**
+ * batadv_tp_list_find - find a tp_vars object in the global list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the other endpoint MAC address to look for
+ *
+ * Look for a tp_vars object matching dst as end_point and return it after
+ * having incremented the refcounter. Return NULL is not found
+ *
+ * Return: matching tp_vars or NULL when no tp_vars with @dst was found
+ */
+static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
+ const u8 *dst)
+{
+ struct batadv_tp_vars *pos, *tp_vars = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(pos, &bat_priv->tp_list, list) {
+ if (!batadv_compare_eth(pos->other_end, dst))
+ continue;
+
+ /* most of the time this function is invoked during the normal
+ * process..it makes sens to pay more when the session is
+ * finished and to speed the process up during the measurement
+ */
+ if (unlikely(!kref_get_unless_zero(&pos->refcount)))
+ continue;
+
+ tp_vars = pos;
+ break;
+ }
+ rcu_read_unlock();
+
+ return tp_vars;
+}
+
+/**
+ * batadv_tp_list_find_session - find tp_vars session object in the global list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the other endpoint MAC address to look for
+ * @session: session identifier
+ *
+ * Look for a tp_vars object matching dst as end_point, session as tp meter
+ * session and return it after having incremented the refcounter. Return NULL
+ * is not found
+ *
+ * Return: matching tp_vars or NULL when no tp_vars was found
+ */
+static struct batadv_tp_vars *
+batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst,
+ const u8 *session)
+{
+ struct batadv_tp_vars *pos, *tp_vars = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(pos, &bat_priv->tp_list, list) {
+ if (!batadv_compare_eth(pos->other_end, dst))
+ continue;
+
+ if (memcmp(pos->session, session, sizeof(pos->session)) != 0)
+ continue;
+
+ /* most of the time this function is invoked during the normal
+ * process..it makes sense to pay more when the session is
+ * finished and to speed the process up during the measurement
+ */
+ if (unlikely(!kref_get_unless_zero(&pos->refcount)))
+ continue;
+
+ tp_vars = pos;
+ break;
+ }
+ rcu_read_unlock();
+
+ return tp_vars;
+}
+
+/**
+ * batadv_tp_vars_release - release batadv_tp_vars from lists and queue for
+ * free after rcu grace period
+ * @ref: kref pointer of the batadv_tp_vars
+ */
+static void batadv_tp_vars_release(struct kref *ref)
+{
+ struct batadv_tp_vars *tp_vars;
+ struct batadv_tp_unacked *un, *safe;
+
+ tp_vars = container_of(ref, struct batadv_tp_vars, refcount);
+
+ /* lock should not be needed because this object is now out of any
+ * context!
+ */
+ spin_lock_bh(&tp_vars->unacked_lock);
+ list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) {
+ list_del(&un->list);
+ kfree(un);
+ }
+ spin_unlock_bh(&tp_vars->unacked_lock);
+
+ kfree_rcu(tp_vars, rcu);
+}
+
+/**
+ * batadv_tp_vars_put - decrement the batadv_tp_vars refcounter and possibly
+ * release it
+ * @tp_vars: the private data of the current TP meter session to be free'd
+ */
+static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars)
+{
+ kref_put(&tp_vars->refcount, batadv_tp_vars_release);
+}
+
+/**
+ * batadv_tp_sender_cleanup - cleanup sender data and drop and timer
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tp_vars: the private data of the current TP meter session to cleanup
+ */
+static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv,
+ struct batadv_tp_vars *tp_vars)
+{
+ cancel_delayed_work(&tp_vars->finish_work);
+
+ spin_lock_bh(&tp_vars->bat_priv->tp_list_lock);
+ hlist_del_rcu(&tp_vars->list);
+ spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock);
+
+ /* drop list reference */
+ batadv_tp_vars_put(tp_vars);
+
+ atomic_dec(&tp_vars->bat_priv->tp_num);
+
+ /* kill the timer and remove its reference */
+ del_timer_sync(&tp_vars->timer);
+ /* the worker might have rearmed itself therefore we kill it again. Note
+ * that if the worker should run again before invoking the following
+ * del_timer(), it would not re-arm itself once again because the status
+ * is OFF now
+ */
+ del_timer(&tp_vars->timer);
+ batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_sender_end - print info about ended session and inform client
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tp_vars: the private data of the current TP meter session
+ */
+static void batadv_tp_sender_end(struct batadv_priv *bat_priv,
+ struct batadv_tp_vars *tp_vars)
+{
+ u32 session_cookie;
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Test towards %pM finished..shutting down (reason=%d)\n",
+ tp_vars->other_end, tp_vars->reason);
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Last timing stats: SRTT=%ums RTTVAR=%ums RTO=%ums\n",
+ tp_vars->srtt >> 3, tp_vars->rttvar >> 2, tp_vars->rto);
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Final values: cwnd=%u ss_threshold=%u\n",
+ tp_vars->cwnd, tp_vars->ss_threshold);
+
+ session_cookie = batadv_tp_session_cookie(tp_vars->session,
+ tp_vars->icmp_uid);
+
+ batadv_tp_batctl_notify(tp_vars->reason,
+ tp_vars->other_end,
+ bat_priv,
+ tp_vars->start_time,
+ atomic64_read(&tp_vars->tot_sent),
+ session_cookie);
+}
+
+/**
+ * batadv_tp_sender_shutdown - let sender thread/timer stop gracefully
+ * @tp_vars: the private data of the current TP meter session
+ * @reason: reason for tp meter session stop
+ */
+static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars,
+ enum batadv_tp_meter_reason reason)
+{
+ if (!atomic_dec_and_test(&tp_vars->sending))
+ return;
+
+ tp_vars->reason = reason;
+}
+
+/**
+ * batadv_tp_sender_finish - stop sender session after test_length was reached
+ * @work: delayed work reference of the related tp_vars
+ */
+static void batadv_tp_sender_finish(struct work_struct *work)
+{
+ struct delayed_work *delayed_work;
+ struct batadv_tp_vars *tp_vars;
+
+ delayed_work = to_delayed_work(work);
+ tp_vars = container_of(delayed_work, struct batadv_tp_vars,
+ finish_work);
+
+ batadv_tp_sender_shutdown(tp_vars, BATADV_TP_REASON_COMPLETE);
+}
+
+/**
+ * batadv_tp_reset_sender_timer - reschedule the sender timer
+ * @tp_vars: the private TP meter data for this session
+ *
+ * Reschedule the timer using tp_vars->rto as delay
+ */
+static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
+{
+ /* most of the time this function is invoked while normal packet
+ * reception...
+ */
+ if (unlikely(atomic_read(&tp_vars->sending) == 0))
+ /* timer ref will be dropped in batadv_tp_sender_cleanup */
+ return;
+
+ mod_timer(&tp_vars->timer, jiffies + msecs_to_jiffies(tp_vars->rto));
+}
+
+/**
+ * batadv_tp_sender_timeout - timer that fires in case of packet loss
+ * @arg: address of the related tp_vars
+ *
+ * If fired it means that there was packet loss.
+ * Switch to Slow Start, set the ss_threshold to half of the current cwnd and
+ * reset the cwnd to 3*MSS
+ */
+static void batadv_tp_sender_timeout(unsigned long arg)
+{
+ struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg;
+ struct batadv_priv *bat_priv = tp_vars->bat_priv;
+
+ if (atomic_read(&tp_vars->sending) == 0)
+ return;
+
+ /* if the user waited long enough...shutdown the test */
+ if (unlikely(tp_vars->rto >= BATADV_TP_MAX_RTO)) {
+ batadv_tp_sender_shutdown(tp_vars,
+ BATADV_TP_REASON_DST_UNREACHABLE);
+ return;
+ }
+
+ /* RTO exponential backoff
+ * Details in Section 5.5 of RFC6298
+ */
+ tp_vars->rto <<= 1;
+
+ spin_lock_bh(&tp_vars->cwnd_lock);
+
+ tp_vars->ss_threshold = tp_vars->cwnd >> 1;
+ if (tp_vars->ss_threshold < BATADV_TP_PLEN * 2)
+ tp_vars->ss_threshold = BATADV_TP_PLEN * 2;
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: RTO fired during test towards %pM! cwnd=%u new ss_thr=%u, resetting last_sent to %u\n",
+ tp_vars->other_end, tp_vars->cwnd, tp_vars->ss_threshold,
+ atomic_read(&tp_vars->last_acked));
+
+ tp_vars->cwnd = BATADV_TP_PLEN * 3;
+
+ spin_unlock_bh(&tp_vars->cwnd_lock);
+
+ /* resend the non-ACKed packets.. */
+ tp_vars->last_sent = atomic_read(&tp_vars->last_acked);
+ wake_up(&tp_vars->more_bytes);
+
+ batadv_tp_reset_sender_timer(tp_vars);
+}
+
+/**
+ * batadv_tp_fill_prerandom - Fill buffer with prefetched random bytes
+ * @tp_vars: the private TP meter data for this session
+ * @buf: Buffer to fill with bytes
+ * @nbytes: amount of pseudorandom bytes
+ */
+static void batadv_tp_fill_prerandom(struct batadv_tp_vars *tp_vars,
+ u8 *buf, size_t nbytes)
+{
+ u32 local_offset;
+ size_t bytes_inbuf;
+ size_t to_copy;
+ size_t pos = 0;
+
+ spin_lock_bh(&tp_vars->prerandom_lock);
+ local_offset = tp_vars->prerandom_offset;
+ tp_vars->prerandom_offset += nbytes;
+ tp_vars->prerandom_offset %= sizeof(batadv_tp_prerandom);
+ spin_unlock_bh(&tp_vars->prerandom_lock);
+
+ while (nbytes) {
+ local_offset %= sizeof(batadv_tp_prerandom);
+ bytes_inbuf = sizeof(batadv_tp_prerandom) - local_offset;
+ to_copy = min(nbytes, bytes_inbuf);
+
+ memcpy(&buf[pos], &batadv_tp_prerandom[local_offset], to_copy);
+ pos += to_copy;
+ nbytes -= to_copy;
+ local_offset = 0;
+ }
+}
+
+/**
+ * batadv_tp_send_msg - send a single message
+ * @tp_vars: the private TP meter data for this session
+ * @src: source mac address
+ * @orig_node: the originator of the destination
+ * @seqno: sequence number of this packet
+ * @len: length of the entire packet
+ * @session: session identifier
+ * @uid: local ICMP "socket" index
+ * @timestamp: timestamp in jiffies which is replied in ack
+ *
+ * Create and send a single TP Meter message.
+ *
+ * Return: 0 on success, BATADV_TP_REASON_DST_UNREACHABLE if the destination is
+ * not reachable, BATADV_TP_REASON_MEMORY_ERROR if the packet couldn't be
+ * allocated
+ */
+static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src,
+ struct batadv_orig_node *orig_node,
+ u32 seqno, size_t len, const u8 *session,
+ int uid, u32 timestamp)
+{
+ struct batadv_icmp_tp_packet *icmp;
+ struct sk_buff *skb;
+ int r;
+ u8 *data;
+ size_t data_len;
+
+ skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
+ if (unlikely(!skb))
+ return BATADV_TP_REASON_MEMORY_ERROR;
+
+ skb_reserve(skb, ETH_HLEN);
+ icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp));
+
+ /* fill the icmp header */
+ ether_addr_copy(icmp->dst, orig_node->orig);
+ ether_addr_copy(icmp->orig, src);
+ icmp->version = BATADV_COMPAT_VERSION;
+ icmp->packet_type = BATADV_ICMP;
+ icmp->ttl = BATADV_TTL;
+ icmp->msg_type = BATADV_TP;
+ icmp->uid = uid;
+
+ icmp->subtype = BATADV_TP_MSG;
+ memcpy(icmp->session, session, sizeof(icmp->session));
+ icmp->seqno = htonl(seqno);
+ icmp->timestamp = htonl(timestamp);
+
+ data_len = len - sizeof(*icmp);
+ data = (u8 *)skb_put(skb, data_len);
+ batadv_tp_fill_prerandom(tp_vars, data, data_len);
+
+ r = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ if (r == -1)
+ kfree_skb(skb);
+
+ if (r == NET_XMIT_SUCCESS)
+ return 0;
+
+ return BATADV_TP_REASON_CANT_SEND;
+}
+
+/**
+ * batadv_tp_recv_ack - ACK receiving function
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the buffer containing the received packet
+ *
+ * Process a received TP ACK packet
+ */
+static void batadv_tp_recv_ack(struct batadv_priv *bat_priv,
+ const struct sk_buff *skb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ struct batadv_orig_node *orig_node = NULL;
+ const struct batadv_icmp_tp_packet *icmp;
+ struct batadv_tp_vars *tp_vars;
+ size_t packet_len, mss;
+ u32 rtt, recv_ack, cwnd;
+ unsigned char *dev_addr;
+
+ packet_len = BATADV_TP_PLEN;
+ mss = BATADV_TP_PLEN;
+ packet_len += sizeof(struct batadv_unicast_packet);
+
+ icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+ /* find the tp_vars */
+ tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
+ icmp->session);
+ if (unlikely(!tp_vars))
+ return;
+
+ if (unlikely(atomic_read(&tp_vars->sending) == 0))
+ goto out;
+
+ /* old ACK? silently drop it.. */
+ if (batadv_seq_before(ntohl(icmp->seqno),
+ (u32)atomic_read(&tp_vars->last_acked)))
+ goto out;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (unlikely(!primary_if))
+ goto out;
+
+ orig_node = batadv_orig_hash_find(bat_priv, icmp->orig);
+ if (unlikely(!orig_node))
+ goto out;
+
+ /* update RTO with the new sampled RTT, if any */
+ rtt = jiffies_to_msecs(jiffies) - ntohl(icmp->timestamp);
+ if (icmp->timestamp && rtt)
+ batadv_tp_update_rto(tp_vars, rtt);
+
+ /* ACK for new data... reset the timer */
+ batadv_tp_reset_sender_timer(tp_vars);
+
+ recv_ack = ntohl(icmp->seqno);
+
+ /* check if this ACK is a duplicate */
+ if (atomic_read(&tp_vars->last_acked) == recv_ack) {
+ atomic_inc(&tp_vars->dup_acks);
+ if (atomic_read(&tp_vars->dup_acks) != 3)
+ goto out;
+
+ if (recv_ack >= tp_vars->recover)
+ goto out;
+
+ /* if this is the third duplicate ACK do Fast Retransmit */
+ batadv_tp_send_msg(tp_vars, primary_if->net_dev->dev_addr,
+ orig_node, recv_ack, packet_len,
+ icmp->session, icmp->uid,
+ jiffies_to_msecs(jiffies));
+
+ spin_lock_bh(&tp_vars->cwnd_lock);
+
+ /* Fast Recovery */
+ tp_vars->fast_recovery = true;
+ /* Set recover to the last outstanding seqno when Fast Recovery
+ * is entered. RFC6582, Section 3.2, step 1
+ */
+ tp_vars->recover = tp_vars->last_sent;
+ tp_vars->ss_threshold = tp_vars->cwnd >> 1;
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: Fast Recovery, (cur cwnd=%u) ss_thr=%u last_sent=%u recv_ack=%u\n",
+ tp_vars->cwnd, tp_vars->ss_threshold,
+ tp_vars->last_sent, recv_ack);
+ tp_vars->cwnd = batadv_tp_cwnd(tp_vars->ss_threshold, 3 * mss,
+ mss);
+ tp_vars->dec_cwnd = 0;
+ tp_vars->last_sent = recv_ack;
+
+ spin_unlock_bh(&tp_vars->cwnd_lock);
+ } else {
+ /* count the acked data */
+ atomic64_add(recv_ack - atomic_read(&tp_vars->last_acked),
+ &tp_vars->tot_sent);
+ /* reset the duplicate ACKs counter */
+ atomic_set(&tp_vars->dup_acks, 0);
+
+ if (tp_vars->fast_recovery) {
+ /* partial ACK */
+ if (batadv_seq_before(recv_ack, tp_vars->recover)) {
+ /* this is another hole in the window. React
+ * immediately as specified by NewReno (see
+ * Section 3.2 of RFC6582 for details)
+ */
+ dev_addr = primary_if->net_dev->dev_addr;
+ batadv_tp_send_msg(tp_vars, dev_addr,
+ orig_node, recv_ack,
+ packet_len, icmp->session,
+ icmp->uid,
+ jiffies_to_msecs(jiffies));
+ tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd,
+ mss, mss);
+ } else {
+ tp_vars->fast_recovery = false;
+ /* set cwnd to the value of ss_threshold at the
+ * moment that Fast Recovery was entered.
+ * RFC6582, Section 3.2, step 3
+ */
+ cwnd = batadv_tp_cwnd(tp_vars->ss_threshold, 0,
+ mss);
+ tp_vars->cwnd = cwnd;
+ }
+ goto move_twnd;
+ }
+
+ if (recv_ack - atomic_read(&tp_vars->last_acked) >= mss)
+ batadv_tp_update_cwnd(tp_vars, mss);
+move_twnd:
+ /* move the Transmit Window */
+ atomic_set(&tp_vars->last_acked, recv_ack);
+ }
+
+ wake_up(&tp_vars->more_bytes);
+out:
+ if (likely(primary_if))
+ batadv_hardif_put(primary_if);
+ if (likely(orig_node))
+ batadv_orig_node_put(orig_node);
+ if (likely(tp_vars))
+ batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_avail - check if congestion window is not full
+ * @tp_vars: the private data of the current TP meter session
+ * @payload_len: size of the payload of a single message
+ *
+ * Return: true when congestion window is not full, false otherwise
+ */
+static bool batadv_tp_avail(struct batadv_tp_vars *tp_vars,
+ size_t payload_len)
+{
+ u32 win_left, win_limit;
+
+ win_limit = atomic_read(&tp_vars->last_acked) + tp_vars->cwnd;
+ win_left = win_limit - tp_vars->last_sent;
+
+ return win_left >= payload_len;
+}
+
+/**
+ * batadv_tp_wait_available - wait until congestion window becomes free or
+ * timeout is reached
+ * @tp_vars: the private data of the current TP meter session
+ * @plen: size of the payload of a single message
+ *
+ * Return: 0 if the condition evaluated to false after the timeout elapsed,
+ * 1 if the condition evaluated to true after the timeout elapsed, the
+ * remaining jiffies (at least 1) if the condition evaluated to true before
+ * the timeout elapsed, or -ERESTARTSYS if it was interrupted by a signal.
+ */
+static int batadv_tp_wait_available(struct batadv_tp_vars *tp_vars, size_t plen)
+{
+ int ret;
+
+ ret = wait_event_interruptible_timeout(tp_vars->more_bytes,
+ batadv_tp_avail(tp_vars, plen),
+ HZ / 10);
+
+ return ret;
+}
+
+/**
+ * batadv_tp_send - main sending thread of a tp meter session
+ * @arg: address of the related tp_vars
+ *
+ * Return: nothing, this function never returns
+ */
+static int batadv_tp_send(void *arg)
+{
+ struct batadv_tp_vars *tp_vars = arg;
+ struct batadv_priv *bat_priv = tp_vars->bat_priv;
+ struct batadv_hard_iface *primary_if = NULL;
+ struct batadv_orig_node *orig_node = NULL;
+ size_t payload_len, packet_len;
+ int err = 0;
+
+ if (unlikely(tp_vars->role != BATADV_TP_SENDER)) {
+ err = BATADV_TP_REASON_DST_UNREACHABLE;
+ tp_vars->reason = err;
+ goto out;
+ }
+
+ orig_node = batadv_orig_hash_find(bat_priv, tp_vars->other_end);
+ if (unlikely(!orig_node)) {
+ err = BATADV_TP_REASON_DST_UNREACHABLE;
+ tp_vars->reason = err;
+ goto out;
+ }
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (unlikely(!primary_if)) {
+ err = BATADV_TP_REASON_DST_UNREACHABLE;
+ goto out;
+ }
+
+ /* assume that all the hard_interfaces have a correctly
+ * configured MTU, so use the soft_iface MTU as MSS.
+ * This might not be true and in that case the fragmentation
+ * should be used.
+ * Now, try to send the packet as it is
+ */
+ payload_len = BATADV_TP_PLEN;
+ BUILD_BUG_ON(sizeof(struct batadv_icmp_tp_packet) > BATADV_TP_PLEN);
+
+ batadv_tp_reset_sender_timer(tp_vars);
+
+ /* queue the worker in charge of terminating the test */
+ queue_delayed_work(batadv_event_workqueue, &tp_vars->finish_work,
+ msecs_to_jiffies(tp_vars->test_length));
+
+ while (atomic_read(&tp_vars->sending) != 0) {
+ if (unlikely(!batadv_tp_avail(tp_vars, payload_len))) {
+ batadv_tp_wait_available(tp_vars, payload_len);
+ continue;
+ }
+
+ /* to emulate normal unicast traffic, add to the payload len
+ * the size of the unicast header
+ */
+ packet_len = payload_len + sizeof(struct batadv_unicast_packet);
+
+ err = batadv_tp_send_msg(tp_vars, primary_if->net_dev->dev_addr,
+ orig_node, tp_vars->last_sent,
+ packet_len,
+ tp_vars->session, tp_vars->icmp_uid,
+ jiffies_to_msecs(jiffies));
+
+ /* something went wrong during the preparation/transmission */
+ if (unlikely(err && err != BATADV_TP_REASON_CANT_SEND)) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: batadv_tp_send() cannot send packets (%d)\n",
+ err);
+ /* ensure nobody else tries to stop the thread now */
+ if (atomic_dec_and_test(&tp_vars->sending))
+ tp_vars->reason = err;
+ break;
+ }
+
+ /* right-shift the TWND */
+ if (!err)
+ tp_vars->last_sent += payload_len;
+
+ cond_resched();
+ }
+
+out:
+ if (likely(primary_if))
+ batadv_hardif_put(primary_if);
+ if (likely(orig_node))
+ batadv_orig_node_put(orig_node);
+
+ batadv_tp_sender_end(bat_priv, tp_vars);
+ batadv_tp_sender_cleanup(bat_priv, tp_vars);
+
+ batadv_tp_vars_put(tp_vars);
+
+ do_exit(0);
+}
+
+/**
+ * batadv_tp_start_kthread - start new thread which manages the tp meter sender
+ * @tp_vars: the private data of the current TP meter session
+ */
+static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars)
+{
+ struct task_struct *kthread;
+ struct batadv_priv *bat_priv = tp_vars->bat_priv;
+ u32 session_cookie;
+
+ kref_get(&tp_vars->refcount);
+ kthread = kthread_create(batadv_tp_send, tp_vars, "kbatadv_tp_meter");
+ if (IS_ERR(kthread)) {
+ session_cookie = batadv_tp_session_cookie(tp_vars->session,
+ tp_vars->icmp_uid);
+ pr_err("batadv: cannot create tp meter kthread\n");
+ batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR,
+ tp_vars->other_end,
+ bat_priv, session_cookie);
+
+ /* drop reserved reference for kthread */
+ batadv_tp_vars_put(tp_vars);
+
+ /* cleanup of failed tp meter variables */
+ batadv_tp_sender_cleanup(bat_priv, tp_vars);
+ return;
+ }
+
+ wake_up_process(kthread);
+}
+
+/**
+ * batadv_tp_start - start a new tp meter session
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the receiver MAC address
+ * @test_length: test length in milliseconds
+ * @cookie: session cookie
+ */
+void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
+ u32 test_length, u32 *cookie)
+{
+ struct batadv_tp_vars *tp_vars;
+ u8 session_id[2];
+ u8 icmp_uid;
+ u32 session_cookie;
+
+ get_random_bytes(session_id, sizeof(session_id));
+ get_random_bytes(&icmp_uid, 1);
+ session_cookie = batadv_tp_session_cookie(session_id, icmp_uid);
+ *cookie = session_cookie;
+
+ /* look for an already existing test towards this node */
+ spin_lock_bh(&bat_priv->tp_list_lock);
+ tp_vars = batadv_tp_list_find(bat_priv, dst);
+ if (tp_vars) {
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+ batadv_tp_vars_put(tp_vars);
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: test to or from the same node already ongoing, aborting\n");
+ batadv_tp_batctl_error_notify(BATADV_TP_REASON_ALREADY_ONGOING,
+ dst, bat_priv, session_cookie);
+ return;
+ }
+
+ if (!atomic_add_unless(&bat_priv->tp_num, 1, BATADV_TP_MAX_NUM)) {
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: too many ongoing sessions, aborting (SEND)\n");
+ batadv_tp_batctl_error_notify(BATADV_TP_REASON_TOO_MANY, dst,
+ bat_priv, session_cookie);
+ return;
+ }
+
+ tp_vars = kmalloc(sizeof(*tp_vars), GFP_ATOMIC);
+ if (!tp_vars) {
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: batadv_tp_start cannot allocate list elements\n");
+ batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR,
+ dst, bat_priv, session_cookie);
+ return;
+ }
+
+ /* initialize tp_vars */
+ ether_addr_copy(tp_vars->other_end, dst);
+ kref_init(&tp_vars->refcount);
+ tp_vars->role = BATADV_TP_SENDER;
+ atomic_set(&tp_vars->sending, 1);
+ memcpy(tp_vars->session, session_id, sizeof(session_id));
+ tp_vars->icmp_uid = icmp_uid;
+
+ tp_vars->last_sent = BATADV_TP_FIRST_SEQ;
+ atomic_set(&tp_vars->last_acked, BATADV_TP_FIRST_SEQ);
+ tp_vars->fast_recovery = false;
+ tp_vars->recover = BATADV_TP_FIRST_SEQ;
+
+ /* initialise the CWND to 3*MSS (Section 3.1 in RFC5681).
+ * For batman-adv the MSS is the size of the payload received by the
+ * soft_interface, hence its MTU
+ */
+ tp_vars->cwnd = BATADV_TP_PLEN * 3;
+ /* at the beginning initialise the SS threshold to the biggest possible
+ * window size, hence the AWND size
+ */
+ tp_vars->ss_threshold = BATADV_TP_AWND;
+
+ /* RTO initial value is 3 seconds.
+ * Details in Section 2.1 of RFC6298
+ */
+ tp_vars->rto = 1000;
+ tp_vars->srtt = 0;
+ tp_vars->rttvar = 0;
+
+ atomic64_set(&tp_vars->tot_sent, 0);
+
+ kref_get(&tp_vars->refcount);
+ setup_timer(&tp_vars->timer, batadv_tp_sender_timeout,
+ (unsigned long)tp_vars);
+
+ tp_vars->bat_priv = bat_priv;
+ tp_vars->start_time = jiffies;
+
+ init_waitqueue_head(&tp_vars->more_bytes);
+
+ spin_lock_init(&tp_vars->unacked_lock);
+ INIT_LIST_HEAD(&tp_vars->unacked_list);
+
+ spin_lock_init(&tp_vars->cwnd_lock);
+
+ tp_vars->prerandom_offset = 0;
+ spin_lock_init(&tp_vars->prerandom_lock);
+
+ kref_get(&tp_vars->refcount);
+ hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list);
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+
+ tp_vars->test_length = test_length;
+ if (!tp_vars->test_length)
+ tp_vars->test_length = BATADV_TP_DEF_TEST_LENGTH;
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: starting throughput meter towards %pM (length=%ums)\n",
+ dst, test_length);
+
+ /* init work item for finished tp tests */
+ INIT_DELAYED_WORK(&tp_vars->finish_work, batadv_tp_sender_finish);
+
+ /* start tp kthread. This way the write() call issued from userspace can
+ * happily return and avoid to block
+ */
+ batadv_tp_start_kthread(tp_vars);
+
+ /* don't return reference to new tp_vars */
+ batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_stop - stop currently running tp meter session
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the receiver MAC address
+ * @return_value: reason for tp meter session stop
+ */
+void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst,
+ u8 return_value)
+{
+ struct batadv_orig_node *orig_node;
+ struct batadv_tp_vars *tp_vars;
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: stopping test towards %pM\n", dst);
+
+ orig_node = batadv_orig_hash_find(bat_priv, dst);
+ if (!orig_node)
+ return;
+
+ tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig);
+ if (!tp_vars) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: trying to interrupt an already over connection\n");
+ goto out;
+ }
+
+ batadv_tp_sender_shutdown(tp_vars, return_value);
+ batadv_tp_vars_put(tp_vars);
+out:
+ batadv_orig_node_put(orig_node);
+}
+
+/**
+ * batadv_tp_reset_receiver_timer - reset the receiver shutdown timer
+ * @tp_vars: the private data of the current TP meter session
+ *
+ * start the receiver shutdown timer or reset it if already started
+ */
+static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
+{
+ mod_timer(&tp_vars->timer,
+ jiffies + msecs_to_jiffies(BATADV_TP_RECV_TIMEOUT));
+}
+
+/**
+ * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is
+ * reached without received ack
+ * @arg: address of the related tp_vars
+ */
+static void batadv_tp_receiver_shutdown(unsigned long arg)
+{
+ struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg;
+ struct batadv_tp_unacked *un, *safe;
+ struct batadv_priv *bat_priv;
+
+ bat_priv = tp_vars->bat_priv;
+
+ /* if there is recent activity rearm the timer */
+ if (!batadv_has_timed_out(tp_vars->last_recv_time,
+ BATADV_TP_RECV_TIMEOUT)) {
+ /* reset the receiver shutdown timer */
+ batadv_tp_reset_receiver_timer(tp_vars);
+ return;
+ }
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Shutting down for inactivity (more than %dms) from %pM\n",
+ BATADV_TP_RECV_TIMEOUT, tp_vars->other_end);
+
+ spin_lock_bh(&tp_vars->bat_priv->tp_list_lock);
+ hlist_del_rcu(&tp_vars->list);
+ spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock);
+
+ /* drop list reference */
+ batadv_tp_vars_put(tp_vars);
+
+ atomic_dec(&bat_priv->tp_num);
+
+ spin_lock_bh(&tp_vars->unacked_lock);
+ list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) {
+ list_del(&un->list);
+ kfree(un);
+ }
+ spin_unlock_bh(&tp_vars->unacked_lock);
+
+ /* drop reference of timer */
+ batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_send_ack - send an ACK packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the mac address of the destination originator
+ * @seq: the sequence number to ACK
+ * @timestamp: the timestamp to echo back in the ACK
+ * @session: session identifier
+ * @socket_index: local ICMP socket identifier
+ *
+ * Return: 0 on success, a positive integer representing the reason of the
+ * failure otherwise
+ */
+static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
+ u32 seq, __be32 timestamp, const u8 *session,
+ int socket_index)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ struct batadv_orig_node *orig_node;
+ struct batadv_icmp_tp_packet *icmp;
+ struct sk_buff *skb;
+ int r, ret;
+
+ orig_node = batadv_orig_hash_find(bat_priv, dst);
+ if (unlikely(!orig_node)) {
+ ret = BATADV_TP_REASON_DST_UNREACHABLE;
+ goto out;
+ }
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (unlikely(!primary_if)) {
+ ret = BATADV_TP_REASON_DST_UNREACHABLE;
+ goto out;
+ }
+
+ skb = netdev_alloc_skb_ip_align(NULL, sizeof(*icmp) + ETH_HLEN);
+ if (unlikely(!skb)) {
+ ret = BATADV_TP_REASON_MEMORY_ERROR;
+ goto out;
+ }
+
+ skb_reserve(skb, ETH_HLEN);
+ icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp));
+ icmp->packet_type = BATADV_ICMP;
+ icmp->version = BATADV_COMPAT_VERSION;
+ icmp->ttl = BATADV_TTL;
+ icmp->msg_type = BATADV_TP;
+ ether_addr_copy(icmp->dst, orig_node->orig);
+ ether_addr_copy(icmp->orig, primary_if->net_dev->dev_addr);
+ icmp->uid = socket_index;
+
+ icmp->subtype = BATADV_TP_ACK;
+ memcpy(icmp->session, session, sizeof(icmp->session));
+ icmp->seqno = htonl(seq);
+ icmp->timestamp = timestamp;
+
+ /* send the ack */
+ r = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ if (r == -1)
+ kfree_skb(skb);
+
+ if (unlikely(r < 0) || (r == NET_XMIT_DROP)) {
+ ret = BATADV_TP_REASON_DST_UNREACHABLE;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ if (likely(orig_node))
+ batadv_orig_node_put(orig_node);
+ if (likely(primary_if))
+ batadv_hardif_put(primary_if);
+
+ return ret;
+}
+
+/**
+ * batadv_tp_handle_out_of_order - store an out of order packet
+ * @tp_vars: the private data of the current TP meter session
+ * @skb: the buffer containing the received packet
+ *
+ * Store the out of order packet in the unacked list for late processing. This
+ * packets are kept in this list so that they can be ACKed at once as soon as
+ * all the previous packets have been received
+ *
+ * Return: true if the packed has been successfully processed, false otherwise
+ */
+static bool batadv_tp_handle_out_of_order(struct batadv_tp_vars *tp_vars,
+ const struct sk_buff *skb)
+{
+ const struct batadv_icmp_tp_packet *icmp;
+ struct batadv_tp_unacked *un, *new;
+ u32 payload_len;
+ bool added = false;
+
+ new = kmalloc(sizeof(*new), GFP_ATOMIC);
+ if (unlikely(!new))
+ return false;
+
+ icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+ new->seqno = ntohl(icmp->seqno);
+ payload_len = skb->len - sizeof(struct batadv_unicast_packet);
+ new->len = payload_len;
+
+ spin_lock_bh(&tp_vars->unacked_lock);
+ /* if the list is empty immediately attach this new object */
+ if (list_empty(&tp_vars->unacked_list)) {
+ list_add(&new->list, &tp_vars->unacked_list);
+ goto out;
+ }
+
+ /* otherwise loop over the list and either drop the packet because this
+ * is a duplicate or store it at the right position.
+ *
+ * The iteration is done in the reverse way because it is likely that
+ * the last received packet (the one being processed now) has a bigger
+ * seqno than all the others already stored.
+ */
+ list_for_each_entry_reverse(un, &tp_vars->unacked_list, list) {
+ /* check for duplicates */
+ if (new->seqno == un->seqno) {
+ if (new->len > un->len)
+ un->len = new->len;
+ kfree(new);
+ added = true;
+ break;
+ }
+
+ /* look for the right position */
+ if (batadv_seq_before(new->seqno, un->seqno))
+ continue;
+
+ /* as soon as an entry having a bigger seqno is found, the new
+ * one is attached _after_ it. In this way the list is kept in
+ * ascending order
+ */
+ list_add_tail(&new->list, &un->list);
+ added = true;
+ break;
+ }
+
+ /* received packet with smallest seqno out of order; add it to front */
+ if (!added)
+ list_add(&new->list, &tp_vars->unacked_list);
+
+out:
+ spin_unlock_bh(&tp_vars->unacked_lock);
+
+ return true;
+}
+
+/**
+ * batadv_tp_ack_unordered - update number received bytes in current stream
+ * without gaps
+ * @tp_vars: the private data of the current TP meter session
+ */
+static void batadv_tp_ack_unordered(struct batadv_tp_vars *tp_vars)
+{
+ struct batadv_tp_unacked *un, *safe;
+ u32 to_ack;
+
+ /* go through the unacked packet list and possibly ACK them as
+ * well
+ */
+ spin_lock_bh(&tp_vars->unacked_lock);
+ list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) {
+ /* the list is ordered, therefore it is possible to stop as soon
+ * there is a gap between the last acked seqno and the seqno of
+ * the packet under inspection
+ */
+ if (batadv_seq_before(tp_vars->last_recv, un->seqno))
+ break;
+
+ to_ack = un->seqno + un->len - tp_vars->last_recv;
+
+ if (batadv_seq_before(tp_vars->last_recv, un->seqno + un->len))
+ tp_vars->last_recv += to_ack;
+
+ list_del(&un->list);
+ kfree(un);
+ }
+ spin_unlock_bh(&tp_vars->unacked_lock);
+}
+
+/**
+ * batadv_tp_init_recv - return matching or create new receiver tp_vars
+ * @bat_priv: the bat priv with all the soft interface information
+ * @icmp: received icmp tp msg
+ *
+ * Return: corresponding tp_vars or NULL on errors
+ */
+static struct batadv_tp_vars *
+batadv_tp_init_recv(struct batadv_priv *bat_priv,
+ const struct batadv_icmp_tp_packet *icmp)
+{
+ struct batadv_tp_vars *tp_vars;
+
+ spin_lock_bh(&bat_priv->tp_list_lock);
+ tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
+ icmp->session);
+ if (tp_vars)
+ goto out_unlock;
+
+ if (!atomic_add_unless(&bat_priv->tp_num, 1, BATADV_TP_MAX_NUM)) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: too many ongoing sessions, aborting (RECV)\n");
+ goto out_unlock;
+ }
+
+ tp_vars = kmalloc(sizeof(*tp_vars), GFP_ATOMIC);
+ if (!tp_vars)
+ goto out_unlock;
+
+ ether_addr_copy(tp_vars->other_end, icmp->orig);
+ tp_vars->role = BATADV_TP_RECEIVER;
+ memcpy(tp_vars->session, icmp->session, sizeof(tp_vars->session));
+ tp_vars->last_recv = BATADV_TP_FIRST_SEQ;
+ tp_vars->bat_priv = bat_priv;
+ kref_init(&tp_vars->refcount);
+
+ spin_lock_init(&tp_vars->unacked_lock);
+ INIT_LIST_HEAD(&tp_vars->unacked_list);
+
+ kref_get(&tp_vars->refcount);
+ hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list);
+
+ kref_get(&tp_vars->refcount);
+ setup_timer(&tp_vars->timer, batadv_tp_receiver_shutdown,
+ (unsigned long)tp_vars);
+
+ batadv_tp_reset_receiver_timer(tp_vars);
+
+out_unlock:
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+
+ return tp_vars;
+}
+
+/**
+ * batadv_tp_recv_msg - process a single data message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the buffer containing the received packet
+ *
+ * Process a received TP MSG packet
+ */
+static void batadv_tp_recv_msg(struct batadv_priv *bat_priv,
+ const struct sk_buff *skb)
+{
+ const struct batadv_icmp_tp_packet *icmp;
+ struct batadv_tp_vars *tp_vars;
+ size_t packet_size;
+ u32 seqno;
+
+ icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+ seqno = ntohl(icmp->seqno);
+ /* check if this is the first seqno. This means that if the
+ * first packet is lost, the tp meter does not work anymore!
+ */
+ if (seqno == BATADV_TP_FIRST_SEQ) {
+ tp_vars = batadv_tp_init_recv(bat_priv, icmp);
+ if (!tp_vars) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: seqno != BATADV_TP_FIRST_SEQ cannot initiate connection\n");
+ goto out;
+ }
+ } else {
+ tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
+ icmp->session);
+ if (!tp_vars) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Unexpected packet from %pM!\n",
+ icmp->orig);
+ goto out;
+ }
+ }
+
+ if (unlikely(tp_vars->role != BATADV_TP_RECEIVER)) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: dropping packet: not expected (role=%u)\n",
+ tp_vars->role);
+ goto out;
+ }
+
+ tp_vars->last_recv_time = jiffies;
+
+ /* if the packet is a duplicate, it may be the case that an ACK has been
+ * lost. Resend the ACK
+ */
+ if (batadv_seq_before(seqno, tp_vars->last_recv))
+ goto send_ack;
+
+ /* if the packet is out of order enqueue it */
+ if (ntohl(icmp->seqno) != tp_vars->last_recv) {
+ /* exit immediately (and do not send any ACK) if the packet has
+ * not been enqueued correctly
+ */
+ if (!batadv_tp_handle_out_of_order(tp_vars, skb))
+ goto out;
+
+ /* send a duplicate ACK */
+ goto send_ack;
+ }
+
+ /* if everything was fine count the ACKed bytes */
+ packet_size = skb->len - sizeof(struct batadv_unicast_packet);
+ tp_vars->last_recv += packet_size;
+
+ /* check if this ordered message filled a gap.... */
+ batadv_tp_ack_unordered(tp_vars);
+
+send_ack:
+ /* send the ACK. If the received packet was out of order, the ACK that
+ * is going to be sent is a duplicate (the sender will count them and
+ * possibly enter Fast Retransmit as soon as it has reached 3)
+ */
+ batadv_tp_send_ack(bat_priv, icmp->orig, tp_vars->last_recv,
+ icmp->timestamp, icmp->session, icmp->uid);
+out:
+ if (likely(tp_vars))
+ batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_meter_recv - main TP Meter receiving function
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the buffer containing the received packet
+ */
+void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb)
+{
+ struct batadv_icmp_tp_packet *icmp;
+
+ icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+ switch (icmp->subtype) {
+ case BATADV_TP_MSG:
+ batadv_tp_recv_msg(bat_priv, skb);
+ break;
+ case BATADV_TP_ACK:
+ batadv_tp_recv_ack(bat_priv, skb);
+ break;
+ default:
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Received unknown TP Metric packet type %u\n",
+ icmp->subtype);
+ }
+ consume_skb(skb);
+}
+
+/**
+ * batadv_tp_meter_init - initialize global tp_meter structures
+ */
+void batadv_tp_meter_init(void)
+{
+ get_random_bytes(batadv_tp_prerandom, sizeof(batadv_tp_prerandom));
+}
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
new file mode 100644
index 000000000000..ba922c425e56
--- /dev/null
+++ b/net/batman-adv/tp_meter.h
@@ -0,0 +1,34 @@
+/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors:
+ *
+ * Edo Monticelli, Antonio Quartulli
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_TP_METER_H_
+#define _NET_BATMAN_ADV_TP_METER_H_
+
+#include "main.h"
+
+#include <linux/types.h>
+
+struct sk_buff;
+
+void batadv_tp_meter_init(void);
+void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
+ u32 test_length, u32 *cookie);
+void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst,
+ u8 return_value);
+void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb);
+
+#endif /* _NET_BATMAN_ADV_TP_METER_H_ */
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 0d441aac1ad5..7e6df7a4964a 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -47,10 +47,12 @@
#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "multicast.h"
#include "originator.h"
#include "packet.h"
#include "soft-interface.h"
+#include "tvlv.h"
/* hash class keys */
static struct lock_class_key batadv_tt_local_hash_lock_class_key;
@@ -1545,7 +1547,7 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
struct batadv_tt_global_entry *tt_global_entry)
{
struct batadv_neigh_node *router, *best_router = NULL;
- struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+ struct batadv_algo_ops *bao = bat_priv->algo_ops;
struct hlist_head *head;
struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL;
@@ -1557,8 +1559,8 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
continue;
if (best_router &&
- bao->bat_neigh_cmp(router, BATADV_IF_DEFAULT,
- best_router, BATADV_IF_DEFAULT) <= 0) {
+ bao->neigh.cmp(router, BATADV_IF_DEFAULT, best_router,
+ BATADV_IF_DEFAULT) <= 0) {
batadv_neigh_node_put(router);
continue;
}
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
new file mode 100644
index 000000000000..3d1cf0fb112d
--- /dev/null
+++ b/net/batman-adv/tvlv.c
@@ -0,0 +1,632 @@
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+
+#include <linux/byteorder/generic.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "originator.h"
+#include "packet.h"
+#include "send.h"
+#include "tvlv.h"
+
+/**
+ * batadv_tvlv_handler_release - release tvlv handler from lists and queue for
+ * free after rcu grace period
+ * @ref: kref pointer of the tvlv
+ */
+static void batadv_tvlv_handler_release(struct kref *ref)
+{
+ struct batadv_tvlv_handler *tvlv_handler;
+
+ tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount);
+ kfree_rcu(tvlv_handler, rcu);
+}
+
+/**
+ * batadv_tvlv_handler_put - decrement the tvlv container refcounter and
+ * possibly release it
+ * @tvlv_handler: the tvlv handler to free
+ */
+static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
+{
+ kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
+}
+
+/**
+ * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list
+ * based on the provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv handler type to look for
+ * @version: tvlv handler version to look for
+ *
+ * Return: tvlv handler if found or NULL otherwise.
+ */
+static struct batadv_tvlv_handler *
+batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
+{
+ struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tvlv_handler_tmp,
+ &bat_priv->tvlv.handler_list, list) {
+ if (tvlv_handler_tmp->type != type)
+ continue;
+
+ if (tvlv_handler_tmp->version != version)
+ continue;
+
+ if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount))
+ continue;
+
+ tvlv_handler = tvlv_handler_tmp;
+ break;
+ }
+ rcu_read_unlock();
+
+ return tvlv_handler;
+}
+
+/**
+ * batadv_tvlv_container_release - release tvlv from lists and free
+ * @ref: kref pointer of the tvlv
+ */
+static void batadv_tvlv_container_release(struct kref *ref)
+{
+ struct batadv_tvlv_container *tvlv;
+
+ tvlv = container_of(ref, struct batadv_tvlv_container, refcount);
+ kfree(tvlv);
+}
+
+/**
+ * batadv_tvlv_container_put - decrement the tvlv container refcounter and
+ * possibly release it
+ * @tvlv: the tvlv container to free
+ */
+static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
+{
+ kref_put(&tvlv->refcount, batadv_tvlv_container_release);
+}
+
+/**
+ * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container
+ * list based on the provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type to look for
+ * @version: tvlv container version to look for
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ *
+ * Return: tvlv container if found or NULL otherwise.
+ */
+static struct batadv_tvlv_container *
+batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
+{
+ struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
+
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
+ hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) {
+ if (tvlv_tmp->tvlv_hdr.type != type)
+ continue;
+
+ if (tvlv_tmp->tvlv_hdr.version != version)
+ continue;
+
+ kref_get(&tvlv_tmp->refcount);
+ tvlv = tvlv_tmp;
+ break;
+ }
+
+ return tvlv;
+}
+
+/**
+ * batadv_tvlv_container_list_size - calculate the size of the tvlv container
+ * list entries
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ *
+ * Return: size of all currently registered tvlv containers in bytes.
+ */
+static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
+{
+ struct batadv_tvlv_container *tvlv;
+ u16 tvlv_len = 0;
+
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
+ hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
+ tvlv_len += sizeof(struct batadv_tvlv_hdr);
+ tvlv_len += ntohs(tvlv->tvlv_hdr.len);
+ }
+
+ return tvlv_len;
+}
+
+/**
+ * batadv_tvlv_container_remove - remove tvlv container from the tvlv container
+ * list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tvlv: the to be removed tvlv container
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ */
+static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
+ struct batadv_tvlv_container *tvlv)
+{
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
+ if (!tvlv)
+ return;
+
+ hlist_del(&tvlv->list);
+
+ /* first call to decrement the counter, second call to free */
+ batadv_tvlv_container_put(tvlv);
+ batadv_tvlv_container_put(tvlv);
+}
+
+/**
+ * batadv_tvlv_container_unregister - unregister tvlv container based on the
+ * provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type to unregister
+ * @version: tvlv container type to unregister
+ */
+void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
+ u8 type, u8 version)
+{
+ struct batadv_tvlv_container *tvlv;
+
+ spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+ tvlv = batadv_tvlv_container_get(bat_priv, type, version);
+ batadv_tvlv_container_remove(bat_priv, tvlv);
+ spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+}
+
+/**
+ * batadv_tvlv_container_register - register tvlv type, version and content
+ * to be propagated with each (primary interface) OGM
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type
+ * @version: tvlv container version
+ * @tvlv_value: tvlv container content
+ * @tvlv_value_len: tvlv container content length
+ *
+ * If a container of the same type and version was already registered the new
+ * content is going to replace the old one.
+ */
+void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
+ u8 type, u8 version,
+ void *tvlv_value, u16 tvlv_value_len)
+{
+ struct batadv_tvlv_container *tvlv_old, *tvlv_new;
+
+ if (!tvlv_value)
+ tvlv_value_len = 0;
+
+ tvlv_new = kzalloc(sizeof(*tvlv_new) + tvlv_value_len, GFP_ATOMIC);
+ if (!tvlv_new)
+ return;
+
+ tvlv_new->tvlv_hdr.version = version;
+ tvlv_new->tvlv_hdr.type = type;
+ tvlv_new->tvlv_hdr.len = htons(tvlv_value_len);
+
+ memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len));
+ INIT_HLIST_NODE(&tvlv_new->list);
+ kref_init(&tvlv_new->refcount);
+
+ spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+ tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
+ batadv_tvlv_container_remove(bat_priv, tvlv_old);
+ hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list);
+ spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+}
+
+/**
+ * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate
+ * requested packet size
+ * @packet_buff: packet buffer
+ * @packet_buff_len: packet buffer size
+ * @min_packet_len: requested packet minimum size
+ * @additional_packet_len: requested additional packet size on top of minimum
+ * size
+ *
+ * Return: true of the packet buffer could be changed to the requested size,
+ * false otherwise.
+ */
+static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
+ int *packet_buff_len,
+ int min_packet_len,
+ int additional_packet_len)
+{
+ unsigned char *new_buff;
+
+ new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC);
+
+ /* keep old buffer if kmalloc should fail */
+ if (!new_buff)
+ return false;
+
+ memcpy(new_buff, *packet_buff, min_packet_len);
+ kfree(*packet_buff);
+ *packet_buff = new_buff;
+ *packet_buff_len = min_packet_len + additional_packet_len;
+
+ return true;
+}
+
+/**
+ * batadv_tvlv_container_ogm_append - append tvlv container content to given
+ * OGM packet buffer
+ * @bat_priv: the bat priv with all the soft interface information
+ * @packet_buff: ogm packet buffer
+ * @packet_buff_len: ogm packet buffer size including ogm header and tvlv
+ * content
+ * @packet_min_len: ogm header size to be preserved for the OGM itself
+ *
+ * The ogm packet might be enlarged or shrunk depending on the current size
+ * and the size of the to-be-appended tvlv containers.
+ *
+ * Return: size of all appended tvlv containers in bytes.
+ */
+u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+ unsigned char **packet_buff,
+ int *packet_buff_len, int packet_min_len)
+{
+ struct batadv_tvlv_container *tvlv;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ u16 tvlv_value_len;
+ void *tvlv_value;
+ bool ret;
+
+ spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+ tvlv_value_len = batadv_tvlv_container_list_size(bat_priv);
+
+ ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len,
+ packet_min_len, tvlv_value_len);
+
+ if (!ret)
+ goto end;
+
+ if (!tvlv_value_len)
+ goto end;
+
+ tvlv_value = (*packet_buff) + packet_min_len;
+
+ hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
+ tvlv_hdr = tvlv_value;
+ tvlv_hdr->type = tvlv->tvlv_hdr.type;
+ tvlv_hdr->version = tvlv->tvlv_hdr.version;
+ tvlv_hdr->len = tvlv->tvlv_hdr.len;
+ tvlv_value = tvlv_hdr + 1;
+ memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len));
+ tvlv_value = (u8 *)tvlv_value + ntohs(tvlv->tvlv_hdr.len);
+ }
+
+end:
+ spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+ return tvlv_value_len;
+}
+
+/**
+ * batadv_tvlv_call_handler - parse the given tvlv buffer to call the
+ * appropriate handlers
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tvlv_handler: tvlv callback function handling the tvlv content
+ * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
+ * @orig_node: orig node emitting the ogm packet
+ * @src: source mac address of the unicast packet
+ * @dst: destination mac address of the unicast packet
+ * @tvlv_value: tvlv content
+ * @tvlv_value_len: tvlv content length
+ *
+ * Return: success if handler was not found or the return value of the handler
+ * callback.
+ */
+static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
+ struct batadv_tvlv_handler *tvlv_handler,
+ bool ogm_source,
+ struct batadv_orig_node *orig_node,
+ u8 *src, u8 *dst,
+ void *tvlv_value, u16 tvlv_value_len)
+{
+ if (!tvlv_handler)
+ return NET_RX_SUCCESS;
+
+ if (ogm_source) {
+ if (!tvlv_handler->ogm_handler)
+ return NET_RX_SUCCESS;
+
+ if (!orig_node)
+ return NET_RX_SUCCESS;
+
+ tvlv_handler->ogm_handler(bat_priv, orig_node,
+ BATADV_NO_FLAGS,
+ tvlv_value, tvlv_value_len);
+ tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED;
+ } else {
+ if (!src)
+ return NET_RX_SUCCESS;
+
+ if (!dst)
+ return NET_RX_SUCCESS;
+
+ if (!tvlv_handler->unicast_handler)
+ return NET_RX_SUCCESS;
+
+ return tvlv_handler->unicast_handler(bat_priv, src,
+ dst, tvlv_value,
+ tvlv_value_len);
+ }
+
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * batadv_tvlv_containers_process - parse the given tvlv buffer to call the
+ * appropriate handlers
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
+ * @orig_node: orig node emitting the ogm packet
+ * @src: source mac address of the unicast packet
+ * @dst: destination mac address of the unicast packet
+ * @tvlv_value: tvlv content
+ * @tvlv_value_len: tvlv content length
+ *
+ * Return: success when processing an OGM or the return value of all called
+ * handler callbacks.
+ */
+int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
+ bool ogm_source,
+ struct batadv_orig_node *orig_node,
+ u8 *src, u8 *dst,
+ void *tvlv_value, u16 tvlv_value_len)
+{
+ struct batadv_tvlv_handler *tvlv_handler;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ u16 tvlv_value_cont_len;
+ u8 cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND;
+ int ret = NET_RX_SUCCESS;
+
+ while (tvlv_value_len >= sizeof(*tvlv_hdr)) {
+ tvlv_hdr = tvlv_value;
+ tvlv_value_cont_len = ntohs(tvlv_hdr->len);
+ tvlv_value = tvlv_hdr + 1;
+ tvlv_value_len -= sizeof(*tvlv_hdr);
+
+ if (tvlv_value_cont_len > tvlv_value_len)
+ break;
+
+ tvlv_handler = batadv_tvlv_handler_get(bat_priv,
+ tvlv_hdr->type,
+ tvlv_hdr->version);
+
+ ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler,
+ ogm_source, orig_node,
+ src, dst, tvlv_value,
+ tvlv_value_cont_len);
+ if (tvlv_handler)
+ batadv_tvlv_handler_put(tvlv_handler);
+ tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
+ tvlv_value_len -= tvlv_value_cont_len;
+ }
+
+ if (!ogm_source)
+ return ret;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tvlv_handler,
+ &bat_priv->tvlv.handler_list, list) {
+ if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) &&
+ !(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED))
+ tvlv_handler->ogm_handler(bat_priv, orig_node,
+ cifnotfound, NULL, 0);
+
+ tvlv_handler->flags &= ~BATADV_TVLV_HANDLER_OGM_CALLED;
+ }
+ rcu_read_unlock();
+
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate
+ * handlers
+ * @bat_priv: the bat priv with all the soft interface information
+ * @batadv_ogm_packet: ogm packet containing the tvlv containers
+ * @orig_node: orig node emitting the ogm packet
+ */
+void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
+ struct batadv_ogm_packet *batadv_ogm_packet,
+ struct batadv_orig_node *orig_node)
+{
+ void *tvlv_value;
+ u16 tvlv_value_len;
+
+ if (!batadv_ogm_packet)
+ return;
+
+ tvlv_value_len = ntohs(batadv_ogm_packet->tvlv_len);
+ if (!tvlv_value_len)
+ return;
+
+ tvlv_value = batadv_ogm_packet + 1;
+
+ batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL,
+ tvlv_value, tvlv_value_len);
+}
+
+/**
+ * batadv_tvlv_handler_register - register tvlv handler based on the provided
+ * type and version (both need to match) for ogm tvlv payload and/or unicast
+ * payload
+ * @bat_priv: the bat priv with all the soft interface information
+ * @optr: ogm tvlv handler callback function. This function receives the orig
+ * node, flags and the tvlv content as argument to process.
+ * @uptr: unicast tvlv handler callback function. This function receives the
+ * source & destination of the unicast packet as well as the tvlv content
+ * to process.
+ * @type: tvlv handler type to be registered
+ * @version: tvlv handler version to be registered
+ * @flags: flags to enable or disable TVLV API behavior
+ */
+void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
+ void (*optr)(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ u8 flags,
+ void *tvlv_value,
+ u16 tvlv_value_len),
+ int (*uptr)(struct batadv_priv *bat_priv,
+ u8 *src, u8 *dst,
+ void *tvlv_value,
+ u16 tvlv_value_len),
+ u8 type, u8 version, u8 flags)
+{
+ struct batadv_tvlv_handler *tvlv_handler;
+
+ tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
+ if (tvlv_handler) {
+ batadv_tvlv_handler_put(tvlv_handler);
+ return;
+ }
+
+ tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC);
+ if (!tvlv_handler)
+ return;
+
+ tvlv_handler->ogm_handler = optr;
+ tvlv_handler->unicast_handler = uptr;
+ tvlv_handler->type = type;
+ tvlv_handler->version = version;
+ tvlv_handler->flags = flags;
+ kref_init(&tvlv_handler->refcount);
+ INIT_HLIST_NODE(&tvlv_handler->list);
+
+ spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
+ hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list);
+ spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
+}
+
+/**
+ * batadv_tvlv_handler_unregister - unregister tvlv handler based on the
+ * provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv handler type to be unregistered
+ * @version: tvlv handler version to be unregistered
+ */
+void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
+ u8 type, u8 version)
+{
+ struct batadv_tvlv_handler *tvlv_handler;
+
+ tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
+ if (!tvlv_handler)
+ return;
+
+ batadv_tvlv_handler_put(tvlv_handler);
+ spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
+ hlist_del_rcu(&tvlv_handler->list);
+ spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
+ batadv_tvlv_handler_put(tvlv_handler);
+}
+
+/**
+ * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the
+ * specified host
+ * @bat_priv: the bat priv with all the soft interface information
+ * @src: source mac address of the unicast packet
+ * @dst: destination mac address of the unicast packet
+ * @type: tvlv type
+ * @version: tvlv version
+ * @tvlv_value: tvlv content
+ * @tvlv_value_len: tvlv content length
+ */
+void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
+ u8 *dst, u8 type, u8 version,
+ void *tvlv_value, u16 tvlv_value_len)
+{
+ struct batadv_unicast_tvlv_packet *unicast_tvlv_packet;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ struct batadv_orig_node *orig_node;
+ struct sk_buff *skb;
+ unsigned char *tvlv_buff;
+ unsigned int tvlv_len;
+ ssize_t hdr_len = sizeof(*unicast_tvlv_packet);
+ int res;
+
+ orig_node = batadv_orig_hash_find(bat_priv, dst);
+ if (!orig_node)
+ return;
+
+ tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len;
+
+ skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + hdr_len + tvlv_len);
+ if (!skb)
+ goto out;
+
+ skb->priority = TC_PRIO_CONTROL;
+ skb_reserve(skb, ETH_HLEN);
+ tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len);
+ unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff;
+ unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV;
+ unicast_tvlv_packet->version = BATADV_COMPAT_VERSION;
+ unicast_tvlv_packet->ttl = BATADV_TTL;
+ unicast_tvlv_packet->reserved = 0;
+ unicast_tvlv_packet->tvlv_len = htons(tvlv_len);
+ unicast_tvlv_packet->align = 0;
+ ether_addr_copy(unicast_tvlv_packet->src, src);
+ ether_addr_copy(unicast_tvlv_packet->dst, dst);
+
+ tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1);
+ tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff;
+ tvlv_hdr->version = version;
+ tvlv_hdr->type = type;
+ tvlv_hdr->len = htons(tvlv_value_len);
+ tvlv_buff += sizeof(*tvlv_hdr);
+ memcpy(tvlv_buff, tvlv_value, tvlv_value_len);
+
+ res = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ if (res == -1)
+ kfree_skb(skb);
+out:
+ batadv_orig_node_put(orig_node);
+}
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
new file mode 100644
index 000000000000..e4369b547b43
--- /dev/null
+++ b/net/batman-adv/tvlv.h
@@ -0,0 +1,61 @@
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_TVLV_H_
+#define _NET_BATMAN_ADV_TVLV_H_
+
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_ogm_packet;
+
+void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
+ u8 type, u8 version,
+ void *tvlv_value, u16 tvlv_value_len);
+u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+ unsigned char **packet_buff,
+ int *packet_buff_len, int packet_min_len);
+void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
+ struct batadv_ogm_packet *batadv_ogm_packet,
+ struct batadv_orig_node *orig_node);
+void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
+ u8 type, u8 version);
+
+void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
+ void (*optr)(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ u8 flags,
+ void *tvlv_value,
+ u16 tvlv_value_len),
+ int (*uptr)(struct batadv_priv *bat_priv,
+ u8 *src, u8 *dst,
+ void *tvlv_value,
+ u16 tvlv_value_len),
+ u8 type, u8 version, u8 flags);
+void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
+ u8 type, u8 version);
+int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
+ bool ogm_source,
+ struct batadv_orig_node *orig_node,
+ u8 *src, u8 *dst,
+ void *tvlv_buff, u16 tvlv_buff_len);
+void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
+ u8 *dst, u8 type, u8 version,
+ void *tvlv_value, u16 tvlv_value_len);
+
+#endif /* _NET_BATMAN_ADV_TVLV_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 02e22763a880..43db7b61f8eb 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -33,6 +33,7 @@
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
+#include <uapi/linux/batman_adv.h>
#include "packet.h"
@@ -833,6 +834,111 @@ struct batadv_priv_nc {
};
/**
+ * struct batadv_tp_unacked - unacked packet meta-information
+ * @seqno: seqno of the unacked packet
+ * @len: length of the packet
+ * @list: list node for batadv_tp_vars::unacked_list
+ *
+ * This struct is supposed to represent a buffer unacked packet. However, since
+ * the purpose of the TP meter is to count the traffic only, there is no need to
+ * store the entire sk_buff, the starting offset and the length are enough
+ */
+struct batadv_tp_unacked {
+ u32 seqno;
+ u16 len;
+ struct list_head list;
+};
+
+/**
+ * enum batadv_tp_meter_role - Modus in tp meter session
+ * @BATADV_TP_RECEIVER: Initialized as receiver
+ * @BATADV_TP_SENDER: Initialized as sender
+ */
+enum batadv_tp_meter_role {
+ BATADV_TP_RECEIVER,
+ BATADV_TP_SENDER
+};
+
+/**
+ * struct batadv_tp_vars - tp meter private variables per session
+ * @list: list node for bat_priv::tp_list
+ * @timer: timer for ack (receiver) and retry (sender)
+ * @bat_priv: pointer to the mesh object
+ * @start_time: start time in jiffies
+ * @other_end: mac address of remote
+ * @role: receiver/sender modi
+ * @sending: sending binary semaphore: 1 if sending, 0 is not
+ * @reason: reason for a stopped session
+ * @finish_work: work item for the finishing procedure
+ * @test_length: test length in milliseconds
+ * @session: TP session identifier
+ * @icmp_uid: local ICMP "socket" index
+ * @dec_cwnd: decimal part of the cwnd used during linear growth
+ * @cwnd: current size of the congestion window
+ * @cwnd_lock: lock do protect @cwnd & @dec_cwnd
+ * @ss_threshold: Slow Start threshold. Once cwnd exceeds this value the
+ * connection switches to the Congestion Avoidance state
+ * @last_acked: last acked byte
+ * @last_sent: last sent byte, not yet acked
+ * @tot_sent: amount of data sent/ACKed so far
+ * @dup_acks: duplicate ACKs counter
+ * @fast_recovery: true if in Fast Recovery mode
+ * @recover: last sent seqno when entering Fast Recovery
+ * @rto: sender timeout
+ * @srtt: smoothed RTT scaled by 2^3
+ * @rttvar: RTT variation scaled by 2^2
+ * @more_bytes: waiting queue anchor when waiting for more ack/retry timeout
+ * @prerandom_offset: offset inside the prerandom buffer
+ * @prerandom_lock: spinlock protecting access to prerandom_offset
+ * @last_recv: last in-order received packet
+ * @unacked_list: list of unacked packets (meta-info only)
+ * @unacked_lock: protect unacked_list
+ * @last_recv_time: time time (jiffies) a msg was received
+ * @refcount: number of context where the object is used
+ * @rcu: struct used for freeing in an RCU-safe manner
+ */
+struct batadv_tp_vars {
+ struct hlist_node list;
+ struct timer_list timer;
+ struct batadv_priv *bat_priv;
+ unsigned long start_time;
+ u8 other_end[ETH_ALEN];
+ enum batadv_tp_meter_role role;
+ atomic_t sending;
+ enum batadv_tp_meter_reason reason;
+ struct delayed_work finish_work;
+ u32 test_length;
+ u8 session[2];
+ u8 icmp_uid;
+
+ /* sender variables */
+ u16 dec_cwnd;
+ u32 cwnd;
+ spinlock_t cwnd_lock; /* Protects cwnd & dec_cwnd */
+ u32 ss_threshold;
+ atomic_t last_acked;
+ u32 last_sent;
+ atomic64_t tot_sent;
+ atomic_t dup_acks;
+ bool fast_recovery;
+ u32 recover;
+ u32 rto;
+ u32 srtt;
+ u32 rttvar;
+ wait_queue_head_t more_bytes;
+ u32 prerandom_offset;
+ spinlock_t prerandom_lock; /* Protects prerandom_offset */
+
+ /* receiver variables */
+ u32 last_recv;
+ struct list_head unacked_list;
+ spinlock_t unacked_lock; /* Protects unacked_list */
+ unsigned long last_recv_time;
+ struct kref refcount;
+ struct rcu_head rcu;
+};
+
+/**
* struct batadv_softif_vlan - per VLAN attributes set
* @bat_priv: pointer to the mesh object
* @vid: VLAN identifier
@@ -900,14 +1006,17 @@ struct batadv_priv_bat_v {
* @debug_dir: dentry for debugfs batman-adv subdirectory
* @forw_bat_list: list of aggregated OGMs that will be forwarded
* @forw_bcast_list: list of broadcast packets that will be rebroadcasted
+ * @tp_list: list of tp sessions
+ * @tp_num: number of currently active tp sessions
* @orig_hash: hash table containing mesh participants (orig nodes)
* @forw_bat_list_lock: lock protecting forw_bat_list
* @forw_bcast_list_lock: lock protecting forw_bcast_list
+ * @tp_list_lock: spinlock protecting @tp_list
* @orig_work: work queue callback item for orig node purging
* @cleanup_work: work queue callback item for soft-interface deinit
* @primary_if: one of the hard-interfaces assigned to this mesh interface
* becomes the primary interface
- * @bat_algo_ops: routing algorithm used by this mesh interface
+ * @algo_ops: routing algorithm used by this mesh interface
* @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top
* of the mesh interface represented by this object
* @softif_vlan_list_lock: lock protecting softif_vlan_list
@@ -956,13 +1065,16 @@ struct batadv_priv {
struct dentry *debug_dir;
struct hlist_head forw_bat_list;
struct hlist_head forw_bcast_list;
+ struct hlist_head tp_list;
struct batadv_hashtable *orig_hash;
spinlock_t forw_bat_list_lock; /* protects forw_bat_list */
spinlock_t forw_bcast_list_lock; /* protects forw_bcast_list */
+ spinlock_t tp_list_lock; /* protects tp_list */
+ atomic_t tp_num;
struct delayed_work orig_work;
struct work_struct cleanup_work;
struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */
- struct batadv_algo_ops *bat_algo_ops;
+ struct batadv_algo_ops *algo_ops;
struct hlist_head softif_vlan_list;
spinlock_t softif_vlan_list_lock; /* protects softif_vlan_list */
#ifdef CONFIG_BATMAN_ADV_BLA
@@ -1278,59 +1390,77 @@ struct batadv_forw_packet {
};
/**
+ * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific)
+ * @activate: start routing mechanisms when hard-interface is brought up
+ * @enable: init routing info when hard-interface is enabled
+ * @disable: de-init routing info when hard-interface is disabled
+ * @update_mac: (re-)init mac addresses of the protocol information
+ * belonging to this hard-interface
+ * @primary_set: called when primary interface is selected / changed
+ */
+struct batadv_algo_iface_ops {
+ void (*activate)(struct batadv_hard_iface *hard_iface);
+ int (*enable)(struct batadv_hard_iface *hard_iface);
+ void (*disable)(struct batadv_hard_iface *hard_iface);
+ void (*update_mac)(struct batadv_hard_iface *hard_iface);
+ void (*primary_set)(struct batadv_hard_iface *hard_iface);
+};
+
+/**
+ * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific)
+ * @hardif_init: called on creation of single hop entry
+ * @cmp: compare the metrics of two neighbors for their respective outgoing
+ * interfaces
+ * @is_similar_or_better: check if neigh1 is equally similar or better than
+ * neigh2 for their respective outgoing interface from the metric prospective
+ * @print: print the single hop neighbor list (optional)
+ */
+struct batadv_algo_neigh_ops {
+ void (*hardif_init)(struct batadv_hardif_neigh_node *neigh);
+ int (*cmp)(struct batadv_neigh_node *neigh1,
+ struct batadv_hard_iface *if_outgoing1,
+ struct batadv_neigh_node *neigh2,
+ struct batadv_hard_iface *if_outgoing2);
+ bool (*is_similar_or_better)(struct batadv_neigh_node *neigh1,
+ struct batadv_hard_iface *if_outgoing1,
+ struct batadv_neigh_node *neigh2,
+ struct batadv_hard_iface *if_outgoing2);
+ void (*print)(struct batadv_priv *priv, struct seq_file *seq);
+};
+
+/**
+ * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific)
+ * @free: free the resources allocated by the routing algorithm for an orig_node
+ * object
+ * @add_if: ask the routing algorithm to apply the needed changes to the
+ * orig_node due to a new hard-interface being added into the mesh
+ * @del_if: ask the routing algorithm to apply the needed changes to the
+ * orig_node due to an hard-interface being removed from the mesh
+ * @print: print the originator table (optional)
+ */
+struct batadv_algo_orig_ops {
+ void (*free)(struct batadv_orig_node *orig_node);
+ int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num);
+ int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num,
+ int del_if_num);
+ void (*print)(struct batadv_priv *priv, struct seq_file *seq,
+ struct batadv_hard_iface *hard_iface);
+};
+
+/**
* struct batadv_algo_ops - mesh algorithm callbacks
* @list: list node for the batadv_algo_list
* @name: name of the algorithm
- * @bat_iface_activate: start routing mechanisms when hard-interface is brought
- * up
- * @bat_iface_enable: init routing info when hard-interface is enabled
- * @bat_iface_disable: de-init routing info when hard-interface is disabled
- * @bat_iface_update_mac: (re-)init mac addresses of the protocol information
- * belonging to this hard-interface
- * @bat_primary_iface_set: called when primary interface is selected / changed
- * @bat_hardif_neigh_init: called on creation of single hop entry
- * @bat_neigh_cmp: compare the metrics of two neighbors for their respective
- * outgoing interfaces
- * @bat_neigh_is_similar_or_better: check if neigh1 is equally similar or
- * better than neigh2 for their respective outgoing interface from the metric
- * prospective
- * @bat_neigh_print: print the single hop neighbor list (optional)
- * @bat_orig_print: print the originator table (optional)
- * @bat_orig_free: free the resources allocated by the routing algorithm for an
- * orig_node object
- * @bat_orig_add_if: ask the routing algorithm to apply the needed changes to
- * the orig_node due to a new hard-interface being added into the mesh
- * @bat_orig_del_if: ask the routing algorithm to apply the needed changes to
- * the orig_node due to an hard-interface being removed from the mesh
+ * @iface: callbacks related to interface handling
+ * @neigh: callbacks related to neighbors handling
+ * @orig: callbacks related to originators handling
*/
struct batadv_algo_ops {
struct hlist_node list;
char *name;
- void (*bat_iface_activate)(struct batadv_hard_iface *hard_iface);
- int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface);
- void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface);
- void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface);
- void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface);
- /* neigh_node handling API */
- void (*bat_hardif_neigh_init)(struct batadv_hardif_neigh_node *neigh);
- int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1,
- struct batadv_hard_iface *if_outgoing1,
- struct batadv_neigh_node *neigh2,
- struct batadv_hard_iface *if_outgoing2);
- bool (*bat_neigh_is_similar_or_better)
- (struct batadv_neigh_node *neigh1,
- struct batadv_hard_iface *if_outgoing1,
- struct batadv_neigh_node *neigh2,
- struct batadv_hard_iface *if_outgoing2);
- void (*bat_neigh_print)(struct batadv_priv *priv, struct seq_file *seq);
- /* orig_node handling API */
- void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq,
- struct batadv_hard_iface *hard_iface);
- void (*bat_orig_free)(struct batadv_orig_node *orig_node);
- int (*bat_orig_add_if)(struct batadv_orig_node *orig_node,
- int max_if_num);
- int (*bat_orig_del_if)(struct batadv_orig_node *orig_node,
- int max_if_num, int del_if_num);
+ struct batadv_algo_iface_ops iface;
+ struct batadv_algo_neigh_ops neigh;
+ struct batadv_algo_orig_ops orig;
};
/**
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 3df7aefb7663..ece45e0683fd 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -215,6 +215,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
struct sock *sk = sock->sk;
struct sk_buff *skb;
size_t copied;
+ size_t skblen;
int err;
BT_DBG("sock %p sk %p len %zu", sock, sk, len);
@@ -230,6 +231,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
return err;
}
+ skblen = skb->len;
copied = skb->len;
if (len < copied) {
msg->msg_flags |= MSG_TRUNC;
@@ -248,6 +250,9 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
skb_free_datagram(sk, skb);
+ if (msg->msg_flags & MSG_TRUNC)
+ copied = skblen;
+
return err ? : copied;
}
EXPORT_SYMBOL(bt_sock_recvmsg);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index bf9f8a801a2e..3809617aa98d 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -625,7 +625,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
list_for_each_entry(d, &hci_dev_list, list) {
if (!test_bit(HCI_UP, &d->flags) ||
hci_dev_test_flag(d, HCI_USER_CHANNEL) ||
- d->dev_type != HCI_BREDR)
+ d->dev_type != HCI_PRIMARY)
continue;
/* Simple routing:
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 45a9fc68c677..ddf8432fe8fb 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -260,14 +260,12 @@ static int hci_init1_req(struct hci_request *req, unsigned long opt)
hci_reset_req(req, 0);
switch (hdev->dev_type) {
- case HCI_BREDR:
+ case HCI_PRIMARY:
bredr_init(req);
break;
-
case HCI_AMP:
amp_init1(req);
break;
-
default:
BT_ERR("Unknown device type %d", hdev->dev_type);
break;
@@ -791,11 +789,11 @@ static int __hci_init(struct hci_dev *hdev)
if (err < 0)
return err;
- /* HCI_BREDR covers both single-mode LE, BR/EDR and dual-mode
+ /* HCI_PRIMARY covers both single-mode LE, BR/EDR and dual-mode
* BR/EDR/LE type controllers. AMP controllers only need the
* first two stages of init.
*/
- if (hdev->dev_type != HCI_BREDR)
+ if (hdev->dev_type != HCI_PRIMARY)
return 0;
err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT, NULL);
@@ -1202,7 +1200,7 @@ int hci_inquiry(void __user *arg)
goto done;
}
- if (hdev->dev_type != HCI_BREDR) {
+ if (hdev->dev_type != HCI_PRIMARY) {
err = -EOPNOTSUPP;
goto done;
}
@@ -1307,7 +1305,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
* since AMP controllers do not have an address.
*/
if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
- hdev->dev_type == HCI_BREDR &&
+ hdev->dev_type == HCI_PRIMARY &&
!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
!bacmp(&hdev->static_addr, BDADDR_ANY)) {
ret = -EADDRNOTAVAIL;
@@ -1402,7 +1400,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
hci_dev_test_flag(hdev, HCI_MGMT) &&
- hdev->dev_type == HCI_BREDR) {
+ hdev->dev_type == HCI_PRIMARY) {
ret = __hci_req_hci_power_on(hdev);
mgmt_power_on(hdev, ret);
}
@@ -1563,7 +1561,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF);
- if (!auto_off && hdev->dev_type == HCI_BREDR &&
+ if (!auto_off && hdev->dev_type == HCI_PRIMARY &&
hci_dev_test_flag(hdev, HCI_MGMT))
__mgmt_power_off(hdev);
@@ -1802,7 +1800,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
goto done;
}
- if (hdev->dev_type != HCI_BREDR) {
+ if (hdev->dev_type != HCI_PRIMARY) {
err = -EOPNOTSUPP;
goto done;
}
@@ -2043,7 +2041,7 @@ static void hci_power_on(struct work_struct *work)
*/
if (hci_dev_test_flag(hdev, HCI_RFKILLED) ||
hci_dev_test_flag(hdev, HCI_UNCONFIGURED) ||
- (hdev->dev_type == HCI_BREDR &&
+ (hdev->dev_type == HCI_PRIMARY &&
!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
!bacmp(&hdev->static_addr, BDADDR_ANY))) {
hci_dev_clear_flag(hdev, HCI_AUTO_OFF);
@@ -3030,7 +3028,7 @@ int hci_register_dev(struct hci_dev *hdev)
* so the index can be used as the AMP controller ID.
*/
switch (hdev->dev_type) {
- case HCI_BREDR:
+ case HCI_PRIMARY:
id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL);
break;
case HCI_AMP:
@@ -3090,7 +3088,7 @@ int hci_register_dev(struct hci_dev *hdev)
hci_dev_set_flag(hdev, HCI_SETUP);
hci_dev_set_flag(hdev, HCI_AUTO_OFF);
- if (hdev->dev_type == HCI_BREDR) {
+ if (hdev->dev_type == HCI_PRIMARY) {
/* Assume BR/EDR support until proven otherwise (such as
* through reading supported features during init.
*/
@@ -3165,6 +3163,8 @@ void hci_unregister_dev(struct hci_dev *hdev)
device_del(&hdev->dev);
debugfs_remove_recursive(hdev->debugfs);
+ kfree_const(hdev->hw_info);
+ kfree_const(hdev->fw_info);
destroy_workqueue(hdev->workqueue);
destroy_workqueue(hdev->req_workqueue);
@@ -3268,6 +3268,28 @@ int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb)
}
EXPORT_SYMBOL(hci_recv_diag);
+void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...)
+{
+ va_list vargs;
+
+ va_start(vargs, fmt);
+ kfree_const(hdev->hw_info);
+ hdev->hw_info = kvasprintf_const(GFP_KERNEL, fmt, vargs);
+ va_end(vargs);
+}
+EXPORT_SYMBOL(hci_set_hw_info);
+
+void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...)
+{
+ va_list vargs;
+
+ va_start(vargs, fmt);
+ kfree_const(hdev->fw_info);
+ hdev->fw_info = kvasprintf_const(GFP_KERNEL, fmt, vargs);
+ va_end(vargs);
+}
+EXPORT_SYMBOL(hci_set_fw_info);
+
/* ---- Interface to upper protocols ---- */
int hci_register_cb(struct hci_cb *cb)
@@ -3415,7 +3437,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue,
hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT;
switch (hdev->dev_type) {
- case HCI_BREDR:
+ case HCI_PRIMARY:
hci_add_acl_hdr(skb, conn->handle, flags);
break;
case HCI_AMP:
@@ -3826,7 +3848,7 @@ static void hci_sched_acl(struct hci_dev *hdev)
BT_DBG("%s", hdev->name);
/* No ACL link over BR/EDR controller */
- if (!hci_conn_num(hdev, ACL_LINK) && hdev->dev_type == HCI_BREDR)
+ if (!hci_conn_num(hdev, ACL_LINK) && hdev->dev_type == HCI_PRIMARY)
return;
/* No AMP link over AMP controller */
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 7db4220941cc..63df63ebfb24 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -76,6 +76,30 @@ static const struct file_operations __name ## _fops = { \
.llseek = default_llseek, \
} \
+#define DEFINE_INFO_ATTRIBUTE(__name, __field) \
+static int __name ## _show(struct seq_file *f, void *ptr) \
+{ \
+ struct hci_dev *hdev = f->private; \
+ \
+ hci_dev_lock(hdev); \
+ seq_printf(f, "%s\n", hdev->__field ? : ""); \
+ hci_dev_unlock(hdev); \
+ \
+ return 0; \
+} \
+ \
+static int __name ## _open(struct inode *inode, struct file *file) \
+{ \
+ return single_open(file, __name ## _show, inode->i_private); \
+} \
+ \
+static const struct file_operations __name ## _fops = { \
+ .open = __name ## _open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = single_release, \
+} \
+
static int features_show(struct seq_file *f, void *ptr)
{
struct hci_dev *hdev = f->private;
@@ -349,6 +373,9 @@ static const struct file_operations sc_only_mode_fops = {
.llseek = default_llseek,
};
+DEFINE_INFO_ATTRIBUTE(hardware_info, hw_info);
+DEFINE_INFO_ATTRIBUTE(firmware_info, fw_info);
+
void hci_debugfs_create_common(struct hci_dev *hdev)
{
debugfs_create_file("features", 0444, hdev->debugfs, hdev,
@@ -382,6 +409,14 @@ void hci_debugfs_create_common(struct hci_dev *hdev)
if (lmp_sc_capable(hdev) || lmp_le_capable(hdev))
debugfs_create_file("sc_only_mode", 0444, hdev->debugfs,
hdev, &sc_only_mode_fops);
+
+ if (hdev->hw_info)
+ debugfs_create_file("hardware_info", 0444, hdev->debugfs,
+ hdev, &hardware_info_fops);
+
+ if (hdev->fw_info)
+ debugfs_create_file("firmware_info", 0444, hdev->debugfs,
+ hdev, &firmware_info_fops);
}
static int inquiry_cache_show(struct seq_file *f, void *p)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index d4b3dd5413be..e17aacbc5630 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2332,7 +2332,7 @@ static u8 hci_to_mgmt_reason(u8 err)
static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_disconn_complete *ev = (void *) skb->data;
- u8 reason = hci_to_mgmt_reason(ev->reason);
+ u8 reason;
struct hci_conn_params *params;
struct hci_conn *conn;
bool mgmt_connected;
@@ -2355,6 +2355,12 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn->state = BT_CLOSED;
mgmt_connected = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags);
+
+ if (test_bit(HCI_CONN_AUTH_FAILURE, &conn->flags))
+ reason = MGMT_DEV_DISCONN_AUTH_FAILURE;
+ else
+ reason = hci_to_mgmt_reason(ev->reason);
+
mgmt_device_disconnected(hdev, &conn->dst, conn->type, conn->dst_type,
reason, mgmt_connected);
@@ -2421,6 +2427,8 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
goto unlock;
if (!ev->status) {
+ clear_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
+
if (!hci_conn_ssp_enabled(conn) &&
test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) {
BT_INFO("re-auth of legacy device is not possible.");
@@ -2429,6 +2437,9 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn->sec_level = conn->pending_sec_level;
}
} else {
+ if (ev->status == HCI_ERROR_PIN_OR_KEY_MISSING)
+ set_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
+
mgmt_auth_failed(conn, ev->status);
}
@@ -2613,6 +2624,9 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
if (ev->status && conn->state == BT_CONNECTED) {
+ if (ev->status == HCI_ERROR_PIN_OR_KEY_MISSING)
+ set_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
+
hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
hci_conn_drop(conn);
goto unlock;
@@ -3249,7 +3263,7 @@ static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev,
struct hci_chan *chan;
switch (hdev->dev_type) {
- case HCI_BREDR:
+ case HCI_PRIMARY:
return hci_conn_hash_lookup_handle(hdev, handle);
case HCI_AMP:
chan = hci_chan_lookup_handle(hdev, handle);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1298d723c0e0..6ef8a01a9ad4 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -676,7 +676,7 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
return -EOPNOTSUPP;
- if (hdev->dev_type != HCI_BREDR)
+ if (hdev->dev_type != HCI_PRIMARY)
return -EOPNOTSUPP;
switch (cmd) {
@@ -1048,6 +1048,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
+ unsigned int skblen;
BT_DBG("sock %p, sk %p", sock, sk);
@@ -1064,6 +1065,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
if (!skb)
return err;
+ skblen = skb->len;
copied = skb->len;
if (len < copied) {
msg->msg_flags |= MSG_TRUNC;
@@ -1089,6 +1091,9 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
skb_free_datagram(sk, skb);
+ if (msg->msg_flags & MSG_TRUNC)
+ copied = skblen;
+
return err ? : copied;
}
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 555982a78a58..ca7a35ebaefb 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -7,50 +7,6 @@
static struct class *bt_class;
-static inline char *link_typetostr(int type)
-{
- switch (type) {
- case ACL_LINK:
- return "ACL";
- case SCO_LINK:
- return "SCO";
- case ESCO_LINK:
- return "eSCO";
- case LE_LINK:
- return "LE";
- default:
- return "UNKNOWN";
- }
-}
-
-static ssize_t show_link_type(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hci_conn *conn = to_hci_conn(dev);
- return sprintf(buf, "%s\n", link_typetostr(conn->type));
-}
-
-static ssize_t show_link_address(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hci_conn *conn = to_hci_conn(dev);
- return sprintf(buf, "%pMR\n", &conn->dst);
-}
-
-#define LINK_ATTR(_name, _mode, _show, _store) \
-struct device_attribute link_attr_##_name = __ATTR(_name, _mode, _show, _store)
-
-static LINK_ATTR(type, S_IRUGO, show_link_type, NULL);
-static LINK_ATTR(address, S_IRUGO, show_link_address, NULL);
-
-static struct attribute *bt_link_attrs[] = {
- &link_attr_type.attr,
- &link_attr_address.attr,
- NULL
-};
-
-ATTRIBUTE_GROUPS(bt_link);
-
static void bt_link_release(struct device *dev)
{
struct hci_conn *conn = to_hci_conn(dev);
@@ -59,7 +15,6 @@ static void bt_link_release(struct device *dev)
static struct device_type bt_link = {
.name = "link",
- .groups = bt_link_groups,
.release = bt_link_release,
};
@@ -124,59 +79,6 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
hci_dev_put(hdev);
}
-static inline char *host_typetostr(int type)
-{
- switch (type) {
- case HCI_BREDR:
- return "BR/EDR";
- case HCI_AMP:
- return "AMP";
- default:
- return "UNKNOWN";
- }
-}
-
-static ssize_t show_type(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hci_dev *hdev = to_hci_dev(dev);
- return sprintf(buf, "%s\n", host_typetostr(hdev->dev_type));
-}
-
-static ssize_t show_name(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hci_dev *hdev = to_hci_dev(dev);
- char name[HCI_MAX_NAME_LENGTH + 1];
- int i;
-
- for (i = 0; i < HCI_MAX_NAME_LENGTH; i++)
- name[i] = hdev->dev_name[i];
-
- name[HCI_MAX_NAME_LENGTH] = '\0';
- return sprintf(buf, "%s\n", name);
-}
-
-static ssize_t show_address(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hci_dev *hdev = to_hci_dev(dev);
- return sprintf(buf, "%pMR\n", &hdev->bdaddr);
-}
-
-static DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
-static DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-
-static struct attribute *bt_host_attrs[] = {
- &dev_attr_type.attr,
- &dev_attr_name.attr,
- &dev_attr_address.attr,
- NULL
-};
-
-ATTRIBUTE_GROUPS(bt_host);
-
static void bt_host_release(struct device *dev)
{
struct hci_dev *hdev = to_hci_dev(dev);
@@ -186,7 +88,6 @@ static void bt_host_release(struct device *dev)
static struct device_type bt_host = {
.name = "host",
- .groups = bt_host_groups,
.release = bt_host_release,
};
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index eb4f5f24cbe3..54ceb1f2cc9a 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7468,7 +7468,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
int len;
/* For AMP controller do not create l2cap conn */
- if (!conn && hcon->hdev->dev_type != HCI_BREDR)
+ if (!conn && hcon->hdev->dev_type != HCI_PRIMARY)
goto drop;
if (!conn)
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 388ee8b59145..1842141baedb 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -927,7 +927,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u32 __user *) optval)) {
+ if (get_user(opt, (u16 __user *) optval)) {
err = -EFAULT;
break;
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 9e4b931588cf..7639290b6de3 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -38,7 +38,7 @@
#include "mgmt_util.h"
#define MGMT_VERSION 1
-#define MGMT_REVISION 12
+#define MGMT_REVISION 13
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -359,7 +359,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
count = 0;
list_for_each_entry(d, &hci_dev_list, list) {
- if (d->dev_type == HCI_BREDR &&
+ if (d->dev_type == HCI_PRIMARY &&
!hci_dev_test_flag(d, HCI_UNCONFIGURED))
count++;
}
@@ -384,7 +384,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
continue;
- if (d->dev_type == HCI_BREDR &&
+ if (d->dev_type == HCI_PRIMARY &&
!hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
rp->index[count++] = cpu_to_le16(d->id);
BT_DBG("Added hci%u", d->id);
@@ -419,7 +419,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
count = 0;
list_for_each_entry(d, &hci_dev_list, list) {
- if (d->dev_type == HCI_BREDR &&
+ if (d->dev_type == HCI_PRIMARY &&
hci_dev_test_flag(d, HCI_UNCONFIGURED))
count++;
}
@@ -444,7 +444,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
continue;
- if (d->dev_type == HCI_BREDR &&
+ if (d->dev_type == HCI_PRIMARY &&
hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
rp->index[count++] = cpu_to_le16(d->id);
BT_DBG("Added hci%u", d->id);
@@ -479,7 +479,7 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
count = 0;
list_for_each_entry(d, &hci_dev_list, list) {
- if (d->dev_type == HCI_BREDR || d->dev_type == HCI_AMP)
+ if (d->dev_type == HCI_PRIMARY || d->dev_type == HCI_AMP)
count++;
}
@@ -503,7 +503,7 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
continue;
- if (d->dev_type == HCI_BREDR) {
+ if (d->dev_type == HCI_PRIMARY) {
if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
rp->entry[count].type = 0x01;
else
@@ -6366,7 +6366,7 @@ void mgmt_index_added(struct hci_dev *hdev)
return;
switch (hdev->dev_type) {
- case HCI_BREDR:
+ case HCI_PRIMARY:
if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev,
NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS);
@@ -6399,7 +6399,7 @@ void mgmt_index_removed(struct hci_dev *hdev)
return;
switch (hdev->dev_type) {
- case HCI_BREDR:
+ case HCI_PRIMARY:
mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 50976a6481f3..4c1a16a96ae5 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -22,9 +22,9 @@
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
+#include <linux/crypto.h>
#include <crypto/b128ops.h>
#include <crypto/hash.h>
-#include <crypto/skcipher.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -88,7 +88,7 @@ struct smp_dev {
u8 min_key_size;
u8 max_key_size;
- struct crypto_skcipher *tfm_aes;
+ struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
};
@@ -127,7 +127,7 @@ struct smp_chan {
u8 dhkey[32];
u8 mackey[16];
- struct crypto_skcipher *tfm_aes;
+ struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
};
@@ -361,10 +361,8 @@ static int smp_h6(struct crypto_shash *tfm_cmac, const u8 w[16],
* s1 and ah.
*/
-static int smp_e(struct crypto_skcipher *tfm, const u8 *k, u8 *r)
+static int smp_e(struct crypto_cipher *tfm, const u8 *k, u8 *r)
{
- SKCIPHER_REQUEST_ON_STACK(req, tfm);
- struct scatterlist sg;
uint8_t tmp[16], data[16];
int err;
@@ -378,7 +376,7 @@ static int smp_e(struct crypto_skcipher *tfm, const u8 *k, u8 *r)
/* The most significant octet of key corresponds to k[0] */
swap_buf(k, tmp, 16);
- err = crypto_skcipher_setkey(tfm, tmp, 16);
+ err = crypto_cipher_setkey(tfm, tmp, 16);
if (err) {
BT_ERR("cipher setkey failed: %d", err);
return err;
@@ -387,16 +385,7 @@ static int smp_e(struct crypto_skcipher *tfm, const u8 *k, u8 *r)
/* Most significant octet of plaintextData corresponds to data[0] */
swap_buf(r, data, 16);
- sg_init_one(&sg, data, 16);
-
- skcipher_request_set_tfm(req, tfm);
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg, &sg, 16, NULL);
-
- err = crypto_skcipher_encrypt(req);
- skcipher_request_zero(req);
- if (err)
- BT_ERR("Encrypt data error %d", err);
+ crypto_cipher_encrypt_one(tfm, data, data);
/* Most significant octet of encryptedData corresponds to data[0] */
swap_buf(data, r, 16);
@@ -406,7 +395,7 @@ static int smp_e(struct crypto_skcipher *tfm, const u8 *k, u8 *r)
return err;
}
-static int smp_c1(struct crypto_skcipher *tfm_aes, const u8 k[16],
+static int smp_c1(struct crypto_cipher *tfm_aes, const u8 k[16],
const u8 r[16], const u8 preq[7], const u8 pres[7], u8 _iat,
const bdaddr_t *ia, u8 _rat, const bdaddr_t *ra, u8 res[16])
{
@@ -455,7 +444,7 @@ static int smp_c1(struct crypto_skcipher *tfm_aes, const u8 k[16],
return err;
}
-static int smp_s1(struct crypto_skcipher *tfm_aes, const u8 k[16],
+static int smp_s1(struct crypto_cipher *tfm_aes, const u8 k[16],
const u8 r1[16], const u8 r2[16], u8 _r[16])
{
int err;
@@ -471,7 +460,7 @@ static int smp_s1(struct crypto_skcipher *tfm_aes, const u8 k[16],
return err;
}
-static int smp_ah(struct crypto_skcipher *tfm, const u8 irk[16],
+static int smp_ah(struct crypto_cipher *tfm, const u8 irk[16],
const u8 r[3], u8 res[3])
{
u8 _res[16];
@@ -759,7 +748,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
kzfree(smp->slave_csrk);
kzfree(smp->link_key);
- crypto_free_skcipher(smp->tfm_aes);
+ crypto_free_cipher(smp->tfm_aes);
crypto_free_shash(smp->tfm_cmac);
/* Ensure that we don't leave any debug key around if debug key
@@ -1359,9 +1348,9 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
if (!smp)
return NULL;
- smp->tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
+ smp->tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(smp->tfm_aes)) {
- BT_ERR("Unable to create ECB crypto context");
+ BT_ERR("Unable to create AES crypto context");
kzfree(smp);
return NULL;
}
@@ -1369,7 +1358,7 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
if (IS_ERR(smp->tfm_cmac)) {
BT_ERR("Unable to create CMAC crypto context");
- crypto_free_skcipher(smp->tfm_aes);
+ crypto_free_cipher(smp->tfm_aes);
kzfree(smp);
return NULL;
}
@@ -3120,7 +3109,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
{
struct l2cap_chan *chan;
struct smp_dev *smp;
- struct crypto_skcipher *tfm_aes;
+ struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
if (cid == L2CAP_CID_SMP_BREDR) {
@@ -3132,9 +3121,9 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
if (!smp)
return ERR_PTR(-ENOMEM);
- tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
+ tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm_aes)) {
- BT_ERR("Unable to create ECB crypto context");
+ BT_ERR("Unable to create AES crypto context");
kzfree(smp);
return ERR_CAST(tfm_aes);
}
@@ -3142,7 +3131,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
if (IS_ERR(tfm_cmac)) {
BT_ERR("Unable to create CMAC crypto context");
- crypto_free_skcipher(tfm_aes);
+ crypto_free_cipher(tfm_aes);
kzfree(smp);
return ERR_CAST(tfm_cmac);
}
@@ -3156,7 +3145,7 @@ create_chan:
chan = l2cap_chan_create();
if (!chan) {
if (smp) {
- crypto_free_skcipher(smp->tfm_aes);
+ crypto_free_cipher(smp->tfm_aes);
crypto_free_shash(smp->tfm_cmac);
kzfree(smp);
}
@@ -3203,7 +3192,7 @@ static void smp_del_chan(struct l2cap_chan *chan)
smp = chan->data;
if (smp) {
chan->data = NULL;
- crypto_free_skcipher(smp->tfm_aes);
+ crypto_free_cipher(smp->tfm_aes);
crypto_free_shash(smp->tfm_cmac);
kzfree(smp);
}
@@ -3440,7 +3429,7 @@ void smp_unregister(struct hci_dev *hdev)
#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP)
-static int __init test_ah(struct crypto_skcipher *tfm_aes)
+static int __init test_ah(struct crypto_cipher *tfm_aes)
{
const u8 irk[16] = {
0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34,
@@ -3460,7 +3449,7 @@ static int __init test_ah(struct crypto_skcipher *tfm_aes)
return 0;
}
-static int __init test_c1(struct crypto_skcipher *tfm_aes)
+static int __init test_c1(struct crypto_cipher *tfm_aes)
{
const u8 k[16] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3490,7 +3479,7 @@ static int __init test_c1(struct crypto_skcipher *tfm_aes)
return 0;
}
-static int __init test_s1(struct crypto_skcipher *tfm_aes)
+static int __init test_s1(struct crypto_cipher *tfm_aes)
{
const u8 k[16] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3686,7 +3675,7 @@ static const struct file_operations test_smp_fops = {
.llseek = default_llseek,
};
-static int __init run_selftests(struct crypto_skcipher *tfm_aes,
+static int __init run_selftests(struct crypto_cipher *tfm_aes,
struct crypto_shash *tfm_cmac)
{
ktime_t calltime, delta, rettime;
@@ -3764,27 +3753,27 @@ done:
int __init bt_selftest_smp(void)
{
- struct crypto_skcipher *tfm_aes;
+ struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
int err;
- tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
+ tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm_aes)) {
- BT_ERR("Unable to create ECB crypto context");
+ BT_ERR("Unable to create AES crypto context");
return PTR_ERR(tfm_aes);
}
tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm_cmac)) {
BT_ERR("Unable to create CMAC crypto context");
- crypto_free_skcipher(tfm_aes);
+ crypto_free_cipher(tfm_aes);
return PTR_ERR(tfm_cmac);
}
err = run_selftests(tfm_aes, tfm_cmac);
crypto_free_shash(tfm_cmac);
- crypto_free_skcipher(tfm_aes);
+ crypto_free_cipher(tfm_aes);
return err;
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 0c39e0f6da09..09f26940aba5 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -61,11 +61,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
goto out;
- if (is_broadcast_ether_addr(dest))
- br_flood_deliver(br, skb, false);
- else if (is_multicast_ether_addr(dest)) {
+ if (is_broadcast_ether_addr(dest)) {
+ br_flood(br, skb, false, false, true);
+ } else if (is_multicast_ether_addr(dest)) {
if (unlikely(netpoll_tx_running(dev))) {
- br_flood_deliver(br, skb, false);
+ br_flood(br, skb, false, false, true);
goto out;
}
if (br_multicast_rcv(br, NULL, skb, vid)) {
@@ -76,14 +76,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
mdst = br_mdb_get(br, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
br_multicast_querier_exists(br, eth_hdr(skb)))
- br_multicast_deliver(mdst, skb);
+ br_multicast_flood(mdst, skb, false, true);
else
- br_flood_deliver(br, skb, false);
- } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL)
- br_deliver(dst->dst, skb);
- else
- br_flood_deliver(br, skb, true);
-
+ br_flood(br, skb, false, false, true);
+ } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) {
+ br_forward(dst->dst, skb, false, true);
+ } else {
+ br_flood(br, skb, true, false, true);
+ }
out:
rcu_read_unlock();
return NETDEV_TX_OK;
@@ -349,6 +349,8 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_add_slave = br_add_slave,
.ndo_del_slave = br_del_slave,
.ndo_fix_features = br_fix_features,
+ .ndo_neigh_construct = netdev_default_l2upper_neigh_construct,
+ .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy,
.ndo_fdb_add = br_fdb_add,
.ndo_fdb_del = br_fdb_delete,
.ndo_fdb_dump = br_fdb_dump,
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 6c196037d818..63a83d8d7da3 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -21,11 +21,6 @@
#include <linux/netfilter_bridge.h>
#include "br_private.h"
-static int deliver_clone(const struct net_bridge_port *prev,
- struct sk_buff *skb,
- void (*__packet_hook)(const struct net_bridge_port *p,
- struct sk_buff *skb));
-
/* Don't forward packets to originating port or forwarding disabled */
static inline int should_deliver(const struct net_bridge_port *p,
const struct sk_buff *skb)
@@ -75,105 +70,92 @@ int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(br_forward_finish);
-static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
+static void __br_forward(const struct net_bridge_port *to,
+ struct sk_buff *skb, bool local_orig)
{
struct net_bridge_vlan_group *vg;
+ struct net_device *indev;
+ struct net *net;
+ int br_hook;
vg = nbp_vlan_group_rcu(to);
skb = br_handle_vlan(to->br, vg, skb);
if (!skb)
return;
+ indev = skb->dev;
skb->dev = to->dev;
-
- if (unlikely(netpoll_tx_running(to->br->dev))) {
- if (!is_skb_forwardable(skb->dev, skb))
+ if (!local_orig) {
+ if (skb_warn_if_lro(skb)) {
kfree_skb(skb);
- else {
- skb_push(skb, ETH_HLEN);
- br_netpoll_send_skb(to, skb);
+ return;
}
- return;
+ br_hook = NF_BR_FORWARD;
+ skb_forward_csum(skb);
+ net = dev_net(indev);
+ } else {
+ if (unlikely(netpoll_tx_running(to->br->dev))) {
+ if (!is_skb_forwardable(skb->dev, skb)) {
+ kfree_skb(skb);
+ } else {
+ skb_push(skb, ETH_HLEN);
+ br_netpoll_send_skb(to, skb);
+ }
+ return;
+ }
+ br_hook = NF_BR_LOCAL_OUT;
+ net = dev_net(skb->dev);
+ indev = NULL;
}
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
- dev_net(skb->dev), NULL, skb,NULL, skb->dev,
+ NF_HOOK(NFPROTO_BRIDGE, br_hook,
+ net, NULL, skb, indev, skb->dev,
br_forward_finish);
}
-static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
+static int deliver_clone(const struct net_bridge_port *prev,
+ struct sk_buff *skb, bool local_orig)
{
- struct net_bridge_vlan_group *vg;
- struct net_device *indev;
-
- if (skb_warn_if_lro(skb)) {
- kfree_skb(skb);
- return;
- }
-
- vg = nbp_vlan_group_rcu(to);
- skb = br_handle_vlan(to->br, vg, skb);
- if (!skb)
- return;
-
- indev = skb->dev;
- skb->dev = to->dev;
- skb_forward_csum(skb);
-
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD,
- dev_net(indev), NULL, skb, indev, skb->dev,
- br_forward_finish);
-}
+ struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
-/* called with rcu_read_lock */
-void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
-{
- if (to && should_deliver(to, skb)) {
- __br_deliver(to, skb);
- return;
+ skb = skb_clone(skb, GFP_ATOMIC);
+ if (!skb) {
+ dev->stats.tx_dropped++;
+ return -ENOMEM;
}
- kfree_skb(skb);
+ __br_forward(prev, skb, local_orig);
+ return 0;
}
-EXPORT_SYMBOL_GPL(br_deliver);
-/* called with rcu_read_lock */
-void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0)
+/**
+ * br_forward - forward a packet to a specific port
+ * @to: destination port
+ * @skb: packet being forwarded
+ * @local_rcv: packet will be received locally after forwarding
+ * @local_orig: packet is locally originated
+ *
+ * Should be called with rcu_read_lock.
+ */
+void br_forward(const struct net_bridge_port *to,
+ struct sk_buff *skb, bool local_rcv, bool local_orig)
{
if (to && should_deliver(to, skb)) {
- if (skb0)
- deliver_clone(to, skb, __br_forward);
+ if (local_rcv)
+ deliver_clone(to, skb, local_orig);
else
- __br_forward(to, skb);
+ __br_forward(to, skb, local_orig);
return;
}
- if (!skb0)
+ if (!local_rcv)
kfree_skb(skb);
}
-
-static int deliver_clone(const struct net_bridge_port *prev,
- struct sk_buff *skb,
- void (*__packet_hook)(const struct net_bridge_port *p,
- struct sk_buff *skb))
-{
- struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
-
- skb = skb_clone(skb, GFP_ATOMIC);
- if (!skb) {
- dev->stats.tx_dropped++;
- return -ENOMEM;
- }
-
- __packet_hook(prev, skb);
- return 0;
-}
+EXPORT_SYMBOL_GPL(br_forward);
static struct net_bridge_port *maybe_deliver(
struct net_bridge_port *prev, struct net_bridge_port *p,
- struct sk_buff *skb,
- void (*__packet_hook)(const struct net_bridge_port *p,
- struct sk_buff *skb))
+ struct sk_buff *skb, bool local_orig)
{
int err;
@@ -183,7 +165,7 @@ static struct net_bridge_port *maybe_deliver(
if (!prev)
goto out;
- err = deliver_clone(prev, skb, __packet_hook);
+ err = deliver_clone(prev, skb, local_orig);
if (err)
return ERR_PTR(err);
@@ -191,20 +173,14 @@ out:
return p;
}
-/* called under bridge lock */
-static void br_flood(struct net_bridge *br, struct sk_buff *skb,
- struct sk_buff *skb0,
- void (*__packet_hook)(const struct net_bridge_port *p,
- struct sk_buff *skb),
- bool unicast)
+/* called under rcu_read_lock */
+void br_flood(struct net_bridge *br, struct sk_buff *skb,
+ bool unicast, bool local_rcv, bool local_orig)
{
u8 igmp_type = br_multicast_igmp_type(skb);
- __be16 proto = skb->protocol;
- struct net_bridge_port *prev;
+ struct net_bridge_port *prev = NULL;
struct net_bridge_port *p;
- prev = NULL;
-
list_for_each_entry_rcu(p, &br->port_list, list) {
/* Do not flood unicast traffic to ports that turn it off */
if (unicast && !(p->flags & BR_FLOOD))
@@ -217,57 +193,39 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
BR_INPUT_SKB_CB(skb)->proxyarp_replied)
continue;
- prev = maybe_deliver(prev, p, skb, __packet_hook);
+ prev = maybe_deliver(prev, p, skb, local_orig);
if (IS_ERR(prev))
goto out;
if (prev == p)
- br_multicast_count(p->br, p, proto, igmp_type,
+ br_multicast_count(p->br, p, skb, igmp_type,
BR_MCAST_DIR_TX);
}
if (!prev)
goto out;
- if (skb0)
- deliver_clone(prev, skb, __packet_hook);
+ if (local_rcv)
+ deliver_clone(prev, skb, local_orig);
else
- __packet_hook(prev, skb);
+ __br_forward(prev, skb, local_orig);
return;
out:
- if (!skb0)
+ if (!local_rcv)
kfree_skb(skb);
}
-
-/* called with rcu_read_lock */
-void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast)
-{
- br_flood(br, skb, NULL, __br_deliver, unicast);
-}
-
-/* called under bridge lock */
-void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
- struct sk_buff *skb2, bool unicast)
-{
- br_flood(br, skb, skb2, __br_forward, unicast);
-}
-
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
/* called with rcu_read_lock */
-static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
- struct sk_buff *skb, struct sk_buff *skb0,
- void (*__packet_hook)(
- const struct net_bridge_port *p,
- struct sk_buff *skb))
+void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
+ struct sk_buff *skb,
+ bool local_rcv, bool local_orig)
{
struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
u8 igmp_type = br_multicast_igmp_type(skb);
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *prev = NULL;
struct net_bridge_port_group *p;
- __be16 proto = skb->protocol;
-
struct hlist_node *rp;
rp = rcu_dereference(hlist_first_rcu(&br->router_list));
@@ -282,11 +240,11 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
port = (unsigned long)lport > (unsigned long)rport ?
lport : rport;
- prev = maybe_deliver(prev, port, skb, __packet_hook);
+ prev = maybe_deliver(prev, port, skb, local_orig);
if (IS_ERR(prev))
goto out;
if (prev == port)
- br_multicast_count(port->br, port, proto, igmp_type,
+ br_multicast_count(port->br, port, skb, igmp_type,
BR_MCAST_DIR_TX);
if ((unsigned long)lport >= (unsigned long)port)
@@ -298,28 +256,14 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
if (!prev)
goto out;
- if (skb0)
- deliver_clone(prev, skb, __packet_hook);
+ if (local_rcv)
+ deliver_clone(prev, skb, local_orig);
else
- __packet_hook(prev, skb);
+ __br_forward(prev, skb, local_orig);
return;
out:
- if (!skb0)
+ if (!local_rcv)
kfree_skb(skb);
}
-
-/* called with rcu_read_lock */
-void br_multicast_deliver(struct net_bridge_mdb_entry *mdst,
- struct sk_buff *skb)
-{
- br_multicast_flood(mdst, skb, NULL, __br_deliver);
-}
-
-/* called with rcu_read_lock */
-void br_multicast_forward(struct net_bridge_mdb_entry *mdst,
- struct sk_buff *skb, struct sk_buff *skb2)
-{
- br_multicast_flood(mdst, skb, skb2, __br_forward);
-}
#endif
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 786602bc0567..8b08eec763a5 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -61,7 +61,7 @@ static int br_pass_frame_up(struct sk_buff *skb)
if (!skb)
return NET_RX_DROP;
/* update the multicast stats if the packet is IGMP/MLD */
- br_multicast_count(br, NULL, skb->protocol, br_multicast_igmp_type(skb),
+ br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
BR_MCAST_DIR_TX);
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
@@ -131,13 +131,12 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
/* note: already called with rcu_read_lock */
int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- const unsigned char *dest = eth_hdr(skb)->h_dest;
+ bool local_rcv = false, mcast_hit = false, unicast = true;
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
- struct net_bridge *br;
- struct net_bridge_fdb_entry *dst;
+ const unsigned char *dest = eth_hdr(skb)->h_dest;
+ struct net_bridge_fdb_entry *dst = NULL;
struct net_bridge_mdb_entry *mdst;
- struct sk_buff *skb2;
- bool unicast = true;
+ struct net_bridge *br;
u16 vid = 0;
if (!p || p->state == BR_STATE_DISABLED)
@@ -160,53 +159,46 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
BR_INPUT_SKB_CB(skb)->brdev = br->dev;
- /* The packet skb2 goes to the local host (NULL to skip). */
- skb2 = NULL;
-
- if (br->dev->flags & IFF_PROMISC)
- skb2 = skb;
-
- dst = NULL;
+ local_rcv = !!(br->dev->flags & IFF_PROMISC);
if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
br_do_proxy_arp(skb, br, vid, p);
if (is_broadcast_ether_addr(dest)) {
- skb2 = skb;
+ local_rcv = true;
unicast = false;
} else if (is_multicast_ether_addr(dest)) {
mdst = br_mdb_get(br, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
br_multicast_querier_exists(br, eth_hdr(skb))) {
if ((mdst && mdst->mglist) ||
- br_multicast_is_router(br))
- skb2 = skb;
- br_multicast_forward(mdst, skb, skb2);
- skb = NULL;
- if (!skb2)
- goto out;
- } else
- skb2 = skb;
-
+ br_multicast_is_router(br)) {
+ local_rcv = true;
+ br->dev->stats.multicast++;
+ }
+ mcast_hit = true;
+ } else {
+ local_rcv = true;
+ br->dev->stats.multicast++;
+ }
unicast = false;
- br->dev->stats.multicast++;
- } else if ((dst = __br_fdb_get(br, dest, vid)) &&
- dst->is_local) {
- skb2 = skb;
+ } else if ((dst = __br_fdb_get(br, dest, vid)) && dst->is_local) {
/* Do not forward the packet since it's local. */
- skb = NULL;
+ return br_pass_frame_up(skb);
}
- if (skb) {
- if (dst) {
- dst->used = jiffies;
- br_forward(dst->dst, skb, skb2);
- } else
- br_flood_forward(br, skb, skb2, unicast);
+ if (dst) {
+ dst->used = jiffies;
+ br_forward(dst->dst, skb, local_rcv, false);
+ } else {
+ if (!mcast_hit)
+ br_flood(br, skb, unicast, local_rcv, false);
+ else
+ br_multicast_flood(mdst, skb, local_rcv, false);
}
- if (skb2)
- return br_pass_frame_up(skb2);
+ if (local_rcv)
+ return br_pass_frame_up(skb);
out:
return 0;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index e405eef0ae2e..a5423a1eec05 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -843,14 +843,14 @@ static void __br_multicast_send_query(struct net_bridge *br,
if (port) {
skb->dev = port->dev;
- br_multicast_count(br, port, skb->protocol, igmp_type,
+ br_multicast_count(br, port, skb, igmp_type,
BR_MCAST_DIR_TX);
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
dev_net(port->dev), NULL, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
} else {
br_multicast_select_own_querier(br, ip, skb);
- br_multicast_count(br, port, skb->protocol, igmp_type,
+ br_multicast_count(br, port, skb, igmp_type,
BR_MCAST_DIR_RX);
netif_rx(skb);
}
@@ -1676,7 +1676,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
if (skb_trimmed && skb_trimmed != skb)
kfree_skb(skb_trimmed);
- br_multicast_count(br, port, skb->protocol, BR_INPUT_SKB_CB(skb)->igmp,
+ br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
@@ -1725,7 +1725,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
if (skb_trimmed && skb_trimmed != skb)
kfree_skb(skb_trimmed);
- br_multicast_count(br, port, skb->protocol, BR_INPUT_SKB_CB(skb)->igmp,
+ br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
@@ -2251,13 +2251,16 @@ unlock:
EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
- __be16 proto, u8 type, u8 dir)
+ const struct sk_buff *skb, u8 type, u8 dir)
{
struct bridge_mcast_stats *pstats = this_cpu_ptr(stats);
+ __be16 proto = skb->protocol;
+ unsigned int t_len;
u64_stats_update_begin(&pstats->syncp);
switch (proto) {
case htons(ETH_P_IP):
+ t_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb);
switch (type) {
case IGMP_HOST_MEMBERSHIP_REPORT:
pstats->mstats.igmp_v1reports[dir]++;
@@ -2269,7 +2272,21 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
pstats->mstats.igmp_v3reports[dir]++;
break;
case IGMP_HOST_MEMBERSHIP_QUERY:
- pstats->mstats.igmp_queries[dir]++;
+ if (t_len != sizeof(struct igmphdr)) {
+ pstats->mstats.igmp_v3queries[dir]++;
+ } else {
+ unsigned int offset = skb_transport_offset(skb);
+ struct igmphdr *ih, _ihdr;
+
+ ih = skb_header_pointer(skb, offset,
+ sizeof(_ihdr), &_ihdr);
+ if (!ih)
+ break;
+ if (!ih->code)
+ pstats->mstats.igmp_v1queries[dir]++;
+ else
+ pstats->mstats.igmp_v2queries[dir]++;
+ }
break;
case IGMP_HOST_LEAVE_MESSAGE:
pstats->mstats.igmp_leaves[dir]++;
@@ -2278,6 +2295,9 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
+ t_len = ntohs(ipv6_hdr(skb)->payload_len) +
+ sizeof(struct ipv6hdr);
+ t_len -= skb_network_header_len(skb);
switch (type) {
case ICMPV6_MGM_REPORT:
pstats->mstats.mld_v1reports[dir]++;
@@ -2286,7 +2306,10 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
pstats->mstats.mld_v2reports[dir]++;
break;
case ICMPV6_MGM_QUERY:
- pstats->mstats.mld_queries[dir]++;
+ if (t_len != sizeof(struct mld_msg))
+ pstats->mstats.mld_v2queries[dir]++;
+ else
+ pstats->mstats.mld_v1queries[dir]++;
break;
case ICMPV6_MGM_REDUCTION:
pstats->mstats.mld_leaves[dir]++;
@@ -2299,7 +2322,7 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
}
void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
- __be16 proto, u8 type, u8 dir)
+ const struct sk_buff *skb, u8 type, u8 dir)
{
struct bridge_mcast_stats __percpu *stats;
@@ -2314,7 +2337,7 @@ void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
if (WARN_ON(!stats))
return;
- br_mcast_stats_add(stats, proto, type, dir);
+ br_mcast_stats_add(stats, skb, type, dir);
}
int br_multicast_init_stats(struct net_bridge *br)
@@ -2359,14 +2382,17 @@ void br_multicast_get_stats(const struct net_bridge *br,
memcpy(&temp, &cpu_stats->mstats, sizeof(temp));
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
- mcast_stats_add_dir(tdst.igmp_queries, temp.igmp_queries);
+ mcast_stats_add_dir(tdst.igmp_v1queries, temp.igmp_v1queries);
+ mcast_stats_add_dir(tdst.igmp_v2queries, temp.igmp_v2queries);
+ mcast_stats_add_dir(tdst.igmp_v3queries, temp.igmp_v3queries);
mcast_stats_add_dir(tdst.igmp_leaves, temp.igmp_leaves);
mcast_stats_add_dir(tdst.igmp_v1reports, temp.igmp_v1reports);
mcast_stats_add_dir(tdst.igmp_v2reports, temp.igmp_v2reports);
mcast_stats_add_dir(tdst.igmp_v3reports, temp.igmp_v3reports);
tdst.igmp_parse_errors += temp.igmp_parse_errors;
- mcast_stats_add_dir(tdst.mld_queries, temp.mld_queries);
+ mcast_stats_add_dir(tdst.mld_v1queries, temp.mld_v1queries);
+ mcast_stats_add_dir(tdst.mld_v2queries, temp.mld_v2queries);
mcast_stats_add_dir(tdst.mld_leaves, temp.mld_leaves);
mcast_stats_add_dir(tdst.mld_v1reports, temp.mld_v1reports);
mcast_stats_add_dir(tdst.mld_v2reports, temp.mld_v2reports);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2d25979273a6..77e7f69bf80d 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -700,7 +700,7 @@ static int
br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
- unsigned int mtu = ip_skb_dst_mtu(skb);
+ unsigned int mtu = ip_skb_dst_mtu(sk, skb);
struct iphdr *iph = ip_hdr(skb);
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 4dc851166ad1..b3088264f844 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -505,14 +505,12 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid);
/* br_forward.c */
-void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb);
int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb);
-void br_forward(const struct net_bridge_port *to,
- struct sk_buff *skb, struct sk_buff *skb0);
+void br_forward(const struct net_bridge_port *to, struct sk_buff *skb,
+ bool local_rcv, bool local_orig);
int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
-void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast);
-void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
- struct sk_buff *skb2, bool unicast);
+void br_flood(struct net_bridge *br, struct sk_buff *skb,
+ bool unicast, bool local_rcv, bool local_orig);
/* br_if.c */
void br_port_carrier_check(struct net_bridge_port *p);
@@ -560,10 +558,8 @@ void br_multicast_init(struct net_bridge *br);
void br_multicast_open(struct net_bridge *br);
void br_multicast_stop(struct net_bridge *br);
void br_multicast_dev_del(struct net_bridge *br);
-void br_multicast_deliver(struct net_bridge_mdb_entry *mdst,
- struct sk_buff *skb);
-void br_multicast_forward(struct net_bridge_mdb_entry *mdst,
- struct sk_buff *skb, struct sk_buff *skb2);
+void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
+ struct sk_buff *skb, bool local_rcv, bool local_orig);
int br_multicast_set_router(struct net_bridge *br, unsigned long val);
int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val);
int br_multicast_toggle(struct net_bridge *br, unsigned long val);
@@ -586,7 +582,7 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
int type);
void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
- __be16 proto, u8 type, u8 dir);
+ const struct sk_buff *skb, u8 type, u8 dir);
int br_multicast_init_stats(struct net_bridge *br);
void br_multicast_get_stats(const struct net_bridge *br,
const struct net_bridge_port *p,
@@ -691,35 +687,35 @@ static inline void br_multicast_dev_del(struct net_bridge *br)
{
}
-static inline void br_multicast_deliver(struct net_bridge_mdb_entry *mdst,
- struct sk_buff *skb)
+static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
+ struct sk_buff *skb,
+ bool local_rcv, bool local_orig)
{
}
-static inline void br_multicast_forward(struct net_bridge_mdb_entry *mdst,
- struct sk_buff *skb,
- struct sk_buff *skb2)
-{
-}
static inline bool br_multicast_is_router(struct net_bridge *br)
{
return 0;
}
+
static inline bool br_multicast_querier_exists(struct net_bridge *br,
struct ethhdr *eth)
{
return false;
}
+
static inline void br_mdb_init(void)
{
}
+
static inline void br_mdb_uninit(void)
{
}
static inline void br_multicast_count(struct net_bridge *br,
const struct net_bridge_port *p,
- __be16 proto, u8 type, u8 dir)
+ const struct sk_buff *skb,
+ u8 type, u8 dir)
{
}
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 2a449b7ab8fa..5fc4affd9fdb 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -20,16 +20,16 @@ ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par)
__be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type;
if (info->bitmask & EBT_802_3_SAP) {
- if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP))
+ if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.ssap))
return false;
- if (FWINV(info->sap != hdr->llc.ui.dsap, EBT_802_3_SAP))
+ if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.dsap))
return false;
}
if (info->bitmask & EBT_802_3_TYPE) {
if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE))
return false;
- if (FWINV(info->type != type, EBT_802_3_TYPE))
+ if (NF_INVF(info, EBT_802_3_TYPE, info->type != type))
return false;
}
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index cd457b891b27..227142282b45 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -25,14 +25,14 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
if (ah == NULL)
return false;
- if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode !=
- ah->ar_op, EBT_ARP_OPCODE))
+ if ((info->bitmask & EBT_ARP_OPCODE) &&
+ NF_INVF(info, EBT_ARP_OPCODE, info->opcode != ah->ar_op))
return false;
- if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype !=
- ah->ar_hrd, EBT_ARP_HTYPE))
+ if ((info->bitmask & EBT_ARP_HTYPE) &&
+ NF_INVF(info, EBT_ARP_HTYPE, info->htype != ah->ar_hrd))
return false;
- if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype !=
- ah->ar_pro, EBT_ARP_PTYPE))
+ if ((info->bitmask & EBT_ARP_PTYPE) &&
+ NF_INVF(info, EBT_ARP_PTYPE, info->ptype != ah->ar_pro))
return false;
if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) {
@@ -51,21 +51,22 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
sizeof(daddr), &daddr);
if (dap == NULL)
return false;
- if (info->bitmask & EBT_ARP_SRC_IP &&
- FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP))
+ if ((info->bitmask & EBT_ARP_SRC_IP) &&
+ NF_INVF(info, EBT_ARP_SRC_IP,
+ info->saddr != (*sap & info->smsk)))
return false;
- if (info->bitmask & EBT_ARP_DST_IP &&
- FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP))
+ if ((info->bitmask & EBT_ARP_DST_IP) &&
+ NF_INVF(info, EBT_ARP_DST_IP,
+ info->daddr != (*dap & info->dmsk)))
return false;
- if (info->bitmask & EBT_ARP_GRAT &&
- FWINV(*dap != *sap, EBT_ARP_GRAT))
+ if ((info->bitmask & EBT_ARP_GRAT) &&
+ NF_INVF(info, EBT_ARP_GRAT, *dap != *sap))
return false;
}
if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) {
const unsigned char *mp;
unsigned char _mac[ETH_ALEN];
- uint8_t verdict, i;
if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER))
return false;
@@ -74,11 +75,9 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
sizeof(_mac), &_mac);
if (mp == NULL)
return false;
- verdict = 0;
- for (i = 0; i < 6; i++)
- verdict |= (mp[i] ^ info->smaddr[i]) &
- info->smmsk[i];
- if (FWINV(verdict != 0, EBT_ARP_SRC_MAC))
+ if (NF_INVF(info, EBT_ARP_SRC_MAC,
+ !ether_addr_equal_masked(mp, info->smaddr,
+ info->smmsk)))
return false;
}
@@ -88,11 +87,9 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
sizeof(_mac), &_mac);
if (mp == NULL)
return false;
- verdict = 0;
- for (i = 0; i < 6; i++)
- verdict |= (mp[i] ^ info->dmaddr[i]) &
- info->dmmsk[i];
- if (FWINV(verdict != 0, EBT_ARP_DST_MAC))
+ if (NF_INVF(info, EBT_ARP_DST_MAC,
+ !ether_addr_equal_masked(mp, info->dmaddr,
+ info->dmmsk)))
return false;
}
}
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 23bca62d58d2..d06968bdf5ec 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -36,19 +36,19 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (ih == NULL)
return false;
- if (info->bitmask & EBT_IP_TOS &&
- FWINV(info->tos != ih->tos, EBT_IP_TOS))
+ if ((info->bitmask & EBT_IP_TOS) &&
+ NF_INVF(info, EBT_IP_TOS, info->tos != ih->tos))
return false;
- if (info->bitmask & EBT_IP_SOURCE &&
- FWINV((ih->saddr & info->smsk) !=
- info->saddr, EBT_IP_SOURCE))
+ if ((info->bitmask & EBT_IP_SOURCE) &&
+ NF_INVF(info, EBT_IP_SOURCE,
+ (ih->saddr & info->smsk) != info->saddr))
return false;
if ((info->bitmask & EBT_IP_DEST) &&
- FWINV((ih->daddr & info->dmsk) !=
- info->daddr, EBT_IP_DEST))
+ NF_INVF(info, EBT_IP_DEST,
+ (ih->daddr & info->dmsk) != info->daddr))
return false;
if (info->bitmask & EBT_IP_PROTO) {
- if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO))
+ if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol))
return false;
if (!(info->bitmask & EBT_IP_DPORT) &&
!(info->bitmask & EBT_IP_SPORT))
@@ -61,16 +61,16 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
return false;
if (info->bitmask & EBT_IP_DPORT) {
u32 dst = ntohs(pptr->dst);
- if (FWINV(dst < info->dport[0] ||
- dst > info->dport[1],
- EBT_IP_DPORT))
+ if (NF_INVF(info, EBT_IP_DPORT,
+ dst < info->dport[0] ||
+ dst > info->dport[1]))
return false;
}
if (info->bitmask & EBT_IP_SPORT) {
u32 src = ntohs(pptr->src);
- if (FWINV(src < info->sport[0] ||
- src > info->sport[1],
- EBT_IP_SPORT))
+ if (NF_INVF(info, EBT_IP_SPORT,
+ src < info->sport[0] ||
+ src > info->sport[1]))
return false;
}
}
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 98de6e7fd86d..4617491be41e 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -45,15 +45,18 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
if (ih6 == NULL)
return false;
- if (info->bitmask & EBT_IP6_TCLASS &&
- FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS))
+ if ((info->bitmask & EBT_IP6_TCLASS) &&
+ NF_INVF(info, EBT_IP6_TCLASS,
+ info->tclass != ipv6_get_dsfield(ih6)))
return false;
- if ((info->bitmask & EBT_IP6_SOURCE &&
- FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk,
- &info->saddr), EBT_IP6_SOURCE)) ||
- (info->bitmask & EBT_IP6_DEST &&
- FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk,
- &info->daddr), EBT_IP6_DEST)))
+ if (((info->bitmask & EBT_IP6_SOURCE) &&
+ NF_INVF(info, EBT_IP6_SOURCE,
+ ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk,
+ &info->saddr))) ||
+ ((info->bitmask & EBT_IP6_DEST) &&
+ NF_INVF(info, EBT_IP6_DEST,
+ ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk,
+ &info->daddr))))
return false;
if (info->bitmask & EBT_IP6_PROTO) {
uint8_t nexthdr = ih6->nexthdr;
@@ -63,7 +66,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr, &frag_off);
if (offset_ph == -1)
return false;
- if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
+ if (NF_INVF(info, EBT_IP6_PROTO, info->protocol != nexthdr))
return false;
if (!(info->bitmask & (EBT_IP6_DPORT |
EBT_IP6_SPORT | EBT_IP6_ICMP6)))
@@ -76,22 +79,24 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
return false;
if (info->bitmask & EBT_IP6_DPORT) {
u16 dst = ntohs(pptr->tcpudphdr.dst);
- if (FWINV(dst < info->dport[0] ||
- dst > info->dport[1], EBT_IP6_DPORT))
+ if (NF_INVF(info, EBT_IP6_DPORT,
+ dst < info->dport[0] ||
+ dst > info->dport[1]))
return false;
}
if (info->bitmask & EBT_IP6_SPORT) {
u16 src = ntohs(pptr->tcpudphdr.src);
- if (FWINV(src < info->sport[0] ||
- src > info->sport[1], EBT_IP6_SPORT))
+ if (NF_INVF(info, EBT_IP6_SPORT,
+ src < info->sport[0] ||
+ src > info->sport[1]))
return false;
}
if ((info->bitmask & EBT_IP6_ICMP6) &&
- FWINV(pptr->icmphdr.type < info->icmpv6_type[0] ||
- pptr->icmphdr.type > info->icmpv6_type[1] ||
- pptr->icmphdr.code < info->icmpv6_code[0] ||
- pptr->icmphdr.code > info->icmpv6_code[1],
- EBT_IP6_ICMP6))
+ NF_INVF(info, EBT_IP6_ICMP6,
+ pptr->icmphdr.type < info->icmpv6_type[0] ||
+ pptr->icmphdr.type > info->icmpv6_type[1] ||
+ pptr->icmphdr.code < info->icmpv6_code[0] ||
+ pptr->icmphdr.code > info->icmpv6_code[1]))
return false;
}
return true;
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 6b731e12ecfa..3140eb912d7e 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -17,24 +17,24 @@
#define BPDU_TYPE_TCN 0x80
struct stp_header {
- uint8_t dsap;
- uint8_t ssap;
- uint8_t ctrl;
- uint8_t pid;
- uint8_t vers;
- uint8_t type;
+ u8 dsap;
+ u8 ssap;
+ u8 ctrl;
+ u8 pid;
+ u8 vers;
+ u8 type;
};
struct stp_config_pdu {
- uint8_t flags;
- uint8_t root[8];
- uint8_t root_cost[4];
- uint8_t sender[8];
- uint8_t port[2];
- uint8_t msg_age[2];
- uint8_t max_age[2];
- uint8_t hello_time[2];
- uint8_t forward_delay[2];
+ u8 flags;
+ u8 root[8];
+ u8 root_cost[4];
+ u8 sender[8];
+ u8 port[2];
+ u8 msg_age[2];
+ u8 max_age[2];
+ u8 hello_time[2];
+ u8 forward_delay[2];
};
#define NR16(p) (p[0] << 8 | p[1])
@@ -44,76 +44,73 @@ static bool ebt_filter_config(const struct ebt_stp_info *info,
const struct stp_config_pdu *stpc)
{
const struct ebt_stp_config_info *c;
- uint16_t v16;
- uint32_t v32;
- int verdict, i;
+ u16 v16;
+ u32 v32;
c = &info->config;
if ((info->bitmask & EBT_STP_FLAGS) &&
- FWINV(c->flags != stpc->flags, EBT_STP_FLAGS))
+ NF_INVF(info, EBT_STP_FLAGS, c->flags != stpc->flags))
return false;
if (info->bitmask & EBT_STP_ROOTPRIO) {
v16 = NR16(stpc->root);
- if (FWINV(v16 < c->root_priol ||
- v16 > c->root_priou, EBT_STP_ROOTPRIO))
+ if (NF_INVF(info, EBT_STP_ROOTPRIO,
+ v16 < c->root_priol || v16 > c->root_priou))
return false;
}
if (info->bitmask & EBT_STP_ROOTADDR) {
- verdict = 0;
- for (i = 0; i < 6; i++)
- verdict |= (stpc->root[2+i] ^ c->root_addr[i]) &
- c->root_addrmsk[i];
- if (FWINV(verdict != 0, EBT_STP_ROOTADDR))
+ if (NF_INVF(info, EBT_STP_ROOTADDR,
+ !ether_addr_equal_masked(&stpc->root[2],
+ c->root_addr,
+ c->root_addrmsk)))
return false;
}
if (info->bitmask & EBT_STP_ROOTCOST) {
v32 = NR32(stpc->root_cost);
- if (FWINV(v32 < c->root_costl ||
- v32 > c->root_costu, EBT_STP_ROOTCOST))
+ if (NF_INVF(info, EBT_STP_ROOTCOST,
+ v32 < c->root_costl || v32 > c->root_costu))
return false;
}
if (info->bitmask & EBT_STP_SENDERPRIO) {
v16 = NR16(stpc->sender);
- if (FWINV(v16 < c->sender_priol ||
- v16 > c->sender_priou, EBT_STP_SENDERPRIO))
+ if (NF_INVF(info, EBT_STP_SENDERPRIO,
+ v16 < c->sender_priol || v16 > c->sender_priou))
return false;
}
if (info->bitmask & EBT_STP_SENDERADDR) {
- verdict = 0;
- for (i = 0; i < 6; i++)
- verdict |= (stpc->sender[2+i] ^ c->sender_addr[i]) &
- c->sender_addrmsk[i];
- if (FWINV(verdict != 0, EBT_STP_SENDERADDR))
+ if (NF_INVF(info, EBT_STP_SENDERADDR,
+ !ether_addr_equal_masked(&stpc->sender[2],
+ c->sender_addr,
+ c->sender_addrmsk)))
return false;
}
if (info->bitmask & EBT_STP_PORT) {
v16 = NR16(stpc->port);
- if (FWINV(v16 < c->portl ||
- v16 > c->portu, EBT_STP_PORT))
+ if (NF_INVF(info, EBT_STP_PORT,
+ v16 < c->portl || v16 > c->portu))
return false;
}
if (info->bitmask & EBT_STP_MSGAGE) {
v16 = NR16(stpc->msg_age);
- if (FWINV(v16 < c->msg_agel ||
- v16 > c->msg_ageu, EBT_STP_MSGAGE))
+ if (NF_INVF(info, EBT_STP_MSGAGE,
+ v16 < c->msg_agel || v16 > c->msg_ageu))
return false;
}
if (info->bitmask & EBT_STP_MAXAGE) {
v16 = NR16(stpc->max_age);
- if (FWINV(v16 < c->max_agel ||
- v16 > c->max_ageu, EBT_STP_MAXAGE))
+ if (NF_INVF(info, EBT_STP_MAXAGE,
+ v16 < c->max_agel || v16 > c->max_ageu))
return false;
}
if (info->bitmask & EBT_STP_HELLOTIME) {
v16 = NR16(stpc->hello_time);
- if (FWINV(v16 < c->hello_timel ||
- v16 > c->hello_timeu, EBT_STP_HELLOTIME))
+ if (NF_INVF(info, EBT_STP_HELLOTIME,
+ v16 < c->hello_timel || v16 > c->hello_timeu))
return false;
}
if (info->bitmask & EBT_STP_FWDD) {
v16 = NR16(stpc->forward_delay);
- if (FWINV(v16 < c->forward_delayl ||
- v16 > c->forward_delayu, EBT_STP_FWDD))
+ if (NF_INVF(info, EBT_STP_FWDD,
+ v16 < c->forward_delayl || v16 > c->forward_delayu))
return false;
}
return true;
@@ -125,7 +122,7 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct ebt_stp_info *info = par->matchinfo;
const struct stp_header *sp;
struct stp_header _stph;
- const uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00};
+ const u8 header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00};
sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph);
if (sp == NULL)
@@ -135,8 +132,8 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (memcmp(sp, header, sizeof(header)))
return false;
- if (info->bitmask & EBT_STP_TYPE &&
- FWINV(info->type != sp->type, EBT_STP_TYPE))
+ if ((info->bitmask & EBT_STP_TYPE) &&
+ NF_INVF(info, EBT_STP_TYPE, info->type != sp->type))
return false;
if (sp->type == BPDU_TYPE_CONFIG &&
@@ -156,8 +153,8 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
{
const struct ebt_stp_info *info = par->matchinfo;
- const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
- const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ const u8 bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
+ const u8 msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
const struct ebt_entry *e = par->entryinfo;
if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5a61f35412a0..cceac5bb658f 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -121,7 +121,6 @@ ebt_dev_check(const char *entry, const struct net_device *device)
return devname[i] != entry[i] && entry[i] != 1;
}
-#define FWINV2(bool, invflg) ((bool) ^ !!(e->invflags & invflg))
/* process standard matches */
static inline int
ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
@@ -130,7 +129,6 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
const struct ethhdr *h = eth_hdr(skb);
const struct net_bridge_port *p;
__be16 ethproto;
- int verdict, i;
if (skb_vlan_tag_present(skb))
ethproto = htons(ETH_P_8021Q);
@@ -138,38 +136,36 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
ethproto = h->h_proto;
if (e->bitmask & EBT_802_3) {
- if (FWINV2(eth_proto_is_802_3(ethproto), EBT_IPROTO))
+ if (NF_INVF(e, EBT_IPROTO, eth_proto_is_802_3(ethproto)))
return 1;
} else if (!(e->bitmask & EBT_NOPROTO) &&
- FWINV2(e->ethproto != ethproto, EBT_IPROTO))
+ NF_INVF(e, EBT_IPROTO, e->ethproto != ethproto))
return 1;
- if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN))
+ if (NF_INVF(e, EBT_IIN, ebt_dev_check(e->in, in)))
return 1;
- if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
+ if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out)))
return 1;
/* rcu_read_lock()ed by nf_hook_slow */
if (in && (p = br_port_get_rcu(in)) != NULL &&
- FWINV2(ebt_dev_check(e->logical_in, p->br->dev), EBT_ILOGICALIN))
+ NF_INVF(e, EBT_ILOGICALIN,
+ ebt_dev_check(e->logical_in, p->br->dev)))
return 1;
if (out && (p = br_port_get_rcu(out)) != NULL &&
- FWINV2(ebt_dev_check(e->logical_out, p->br->dev), EBT_ILOGICALOUT))
+ NF_INVF(e, EBT_ILOGICALOUT,
+ ebt_dev_check(e->logical_out, p->br->dev)))
return 1;
if (e->bitmask & EBT_SOURCEMAC) {
- verdict = 0;
- for (i = 0; i < 6; i++)
- verdict |= (h->h_source[i] ^ e->sourcemac[i]) &
- e->sourcemsk[i];
- if (FWINV2(verdict != 0, EBT_ISOURCE))
+ if (NF_INVF(e, EBT_ISOURCE,
+ !ether_addr_equal_masked(h->h_source, e->sourcemac,
+ e->sourcemsk)))
return 1;
}
if (e->bitmask & EBT_DESTMAC) {
- verdict = 0;
- for (i = 0; i < 6; i++)
- verdict |= (h->h_dest[i] ^ e->destmac[i]) &
- e->destmsk[i];
- if (FWINV2(verdict != 0, EBT_IDEST))
+ if (NF_INVF(e, EBT_IDEST,
+ !ether_addr_equal_masked(h->h_dest, e->destmac,
+ e->destmsk)))
return 1;
}
return 0;
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 77f7e7a9ebe1..0b77ffbc27d6 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -72,7 +72,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net,
nft_reject_br_push_etherhdr(oldskb, nskb);
- br_deliver(br_port_get_rcu(dev), nskb);
+ br_forward(br_port_get_rcu(dev), nskb, false, true);
}
static void nft_reject_br_send_v4_unreach(struct net *net,
@@ -140,7 +140,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
nft_reject_br_push_etherhdr(oldskb, nskb);
- br_deliver(br_port_get_rcu(dev), nskb);
+ br_forward(br_port_get_rcu(dev), nskb, false, true);
}
static void nft_reject_br_send_v6_tcp_reset(struct net *net,
@@ -174,7 +174,7 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net,
nft_reject_br_push_etherhdr(oldskb, nskb);
- br_deliver(br_port_get_rcu(dev), nskb);
+ br_forward(br_port_get_rcu(dev), nskb, false, true);
}
static bool reject6_br_csum_ok(struct sk_buff *skb, int hook)
@@ -255,7 +255,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
nft_reject_br_push_etherhdr(oldskb, nskb);
- br_deliver(br_port_get_rcu(dev), nskb);
+ br_forward(br_port_get_rcu(dev), nskb, false, true);
}
static void nft_reject_bridge_eval(const struct nft_expr *expr,
diff --git a/net/core/dev.c b/net/core/dev.c
index aba10d2a8bc3..2a9c39f8824e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -94,6 +94,7 @@
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
+#include <linux/bpf.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/busy_poll.h>
@@ -4972,7 +4973,7 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
rc = napi->poll(napi, BUSY_POLL_BUDGET);
- trace_napi_poll(napi);
+ trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
if (rc == BUSY_POLL_BUDGET) {
napi_complete_done(napi, rc);
napi_schedule(napi);
@@ -5128,7 +5129,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
work = 0;
if (test_bit(NAPI_STATE_SCHED, &n->state)) {
work = n->poll(n, weight);
- trace_napi_poll(n);
+ trace_napi_poll(n, work, weight);
}
WARN_ON_ONCE(work > weight);
@@ -5445,6 +5446,52 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
EXPORT_SYMBOL(netdev_lower_get_next);
/**
+ * netdev_all_lower_get_next - Get the next device from all lower neighbour list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RTNL lock or
+ * its own locking that guarantees that the neighbour all lower
+ * list will remain unchanged.
+ */
+struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_entry(*iter, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->all_adj_list.lower)
+ return NULL;
+
+ *iter = lower->list.next;
+
+ return lower->dev;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next);
+
+/**
+ * netdev_all_lower_get_next_rcu - Get the next device from all
+ * lower neighbour list, RCU variant
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_first_or_null_rcu(&dev->all_adj_list.lower,
+ struct netdev_adjacent, list);
+
+ return lower ? lower->dev : NULL;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
+
+/**
* netdev_lower_get_first_private_rcu - Get the first ->private from the
* lower neighbour list, RCU
* variant
@@ -6041,6 +6088,50 @@ void netdev_lower_state_changed(struct net_device *lower_dev,
}
EXPORT_SYMBOL(netdev_lower_state_changed);
+int netdev_default_l2upper_neigh_construct(struct net_device *dev,
+ struct neighbour *n)
+{
+ struct net_device *lower_dev, *stop_dev;
+ struct list_head *iter;
+ int err;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ if (!lower_dev->netdev_ops->ndo_neigh_construct)
+ continue;
+ err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
+ if (err) {
+ stop_dev = lower_dev;
+ goto rollback;
+ }
+ }
+ return 0;
+
+rollback:
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ if (lower_dev == stop_dev)
+ break;
+ if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+ continue;
+ lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct);
+
+void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
+ struct neighbour *n)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+ continue;
+ lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+ }
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy);
+
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -6525,6 +6616,38 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
EXPORT_SYMBOL(dev_change_proto_down);
/**
+ * dev_change_xdp_fd - set or clear a bpf program for a device rx path
+ * @dev: device
+ * @fd: new program fd or negative value to clear
+ *
+ * Set or clear a bpf program for a device
+ */
+int dev_change_xdp_fd(struct net_device *dev, int fd)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct bpf_prog *prog = NULL;
+ struct netdev_xdp xdp = {};
+ int err;
+
+ if (!ops->ndo_xdp)
+ return -EOPNOTSUPP;
+ if (fd >= 0) {
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+ }
+
+ xdp.command = XDP_SETUP_PROG;
+ xdp.prog = prog;
+ err = ops->ndo_xdp(dev, &xdp);
+ if (err < 0 && prog)
+ bpf_prog_put(prog);
+
+ return err;
+}
+EXPORT_SYMBOL(dev_change_xdp_fd);
+
+/**
* dev_new_index - allocate an ifindex
* @net: the applicable net namespace
*
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b2e592a198c0..1b5063088f1a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -26,6 +26,10 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/devlink.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/devlink.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
static LIST_HEAD(devlink_list);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 252e155c837b..d6b3b579560d 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -187,7 +187,8 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *locatio
trace_drop_common(skb, location);
}
-static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
+static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
+ int work, int budget)
{
struct dm_hw_stat_delta *new_stat;
diff --git a/net/core/filter.c b/net/core/filter.c
index 54071cf70fb5..6c627bc4be6e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1729,6 +1729,23 @@ static const struct bpf_func_proto bpf_get_route_realm_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+static u64 bpf_get_hash_recalc(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ /* If skb_clear_hash() was called due to mangling, we can
+ * trigger SW recalculation here. Later access to hash
+ * can then use the inline skb->hash via context directly
+ * instead of calling this helper again.
+ */
+ return skb_get_hash((struct sk_buff *) (unsigned long) r1);
+}
+
+static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
+ .func = bpf_get_hash_recalc,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
@@ -2008,6 +2025,47 @@ bool bpf_helper_changes_skb_data(void *func)
return false;
}
+static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
+ unsigned long len)
+{
+ void *ptr = skb_header_pointer(skb, 0, len, dst_buff);
+
+ if (unlikely(!ptr))
+ return len;
+ if (ptr != dst_buff)
+ memcpy(dst_buff, ptr, len);
+
+ return 0;
+}
+
+static u64 bpf_skb_event_output(u64 r1, u64 r2, u64 flags, u64 r4,
+ u64 meta_size)
+{
+ struct sk_buff *skb = (struct sk_buff *)(long) r1;
+ struct bpf_map *map = (struct bpf_map *)(long) r2;
+ u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
+ void *meta = (void *)(long) r4;
+
+ if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
+ return -EINVAL;
+ if (unlikely(skb_size > skb->len))
+ return -EFAULT;
+
+ return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
+ bpf_skb_copy);
+}
+
+static const struct bpf_func_proto bpf_skb_event_output_proto = {
+ .func = bpf_skb_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_STACK,
+ .arg5_type = ARG_CONST_STACK_SIZE,
+};
+
static unsigned short bpf_tunnel_key_af(u64 flags)
{
return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
@@ -2337,8 +2395,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_redirect_proto;
case BPF_FUNC_get_route_realm:
return &bpf_get_route_realm_proto;
+ case BPF_FUNC_get_hash_recalc:
+ return &bpf_get_hash_recalc_proto;
case BPF_FUNC_perf_event_output:
- return bpf_get_event_output_proto();
+ return &bpf_skb_event_output_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
#ifdef CONFIG_SOCK_CGROUP_DATA
@@ -2350,6 +2410,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
}
}
+static const struct bpf_func_proto *
+xdp_func_proto(enum bpf_func_id func_id)
+{
+ return sk_filter_func_proto(func_id);
+}
+
static bool __is_valid_access(int off, int size, enum bpf_access_type type)
{
if (off < 0 || off >= sizeof(struct __sk_buff))
@@ -2417,6 +2483,44 @@ static bool tc_cls_act_is_valid_access(int off, int size,
return __is_valid_access(off, size, type);
}
+static bool __is_valid_xdp_access(int off, int size,
+ enum bpf_access_type type)
+{
+ if (off < 0 || off >= sizeof(struct xdp_md))
+ return false;
+ if (off % size != 0)
+ return false;
+ if (size != 4)
+ return false;
+
+ return true;
+}
+
+static bool xdp_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
+{
+ if (type == BPF_WRITE)
+ return false;
+
+ switch (off) {
+ case offsetof(struct xdp_md, data):
+ *reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct xdp_md, data_end):
+ *reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ return __is_valid_xdp_access(off, size, type);
+}
+
+void bpf_warn_invalid_xdp_action(u32 act)
+{
+ WARN_ONCE(1, "Illegal XDP return value %u, expect packet loss\n", act);
+}
+EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
+
static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off,
struct bpf_insn *insn_buf,
@@ -2568,6 +2672,29 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
return insn - insn_buf;
}
+static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+ int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (ctx_off) {
+ case offsetof(struct xdp_md, data):
+ *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data)),
+ dst_reg, src_reg,
+ offsetof(struct xdp_buff, data));
+ break;
+ case offsetof(struct xdp_md, data_end):
+ *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data_end)),
+ dst_reg, src_reg,
+ offsetof(struct xdp_buff, data_end));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
static const struct bpf_verifier_ops sk_filter_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
@@ -2580,6 +2707,12 @@ static const struct bpf_verifier_ops tc_cls_act_ops = {
.convert_ctx_access = bpf_net_convert_ctx_access,
};
+static const struct bpf_verifier_ops xdp_ops = {
+ .get_func_proto = xdp_func_proto,
+ .is_valid_access = xdp_is_valid_access,
+ .convert_ctx_access = xdp_convert_ctx_access,
+};
+
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
.ops = &sk_filter_ops,
.type = BPF_PROG_TYPE_SOCKET_FILTER,
@@ -2595,11 +2728,17 @@ static struct bpf_prog_type_list sched_act_type __read_mostly = {
.type = BPF_PROG_TYPE_SCHED_ACT,
};
+static struct bpf_prog_type_list xdp_type __read_mostly = {
+ .ops = &xdp_ops,
+ .type = BPF_PROG_TYPE_XDP,
+};
+
static int __init register_sk_filter_ops(void)
{
bpf_register_prog_type(&sk_filter_type);
bpf_register_prog_type(&sched_cls_type);
bpf_register_prog_type(&sched_act_type);
+ bpf_register_prog_type(&xdp_type);
return 0;
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index a669dea146c6..61ad43f61c5e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -651,6 +651,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest,
}
EXPORT_SYMBOL(make_flow_keys_digest);
+static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
+
+u32 __skb_get_hash_symmetric(struct sk_buff *skb)
+{
+ struct flow_keys keys;
+
+ __flow_hash_secret_init();
+
+ memset(&keys, 0, sizeof(keys));
+ __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
+ NULL, 0, 0, 0,
+ FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+
+ return __flow_hash_from_keys(&keys, hashrnd);
+}
+EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
+
/**
* __skb_get_hash: calculate a flow hash
* @skb: sk_buff to calculate flow hash from
@@ -868,6 +885,29 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
},
};
+static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+ .offset = offsetof(struct flow_keys, control),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct flow_keys, basic),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs.v4addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs.v6addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_PORTS,
+ .offset = offsetof(struct flow_keys, ports),
+ },
+};
+
static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
@@ -889,6 +929,9 @@ static int __init init_default_flow_dissectors(void)
skb_flow_dissector_init(&flow_keys_dissector,
flow_keys_dissector_keys,
ARRAY_SIZE(flow_keys_dissector_keys));
+ skb_flow_dissector_init(&flow_keys_dissector_symmetric,
+ flow_keys_dissector_symmetric_keys,
+ ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
skb_flow_dissector_init(&flow_keys_buf_dissector,
flow_keys_buf_dissector_keys,
ARRAY_SIZE(flow_keys_buf_dissector_keys));
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 510cd62fcb99..5cdc62a8eb84 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -473,7 +473,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
}
if (dev->netdev_ops->ndo_neigh_construct) {
- error = dev->netdev_ops->ndo_neigh_construct(n);
+ error = dev->netdev_ops->ndo_neigh_construct(dev, n);
if (error < 0) {
rc = ERR_PTR(error);
goto out_neigh_release;
@@ -701,7 +701,7 @@ void neigh_destroy(struct neighbour *neigh)
neigh->arp_queue_len_bytes = 0;
if (dev->netdev_ops->ndo_neigh_destroy)
- dev->netdev_ops->ndo_neigh_destroy(neigh);
+ dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
dev_put(dev);
neigh_parms_put(neigh->parms);
@@ -2047,6 +2047,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
case NDTPA_DELAY_PROBE_TIME:
NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
nla_get_msecs(tbp[i]));
+ call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
break;
case NDTPA_RETRANS_TIME:
NEIGH_VAR_SET(p, RETRANS_TIME,
@@ -2930,6 +2931,7 @@ static void neigh_proc_update(struct ctl_table *ctl, int write)
return;
set_bit(index, p->data_state);
+ call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
if (!dev) /* NULL dev means this is default value */
neigh_copy_dflt_parms(net, p, index);
}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 94acfc89ad97..53599bd0c82d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -163,7 +163,7 @@ static void poll_one_napi(struct napi_struct *napi)
*/
work = napi->poll(napi, 0);
WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll);
- trace_napi_poll(napi);
+ trace_napi_poll(napi, work, 0);
clear_bit(NAPI_STATE_NPSVC, &napi->state);
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f74ab9c3b38f..bbd118b19aef 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -213,6 +213,7 @@
/* Xmit modes */
#define M_START_XMIT 0 /* Default normal TX */
#define M_NETIF_RECEIVE 1 /* Inject packets into stack */
+#define M_QUEUE_XMIT 2 /* Inject packet into qdisc */
/* If lock -- protects updating of if_list */
#define if_lock(t) spin_lock(&(t->if_lock));
@@ -626,6 +627,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->xmit_mode == M_NETIF_RECEIVE)
seq_puts(seq, " xmit_mode: netif_receive\n");
+ else if (pkt_dev->xmit_mode == M_QUEUE_XMIT)
+ seq_puts(seq, " xmit_mode: xmit_queue\n");
seq_puts(seq, " Flags: ");
@@ -1142,8 +1145,10 @@ static ssize_t pktgen_if_write(struct file *file,
return len;
i += len;
- if ((value > 1) && (pkt_dev->xmit_mode == M_START_XMIT) &&
- (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
+ if ((value > 1) &&
+ ((pkt_dev->xmit_mode == M_QUEUE_XMIT) ||
+ ((pkt_dev->xmit_mode == M_START_XMIT) &&
+ (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))))
return -ENOTSUPP;
pkt_dev->burst = value < 1 ? 1 : value;
sprintf(pg_result, "OK: burst=%d", pkt_dev->burst);
@@ -1198,6 +1203,9 @@ static ssize_t pktgen_if_write(struct file *file,
* at module loading time
*/
pkt_dev->clone_skb = 0;
+ } else if (strcmp(f, "queue_xmit") == 0) {
+ pkt_dev->xmit_mode = M_QUEUE_XMIT;
+ pkt_dev->last_ok = 1;
} else {
sprintf(pg_result,
"xmit_mode -:%s:- unknown\nAvailable modes: %s",
@@ -3434,6 +3442,36 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
#endif
} while (--burst > 0);
goto out; /* Skips xmit_mode M_START_XMIT */
+ } else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
+ local_bh_disable();
+ atomic_inc(&pkt_dev->skb->users);
+
+ ret = dev_queue_xmit(pkt_dev->skb);
+ switch (ret) {
+ case NET_XMIT_SUCCESS:
+ pkt_dev->sofar++;
+ pkt_dev->seq_num++;
+ pkt_dev->tx_bytes += pkt_dev->last_pkt_size;
+ break;
+ case NET_XMIT_DROP:
+ case NET_XMIT_CN:
+ /* These are all valid return codes for a qdisc but
+ * indicate packets are being dropped or will likely
+ * be dropped soon.
+ */
+ case NETDEV_TX_BUSY:
+ /* qdisc may call dev_hard_start_xmit directly in cases
+ * where no queues exist e.g. loopback device, virtual
+ * devices, etc. In this case we need to handle
+ * NETDEV_TX_ codes.
+ */
+ default:
+ pkt_dev->errors++;
+ net_info_ratelimited("%s xmit error: %d\n",
+ pkt_dev->odevname, ret);
+ break;
+ }
+ goto out;
}
txq = skb_get_tx_queue(odev, pkt_dev->skb);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a9e3805af739..189cc78c77eb 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -891,6 +891,16 @@ static size_t rtnl_port_size(const struct net_device *dev,
return port_self_size;
}
+static size_t rtnl_xdp_size(const struct net_device *dev)
+{
+ size_t xdp_size = nla_total_size(1); /* XDP_ATTACHED */
+
+ if (!dev->netdev_ops->ndo_xdp)
+ return 0;
+ else
+ return xdp_size;
+}
+
static noinline size_t if_nlmsg_size(const struct net_device *dev,
u32 ext_filter_mask)
{
@@ -927,6 +937,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
+ nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
+ + rtnl_xdp_size(dev) /* IFLA_XDP */
+ nla_total_size(1); /* IFLA_PROTO_DOWN */
}
@@ -1211,6 +1222,33 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
return 0;
}
+static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
+{
+ struct netdev_xdp xdp_op = {};
+ struct nlattr *xdp;
+ int err;
+
+ if (!dev->netdev_ops->ndo_xdp)
+ return 0;
+ xdp = nla_nest_start(skb, IFLA_XDP);
+ if (!xdp)
+ return -EMSGSIZE;
+ xdp_op.command = XDP_QUERY_PROG;
+ err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
+ if (err)
+ goto err_cancel;
+ err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached);
+ if (err)
+ goto err_cancel;
+
+ nla_nest_end(skb, xdp);
+ return 0;
+
+err_cancel:
+ nla_nest_cancel(skb, xdp);
+ return err;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask)
@@ -1307,6 +1345,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (rtnl_port_fill(skb, dev, ext_filter_mask))
goto nla_put_failure;
+ if (rtnl_xdp_fill(skb, dev))
+ goto nla_put_failure;
+
if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) {
if (rtnl_link_fill(skb, dev) < 0)
goto nla_put_failure;
@@ -1392,6 +1433,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_LINK_NETNSID] = { .type = NLA_S32 },
[IFLA_PROTO_DOWN] = { .type = NLA_U8 },
+ [IFLA_XDP] = { .type = NLA_NESTED },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1429,6 +1471,11 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
[IFLA_PORT_RESPONSE] = { .type = NLA_U16, },
};
+static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
+ [IFLA_XDP_FD] = { .type = NLA_S32 },
+ [IFLA_XDP_ATTACHED] = { .type = NLA_U8 },
+};
+
static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
{
const struct rtnl_link_ops *ops = NULL;
@@ -2054,6 +2101,27 @@ static int do_setlink(const struct sk_buff *skb,
status |= DO_SETLINK_NOTIFY;
}
+ if (tb[IFLA_XDP]) {
+ struct nlattr *xdp[IFLA_XDP_MAX + 1];
+
+ err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP],
+ ifla_xdp_policy);
+ if (err < 0)
+ goto errout;
+
+ if (xdp[IFLA_XDP_ATTACHED]) {
+ err = -EINVAL;
+ goto errout;
+ }
+ if (xdp[IFLA_XDP_FD]) {
+ err = dev_change_xdp_fd(dev,
+ nla_get_s32(xdp[IFLA_XDP_FD]));
+ if (err)
+ goto errout;
+ status |= DO_SETLINK_NOTIFY;
+ }
+ }
+
errout:
if (status & DO_SETLINK_MODIFIED) {
if (status & DO_SETLINK_NOTIFY)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e7ec6d3ad5f0..3864b4b68fa1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3017,24 +3017,6 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
EXPORT_SYMBOL_GPL(skb_append_pagefrags);
/**
- * skb_push_rcsum - push skb and update receive checksum
- * @skb: buffer to update
- * @len: length of data pulled
- *
- * This function performs an skb_push on the packet and updates
- * the CHECKSUM_COMPLETE checksum. It should be used on
- * receive path processing instead of skb_push unless you know
- * that the checksum difference is zero (e.g., a valid IP header)
- * or you are setting ip_summed to CHECKSUM_NONE.
- */
-static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len)
-{
- skb_push(skb, len);
- skb_postpush_rcsum(skb, skb->data, len);
- return skb->data;
-}
-
-/**
* skb_pull_rcsum - pull skb and update receive checksum
* @skb: buffer to update
* @len: length of data pulled
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index df4803437888..a796fc7cbc35 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -41,6 +41,7 @@
#include <net/dn_fib.h>
#include <net/dn_neigh.h>
#include <net/dn_dev.h>
+#include <net/nexthop.h>
#define RT_MIN_TABLE 1
@@ -150,14 +151,13 @@ static int dn_fib_count_nhs(const struct nlattr *attr)
struct rtnexthop *nhp = nla_data(attr);
int nhs = 0, nhlen = nla_len(attr);
- while(nhlen >= (int)sizeof(struct rtnexthop)) {
- if ((nhlen -= nhp->rtnh_len) < 0)
- return 0;
+ while (rtnh_ok(nhp, nhlen)) {
nhs++;
- nhp = RTNH_NEXT(nhp);
+ nhp = rtnh_next(nhp, &nhlen);
}
- return nhs;
+ /* leftover implies invalid nexthop configuration, discard it */
+ return nhlen > 0 ? 0 : nhs;
}
static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
@@ -167,21 +167,24 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
int nhlen = nla_len(attr);
change_nexthops(fi) {
- int attrlen = nhlen - sizeof(struct rtnexthop);
- if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+ int attrlen;
+
+ if (!rtnh_ok(nhp, nhlen))
return -EINVAL;
nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
nh->nh_oif = nhp->rtnh_ifindex;
nh->nh_weight = nhp->rtnh_hops + 1;
- if (attrlen) {
+ attrlen = rtnh_attrlen(nhp);
+ if (attrlen > 0) {
struct nlattr *gw_attr;
gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0;
}
- nhp = RTNH_NEXT(nhp);
+
+ nhp = rtnh_next(nhp, &nhlen);
} endfor_nexthops(fi);
return 0;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 766d2a525ada..7e68bc6bc853 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -774,11 +774,17 @@ static int dsa_of_probe(struct device *dev)
chip_index = -1;
for_each_available_child_of_node(np, child) {
+ int i;
+
chip_index++;
cd = &pd->chip[chip_index];
cd->of_node = child;
+ /* Initialize the routing table */
+ for (i = 0; i < DSA_MAX_SWITCHES; ++i)
+ cd->rtable[i] = DSA_RTABLE_NONE;
+
/* When assigning the host device, increment its refcount */
cd->host_dev = get_device(&mdio_bus->dev);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 83b95fc4cede..f30bad9678f0 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -595,7 +595,7 @@ static int _dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
struct device_node *ports = dsa_get_ports(ds, np);
struct dsa_switch_tree *dst;
u32 tree, index;
- int err;
+ int i, err;
err = dsa_parse_member(np, &tree, &index);
if (err)
@@ -622,6 +622,11 @@ static int _dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
ds->dst = dst;
ds->index = index;
+
+ /* Initialize the routing table */
+ for (i = 0; i < DSA_MAX_SWITCHES; ++i)
+ ds->rtable[i] = DSA_RTABLE_NONE;
+
dsa_dst_add_ds(dst, ds, index);
err = dsa_dst_complete(dst);
@@ -672,7 +677,7 @@ int dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
}
EXPORT_SYMBOL_GPL(dsa_register_switch);
-void _dsa_unregister_switch(struct dsa_switch *ds)
+static void _dsa_unregister_switch(struct dsa_switch *ds)
{
struct dsa_switch_tree *dst = ds->dst;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 7236eb26dc97..fc9196745225 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -333,6 +333,44 @@ static int dsa_slave_vlan_filtering(struct net_device *dev,
return 0;
}
+static int dsa_fastest_ageing_time(struct dsa_switch *ds,
+ unsigned int ageing_time)
+{
+ int i;
+
+ for (i = 0; i < DSA_MAX_PORTS; ++i) {
+ struct dsa_port *dp = &ds->ports[i];
+
+ if (dp && dp->ageing_time && dp->ageing_time < ageing_time)
+ ageing_time = dp->ageing_time;
+ }
+
+ return ageing_time;
+}
+
+static int dsa_slave_ageing_time(struct net_device *dev,
+ const struct switchdev_attr *attr,
+ struct switchdev_trans *trans)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time);
+ unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
+
+ /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
+ if (switchdev_trans_ph_prepare(trans))
+ return 0;
+
+ /* Keep the fastest ageing time in case of multiple bridges */
+ ds->ports[p->port].ageing_time = ageing_time;
+ ageing_time = dsa_fastest_ageing_time(ds, ageing_time);
+
+ if (ds->drv->set_ageing_time)
+ return ds->drv->set_ageing_time(ds, ageing_time);
+
+ return 0;
+}
+
static int dsa_slave_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct switchdev_trans *trans)
@@ -346,6 +384,9 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
ret = dsa_slave_vlan_filtering(dev, attr, trans);
break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+ ret = dsa_slave_ageing_time(dev, attr, trans);
+ break;
default:
ret = -EOPNOTSUPP;
break;
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 8c004a0c8d64..d7efbf0dad20 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -81,7 +81,7 @@ static int lowpan_stop(struct net_device *dev)
return 0;
}
-static int lowpan_neigh_construct(struct neighbour *n)
+static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n)
{
struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n));
@@ -130,8 +130,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev,
pr_debug("adding new link\n");
- if (!tb[IFLA_LINK] ||
- !net_eq(dev_net(ldev), &init_net))
+ if (!tb[IFLA_LINK])
return -EINVAL;
/* find and hold wpan device */
wdev = dev_get_by_index(dev_net(ldev), nla_get_u32(tb[IFLA_LINK]));
diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c
index ef185dd4110d..649e7d45e88f 100644
--- a/net/ieee802154/6lowpan/rx.c
+++ b/net/ieee802154/6lowpan/rx.c
@@ -262,7 +262,7 @@ static inline bool lowpan_rx_h_check(struct sk_buff *skb)
/* check on ieee802154 conform 6LoWPAN header */
if (!ieee802154_is_data(fc) ||
- !ieee802154_is_intra_pan(fc))
+ !ieee802154_skb_is_intra_pan_addressing(fc, skb))
return false;
/* check if we can dereference the dispatch */
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index c35fdfa6d04e..cb7176cd4cd6 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -140,6 +140,8 @@ wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size)
rdev->wpan_phy.dev.class = &wpan_phy_class;
rdev->wpan_phy.dev.platform_data = rdev;
+ wpan_phy_net_set(&rdev->wpan_phy, &init_net);
+
init_waitqueue_head(&rdev->dev_wait);
return &rdev->wpan_phy;
@@ -207,6 +209,49 @@ void wpan_phy_free(struct wpan_phy *phy)
}
EXPORT_SYMBOL(wpan_phy_free);
+int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
+ struct net *net)
+{
+ struct wpan_dev *wpan_dev;
+ int err = 0;
+
+ list_for_each_entry(wpan_dev, &rdev->wpan_dev_list, list) {
+ if (!wpan_dev->netdev)
+ continue;
+ wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d");
+ if (err)
+ break;
+ wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ }
+
+ if (err) {
+ /* failed -- clean up to old netns */
+ net = wpan_phy_net(&rdev->wpan_phy);
+
+ list_for_each_entry_continue_reverse(wpan_dev,
+ &rdev->wpan_dev_list,
+ list) {
+ if (!wpan_dev->netdev)
+ continue;
+ wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ err = dev_change_net_namespace(wpan_dev->netdev, net,
+ "wpan%d");
+ WARN_ON(err);
+ wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ }
+
+ return err;
+ }
+
+ wpan_phy_net_set(&rdev->wpan_phy, net);
+
+ err = device_rename(&rdev->wpan_phy.dev, dev_name(&rdev->wpan_phy.dev));
+ WARN_ON(err);
+
+ return 0;
+}
+
void cfg802154_dev_free(struct cfg802154_registered_device *rdev)
{
kfree(rdev);
@@ -286,14 +331,34 @@ static struct notifier_block cfg802154_netdev_notifier = {
.notifier_call = cfg802154_netdev_notifier_call,
};
+static void __net_exit cfg802154_pernet_exit(struct net *net)
+{
+ struct cfg802154_registered_device *rdev;
+
+ rtnl_lock();
+ list_for_each_entry(rdev, &cfg802154_rdev_list, list) {
+ if (net_eq(wpan_phy_net(&rdev->wpan_phy), net))
+ WARN_ON(cfg802154_switch_netns(rdev, &init_net));
+ }
+ rtnl_unlock();
+}
+
+static struct pernet_operations cfg802154_pernet_ops = {
+ .exit = cfg802154_pernet_exit,
+};
+
static int __init wpan_phy_class_init(void)
{
int rc;
- rc = wpan_phy_sysfs_init();
+ rc = register_pernet_device(&cfg802154_pernet_ops);
if (rc)
goto err;
+ rc = wpan_phy_sysfs_init();
+ if (rc)
+ goto err_sysfs;
+
rc = register_netdevice_notifier(&cfg802154_netdev_notifier);
if (rc)
goto err_nl;
@@ -315,6 +380,8 @@ err_notifier:
unregister_netdevice_notifier(&cfg802154_netdev_notifier);
err_nl:
wpan_phy_sysfs_exit();
+err_sysfs:
+ unregister_pernet_device(&cfg802154_pernet_ops);
err:
return rc;
}
@@ -326,6 +393,7 @@ static void __exit wpan_phy_class_exit(void)
ieee802154_nl_exit();
unregister_netdevice_notifier(&cfg802154_netdev_notifier);
wpan_phy_sysfs_exit();
+ unregister_pernet_device(&cfg802154_pernet_ops);
}
module_exit(wpan_phy_class_exit);
diff --git a/net/ieee802154/core.h b/net/ieee802154/core.h
index 231fade959f3..81141f58d079 100644
--- a/net/ieee802154/core.h
+++ b/net/ieee802154/core.h
@@ -38,6 +38,8 @@ wpan_phy_to_rdev(struct wpan_phy *wpan_phy)
extern struct list_head cfg802154_rdev_list;
extern int cfg802154_rdev_list_generation;
+int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
+ struct net *net);
/* free object */
void cfg802154_dev_free(struct cfg802154_registered_device *rdev);
struct cfg802154_registered_device *
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 116187b5c267..d90a4ed5b8a0 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -80,7 +80,8 @@ __cfg802154_wpan_dev_from_attrs(struct net *netns, struct nlattr **attrs)
list_for_each_entry(rdev, &cfg802154_rdev_list, list) {
struct wpan_dev *wpan_dev;
- /* TODO netns compare */
+ if (wpan_phy_net(&rdev->wpan_phy) != netns)
+ continue;
if (have_wpan_dev_id && rdev->wpan_phy_idx != wpan_phy_idx)
continue;
@@ -175,7 +176,8 @@ __cfg802154_rdev_from_attrs(struct net *netns, struct nlattr **attrs)
if (!rdev)
return ERR_PTR(-ENODEV);
- /* TODO netns compare */
+ if (netns != wpan_phy_net(&rdev->wpan_phy))
+ return ERR_PTR(-ENODEV);
return rdev;
}
@@ -233,6 +235,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
[NL802154_ATTR_ACKREQ_DEFAULT] = { .type = NLA_U8 },
+ [NL802154_ATTR_PID] = { .type = NLA_U32 },
+ [NL802154_ATTR_NETNS_FD] = { .type = NLA_U32 },
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
[NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, },
[NL802154_ATTR_SEC_OUT_LEVEL] = { .type = NLA_U32, },
@@ -590,7 +594,6 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
struct cfg802154_registered_device *rdev;
int ifidx = nla_get_u32(tb[NL802154_ATTR_IFINDEX]);
- /* TODO netns */
netdev = __dev_get_by_index(&init_net, ifidx);
if (!netdev)
return -ENODEV;
@@ -629,7 +632,8 @@ nl802154_dump_wpan_phy(struct sk_buff *skb, struct netlink_callback *cb)
}
list_for_each_entry(rdev, &cfg802154_rdev_list, list) {
- /* TODO net ns compare */
+ if (!net_eq(wpan_phy_net(&rdev->wpan_phy), sock_net(skb->sk)))
+ continue;
if (++idx <= state->start)
continue;
if (state->filter_wpan_phy != -1 &&
@@ -871,7 +875,8 @@ nl802154_dump_interface(struct sk_buff *skb, struct netlink_callback *cb)
rtnl_lock();
list_for_each_entry(rdev, &cfg802154_rdev_list, list) {
- /* TODO netns compare */
+ if (!net_eq(wpan_phy_net(&rdev->wpan_phy), sock_net(skb->sk)))
+ continue;
if (wp_idx < wp_start) {
wp_idx++;
continue;
@@ -1271,6 +1276,37 @@ nl802154_set_ackreq_default(struct sk_buff *skb, struct genl_info *info)
return rdev_set_ackreq_default(rdev, wpan_dev, ackreq);
}
+static int nl802154_wpan_phy_netns(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg802154_registered_device *rdev = info->user_ptr[0];
+ struct net *net;
+ int err;
+
+ if (info->attrs[NL802154_ATTR_PID]) {
+ u32 pid = nla_get_u32(info->attrs[NL802154_ATTR_PID]);
+
+ net = get_net_ns_by_pid(pid);
+ } else if (info->attrs[NL802154_ATTR_NETNS_FD]) {
+ u32 fd = nla_get_u32(info->attrs[NL802154_ATTR_NETNS_FD]);
+
+ net = get_net_ns_by_fd(fd);
+ } else {
+ return -EINVAL;
+ }
+
+ if (IS_ERR(net))
+ return PTR_ERR(net);
+
+ err = 0;
+
+ /* check if anything to do */
+ if (!net_eq(wpan_phy_net(&rdev->wpan_phy), net))
+ err = cfg802154_switch_netns(rdev, net);
+
+ put_net(net);
+ return err;
+}
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = {
[NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 },
@@ -2262,6 +2298,14 @@ static const struct genl_ops nl802154_ops[] = {
NL802154_FLAG_NEED_RTNL,
},
{
+ .cmd = NL802154_CMD_SET_WPAN_PHY_NETNS,
+ .doit = nl802154_wpan_phy_netns,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_WPAN_PHY |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
.cmd = NL802154_CMD_SET_PAN_ID,
.doit = nl802154_set_pan_id,
.policy = nl802154_policy,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d39e9e47a26e..55513e654d79 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -73,7 +73,7 @@
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/kmod.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
@@ -1916,6 +1916,3 @@ static int __init ipv4_proc_init(void)
return 0;
}
#endif /* CONFIG_PROC_FS */
-
-MODULE_ALIAS_NETPROTO(PF_INET);
-
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e333bc86bd39..415e117967c7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1834,7 +1834,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
+ skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
if (!skb)
goto errout;
@@ -1846,7 +1846,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
return;
errout:
if (err < 0)
@@ -1903,7 +1903,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
}
err = -ENOBUFS;
- skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC);
+ skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
if (!skb)
goto errout;
@@ -2027,16 +2027,16 @@ static void inet_forward_change(struct net *net)
for_each_netdev(net, dev) {
struct in_device *in_dev;
+
if (on)
dev_disable_lro(dev);
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dev);
+
+ in_dev = __in_dev_get_rtnl(dev);
if (in_dev) {
IN_DEV_CONF_SET(in_dev, FORWARDING, on);
inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
dev->ifindex, &in_dev->cnf);
}
- rcu_read_unlock();
}
}
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 9f0a7b96646f..8b4ffd216839 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb)
if (opt->is_strictroute && rt->rt_uses_gateway)
goto sr_failed;
- IPCB(skb)->flags |= IPSKB_FORWARDED;
+ IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
if (ip_exceeds_mtu(skb, mtu)) {
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index cbac493c913a..dde37fb340bf 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -223,8 +223,10 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
struct sk_buff *segs;
int ret = 0;
- /* common case: locally created skb or seglen is <= mtu */
- if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
+ /* common case: fragmentation of segments is not allowed,
+ * or seglen is <= mtu
+ */
+ if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) ||
skb_gso_validate_mtu(skb, mtu))
return ip_finish_output2(net, sk, skb);
@@ -271,7 +273,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
return dst_output(net, sk, skb);
}
#endif
- mtu = ip_skb_dst_mtu(skb);
+ mtu = ip_skb_dst_mtu(sk, skb);
if (skb_is_gso(skb))
return ip_finish_output_gso(net, sk, skb, mtu);
@@ -541,7 +543,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
iph = ip_hdr(skb);
- mtu = ip_skb_dst_mtu(skb);
+ mtu = ip_skb_dst_mtu(sk, skb);
if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu)
mtu = IPCB(skb)->frag_max_size;
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index afd6b5968caf..9d847c302551 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -63,6 +63,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
int pkt_len = skb->len - skb_inner_network_offset(skb);
struct net *net = dev_net(rt->dst.dev);
struct net_device *dev = skb->dev;
+ int skb_iif = skb->skb_iif;
struct iphdr *iph;
int err;
@@ -72,6 +73,14 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
skb_dst_set(skb, &rt->dst);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ if (skb_iif && proto == IPPROTO_UDP) {
+ /* Arrived from an ingress interface and got udp encapuslated.
+ * The encapsulated network segment length may exceed dst mtu.
+ * Allow IP Fragmentation of segments.
+ */
+ IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
+ }
+
/* Push down and install the IP header. */
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 978370132f29..4ae3f8e6c6cc 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -148,14 +148,14 @@ static int ipip_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, IPPROTO_IPIP, 0);
+ t->parms.link, 0, iph->protocol, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- IPPROTO_IPIP, 0);
+ iph->protocol, 0);
err = 0;
goto out;
}
@@ -177,12 +177,19 @@ out:
return err;
}
-static const struct tnl_ptk_info tpi = {
+static const struct tnl_ptk_info ipip_tpi = {
/* no tunnel info required for ipip. */
.proto = htons(ETH_P_IP),
};
-static int ipip_rcv(struct sk_buff *skb)
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct tnl_ptk_info mplsip_tpi = {
+ /* no tunnel info required for mplsip. */
+ .proto = htons(ETH_P_MPLS_UC),
+};
+#endif
+
+static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
@@ -193,11 +200,23 @@ static int ipip_rcv(struct sk_buff *skb)
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
if (tunnel) {
+ const struct tnl_ptk_info *tpi;
+
+ if (tunnel->parms.iph.protocol != ipproto &&
+ tunnel->parms.iph.protocol != 0)
+ goto drop;
+
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
- if (iptunnel_pull_header(skb, 0, tpi.proto, false))
+#if IS_ENABLED(CONFIG_MPLS)
+ if (ipproto == IPPROTO_MPLS)
+ tpi = &mplsip_tpi;
+ else
+#endif
+ tpi = &ipip_tpi;
+ if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
}
return -1;
@@ -207,24 +226,51 @@ drop:
return 0;
}
+static int ipip_rcv(struct sk_buff *skb)
+{
+ return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
+}
+
+#if IS_ENABLED(CONFIG_MPLS)
+static int mplsip_rcv(struct sk_buff *skb)
+{
+ return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
+}
+#endif
+
/*
* This function assumes it is being called from dev_queue_xmit()
* and that skb is filled properly by that function.
*/
-static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
+ struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
+ u8 ipproto;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ipproto = IPPROTO_IPIP;
+ break;
+#if IS_ENABLED(CONFIG_MPLS)
+ case htons(ETH_P_MPLS_UC):
+ ipproto = IPPROTO_MPLS;
+ break;
+#endif
+ default:
+ goto tx_error;
+ }
- if (unlikely(skb->protocol != htons(ETH_P_IP)))
+ if (tiph->protocol != ipproto && tiph->protocol != 0)
goto tx_error;
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
goto tx_error;
- skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+ skb_set_inner_ipproto(skb, ipproto);
- ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
+ ip_tunnel_xmit(skb, dev, tiph, ipproto);
return NETDEV_TX_OK;
tx_error:
@@ -234,6 +280,20 @@ tx_error:
return NETDEV_TX_OK;
}
+static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
+{
+ switch (ipproto) {
+ case 0:
+ case IPPROTO_IPIP:
+#if IS_ENABLED(CONFIG_MPLS)
+ case IPPROTO_MPLS:
+#endif
+ return true;
+ }
+
+ return false;
+}
+
static int
ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -244,7 +304,8 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EFAULT;
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
+ if (p.iph.version != 4 ||
+ !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
return -EINVAL;
}
@@ -301,10 +362,23 @@ static int ipip_tunnel_init(struct net_device *dev)
tunnel->tun_hlen = 0;
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
- tunnel->parms.iph.protocol = IPPROTO_IPIP;
return ip_tunnel_init(dev);
}
+static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ u8 proto;
+
+ if (!data || !data[IFLA_IPTUN_PROTO])
+ return 0;
+
+ proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+ if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
+ return -EINVAL;
+
+ return 0;
+}
+
static void ipip_netlink_parms(struct nlattr *data[],
struct ip_tunnel_parm *parms)
{
@@ -335,6 +409,9 @@ static void ipip_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_TOS])
parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
+ if (data[IFLA_IPTUN_PROTO])
+ parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
parms->iph.frag_off = htons(IP_DF);
}
@@ -427,6 +504,8 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_IPTUN_TOS */
nla_total_size(1) +
+ /* IFLA_IPTUN_PROTO */
+ nla_total_size(1) +
/* IFLA_IPTUN_PMTUDISC */
nla_total_size(1) +
/* IFLA_IPTUN_ENCAP_TYPE */
@@ -450,6 +529,7 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
+ nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))))
goto nla_put_failure;
@@ -476,6 +556,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
[IFLA_IPTUN_TTL] = { .type = NLA_U8 },
[IFLA_IPTUN_TOS] = { .type = NLA_U8 },
+ [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
[IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
[IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
@@ -489,6 +570,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = {
.policy = ipip_policy,
.priv_size = sizeof(struct ip_tunnel),
.setup = ipip_tunnel_setup,
+ .validate = ipip_tunnel_validate,
.newlink = ipip_newlink,
.changelink = ipip_changelink,
.dellink = ip_tunnel_dellink,
@@ -503,6 +585,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
.priority = 1,
};
+#if IS_ENABLED(CONFIG_MPLS)
+static struct xfrm_tunnel mplsip_handler __read_mostly = {
+ .handler = mplsip_rcv,
+ .err_handler = ipip_err,
+ .priority = 1,
+};
+#endif
+
static int __net_init ipip_init_net(struct net *net)
{
return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
@@ -525,7 +615,7 @@ static int __init ipip_init(void)
{
int err;
- pr_info("ipip: IPv4 over IPv4 tunneling driver\n");
+ pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
err = register_pernet_device(&ipip_net_ops);
if (err < 0)
@@ -533,8 +623,15 @@ static int __init ipip_init(void)
err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
if (err < 0) {
pr_info("%s: can't register tunnel\n", __func__);
- goto xfrm_tunnel_failed;
+ goto xfrm_tunnel_ipip_failed;
+ }
+#if IS_ENABLED(CONFIG_MPLS)
+ err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
+ if (err < 0) {
+ pr_info("%s: can't register tunnel\n", __func__);
+ goto xfrm_tunnel_mplsip_failed;
}
+#endif
err = rtnl_link_register(&ipip_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -543,8 +640,13 @@ out:
return err;
rtnl_link_failed:
+#if IS_ENABLED(CONFIG_MPLS)
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
+xfrm_tunnel_mplsip_failed:
+
+#endif
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
-xfrm_tunnel_failed:
+xfrm_tunnel_ipip_failed:
unregister_pernet_device(&ipip_net_ops);
goto out;
}
@@ -554,7 +656,10 @@ static void __exit ipip_fini(void)
rtnl_link_unregister(&ipip_link_ops);
if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
pr_info("%s: can't deregister tunnel\n", __func__);
-
+#if IS_ENABLED(CONFIG_MPLS)
+ if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
+ pr_info("%s: can't deregister tunnel\n", __func__);
+#endif
unregister_pernet_device(&ipip_net_ops);
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5ad48ec77710..eec234161b89 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1150,6 +1150,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
c->mfc_origin = mfc->mfcc_origin.s_addr;
c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
c->mfc_parent = mfc->mfcc_parent;
+ c->mfc_un.res.lastuse = jiffies;
ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
@@ -1748,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
vif->dev->stats.tx_bytes += skb->len;
}
- IPCB(skb)->flags |= IPSKB_FORWARDED;
+ IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
/* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
* not only before forwarding, but after forwarding on all output
@@ -1792,6 +1793,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
vif = cache->mfc_parent;
cache->mfc_un.res.pkt++;
cache->mfc_un.res.bytes += skb->len;
+ cache->mfc_un.res.lastuse = jiffies;
if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
struct mfc_cache *cache_proxy;
@@ -2071,10 +2073,10 @@ drop:
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mfc_cache *c, struct rtmsg *rtm)
{
- int ct;
- struct rtnexthop *nhp;
- struct nlattr *mp_attr;
struct rta_mfc_stats mfcs;
+ struct nlattr *mp_attr;
+ struct rtnexthop *nhp;
+ int ct;
/* If cache is unresolved, don't try to parse IIF and OIF */
if (c->mfc_parent >= MAXVIFS)
@@ -2106,7 +2108,10 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
mfcs.mfcs_packets = c->mfc_un.res.pkt;
mfcs.mfcs_bytes = c->mfc_un.res.bytes;
mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) < 0)
+ if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+ nla_put_u64_64bit(skb, RTA_EXPIRES,
+ jiffies_to_clock_t(c->mfc_un.res.lastuse),
+ RTA_PAD))
return -EMSGSIZE;
rtm->rtm_type = RTN_MULTICAST;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 2033f929aa66..c8dd9e26b185 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -89,22 +89,20 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
__be32 src_ipaddr, tgt_ipaddr;
long ret;
-#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
-
- if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
- ARPT_INV_ARPOP))
+ if (NF_INVF(arpinfo, ARPT_INV_ARPOP,
+ (arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop))
return 0;
- if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd,
- ARPT_INV_ARPHRD))
+ if (NF_INVF(arpinfo, ARPT_INV_ARPHRD,
+ (arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd))
return 0;
- if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro,
- ARPT_INV_ARPPRO))
+ if (NF_INVF(arpinfo, ARPT_INV_ARPPRO,
+ (arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro))
return 0;
- if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln,
- ARPT_INV_ARPHLN))
+ if (NF_INVF(arpinfo, ARPT_INV_ARPHLN,
+ (arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln))
return 0;
src_devaddr = arpptr;
@@ -115,31 +113,32 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
arpptr += dev->addr_len;
memcpy(&tgt_ipaddr, arpptr, sizeof(u32));
- if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len),
- ARPT_INV_SRCDEVADDR) ||
- FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len),
- ARPT_INV_TGTDEVADDR))
+ if (NF_INVF(arpinfo, ARPT_INV_SRCDEVADDR,
+ arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr,
+ dev->addr_len)) ||
+ NF_INVF(arpinfo, ARPT_INV_TGTDEVADDR,
+ arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr,
+ dev->addr_len)))
return 0;
- if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr,
- ARPT_INV_SRCIP) ||
- FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr),
- ARPT_INV_TGTIP))
+ if (NF_INVF(arpinfo, ARPT_INV_SRCIP,
+ (src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr) ||
+ NF_INVF(arpinfo, ARPT_INV_TGTIP,
+ (tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr))
return 0;
/* Look for ifname matches. */
ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask);
- if (FWINV(ret != 0, ARPT_INV_VIA_IN))
+ if (NF_INVF(arpinfo, ARPT_INV_VIA_IN, ret != 0))
return 0;
ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask);
- if (FWINV(ret != 0, ARPT_INV_VIA_OUT))
+ if (NF_INVF(arpinfo, ARPT_INV_VIA_OUT, ret != 0))
return 0;
return 1;
-#undef FWINV
}
static inline int arp_checkentry(const struct arpt_arp *arp)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 54906e0e8e0c..f0df66f54ce6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -58,32 +58,31 @@ ip_packet_match(const struct iphdr *ip,
{
unsigned long ret;
-#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
-
- if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
- IPT_INV_SRCIP) ||
- FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
- IPT_INV_DSTIP))
+ if (NF_INVF(ipinfo, IPT_INV_SRCIP,
+ (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) ||
+ NF_INVF(ipinfo, IPT_INV_DSTIP,
+ (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr))
return false;
ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
- if (FWINV(ret != 0, IPT_INV_VIA_IN))
+ if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0))
return false;
ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
- if (FWINV(ret != 0, IPT_INV_VIA_OUT))
+ if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0))
return false;
/* Check specific protocol */
if (ipinfo->proto &&
- FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO))
+ NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto))
return false;
/* If we have a fragment rule but the packet is not a fragment
* then we return zero */
- if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG))
+ if (NF_INVF(ipinfo, IPT_INV_FRAG,
+ (ipinfo->flags & IPT_F_FRAG) && !isfrag))
return false;
return true;
@@ -122,7 +121,6 @@ static inline bool unconditional(const struct ipt_entry *e)
return e->target_offset == sizeof(struct ipt_entry) &&
memcmp(&e->ip, &uncond, sizeof(uncond)) == 0;
-#undef FWINV
}
/* for const-correctness */
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 57fc97cdac70..aebdb337fd7e 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -87,10 +87,6 @@ iptable_mangle_hook(void *priv,
{
if (state->hook == NF_INET_LOCAL_OUT)
return ipt_mangle_out(skb, state);
- if (state->hook == NF_INET_POST_ROUTING)
- return ipt_do_table(skb, state,
- state->net->ipv4.iptable_mangle);
- /* PREROUTING/INPUT/FORWARD: */
return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle);
}
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index b6ea57ec5e14..fd8220213afc 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -24,6 +24,9 @@ const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
return NULL;
+ if (ip_hdr(oldskb)->protocol != IPPROTO_TCP)
+ return NULL;
+
oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
sizeof(struct tcphdr), _oth);
if (oth == NULL)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index debdd8b33e69..d84930b2dd95 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -24,6 +24,13 @@
int sysctl_tcp_thin_linear_timeouts __read_mostly;
+/**
+ * tcp_write_err() - close socket and save error info
+ * @sk: The socket the error has appeared on.
+ *
+ * Returns: Nothing (void)
+ */
+
static void tcp_write_err(struct sock *sk)
{
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
@@ -33,16 +40,21 @@ static void tcp_write_err(struct sock *sk)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}
-/* Do not allow orphaned sockets to eat all our resources.
- * This is direct violation of TCP specs, but it is required
- * to prevent DoS attacks. It is called when a retransmission timeout
- * or zero probe timeout occurs on orphaned socket.
+/**
+ * tcp_out_of_resources() - Close socket if out of resources
+ * @sk: pointer to current socket
+ * @do_reset: send a last packet with reset flag
*
- * Criteria is still not confirmed experimentally and may change.
- * We kill the socket, if:
- * 1. If number of orphaned sockets exceeds an administratively configured
- * limit.
- * 2. If we have strong memory pressure.
+ * Do not allow orphaned sockets to eat all our resources.
+ * This is direct violation of TCP specs, but it is required
+ * to prevent DoS attacks. It is called when a retransmission timeout
+ * or zero probe timeout occurs on orphaned socket.
+ *
+ * Criteria is still not confirmed experimentally and may change.
+ * We kill the socket, if:
+ * 1. If number of orphaned sockets exceeds an administratively configured
+ * limit.
+ * 2. If we have strong memory pressure.
*/
static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
@@ -74,7 +86,11 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
return 0;
}
-/* Calculate maximal number or retries on an orphaned socket. */
+/**
+ * tcp_orphan_retries() - Returns maximal number of retries on an orphaned socket
+ * @sk: Pointer to the current socket.
+ * @alive: bool, socket alive state
+ */
static int tcp_orphan_retries(struct sock *sk, bool alive)
{
int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
@@ -115,10 +131,22 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
}
}
-/* This function calculates a "timeout" which is equivalent to the timeout of a
- * TCP connection after "boundary" unsuccessful, exponentially backed-off
+
+/**
+ * retransmits_timed_out() - returns true if this connection has timed out
+ * @sk: The current socket
+ * @boundary: max number of retransmissions
+ * @timeout: A custom timeout value.
+ * If set to 0 the default timeout is calculated and used.
+ * Using TCP_RTO_MIN and the number of unsuccessful retransmits.
+ * @syn_set: true if the SYN Bit was set.
+ *
+ * The default "timeout" value this function can calculate and use
+ * is equivalent to the timeout of a TCP Connection
+ * after "boundary" unsuccessful, exponentially backed-off
* retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if
* syn_set flag is set.
+ *
*/
static bool retransmits_timed_out(struct sock *sk,
unsigned int boundary,
@@ -257,6 +285,16 @@ out:
sk_mem_reclaim(sk);
}
+
+/**
+ * tcp_delack_timer() - The TCP delayed ACK timeout handler
+ * @data: Pointer to the current socket. (gets casted to struct sock *)
+ *
+ * This function gets (indirectly) called when the kernel timer for a TCP packet
+ * of this socket expires. Calls tcp_delack_timer_handler() to do the actual work.
+ *
+ * Returns: Nothing (void)
+ */
static void tcp_delack_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
@@ -350,10 +388,18 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
}
-/*
- * The TCP retransmit timer.
- */
+/**
+ * tcp_retransmit_timer() - The TCP retransmit timeout handler
+ * @sk: Pointer to the current socket.
+ *
+ * This function gets called when the kernel timer for a TCP packet
+ * of this socket expires.
+ *
+ * It handles retransmission, timer adjustment and other necesarry measures.
+ *
+ * Returns: Nothing (void)
+ */
void tcp_retransmit_timer(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -494,7 +540,8 @@ out_reset_timer:
out:;
}
-/* Called with BH disabled */
+/* Called with bottom-half processing disabled.
+ Called by tcp_write_timer() */
void tcp_write_timer_handler(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -539,7 +586,7 @@ static void tcp_write_timer(unsigned long data)
if (!sock_owned_by_user(sk)) {
tcp_write_timer_handler(sk);
} else {
- /* deleguate our work to tcp_release_cb() */
+ /* delegate our work to tcp_release_cb() */
if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
sock_hold(sk);
}
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 0d0171830620..ec35eaa5c029 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -6,6 +6,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/mpls.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
@@ -16,11 +17,14 @@
static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly;
static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly;
+static struct xfrm_tunnel __rcu *tunnelmpls4_handlers __read_mostly;
static DEFINE_MUTEX(tunnel4_mutex);
static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family)
{
- return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
+ return (family == AF_INET) ? &tunnel4_handlers :
+ (family == AF_INET6) ? &tunnel64_handlers :
+ &tunnelmpls4_handlers;
}
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
@@ -125,6 +129,26 @@ drop:
}
#endif
+#if IS_ENABLED(CONFIG_MPLS)
+static int tunnelmpls4_rcv(struct sk_buff *skb)
+{
+ struct xfrm_tunnel *handler;
+
+ if (!pskb_may_pull(skb, sizeof(struct mpls_label)))
+ goto drop;
+
+ for_each_tunnel_rcu(tunnelmpls4_handlers, handler)
+ if (!handler->handler(skb))
+ return 0;
+
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+#endif
+
static void tunnel4_err(struct sk_buff *skb, u32 info)
{
struct xfrm_tunnel *handler;
@@ -145,6 +169,17 @@ static void tunnel64_err(struct sk_buff *skb, u32 info)
}
#endif
+#if IS_ENABLED(CONFIG_MPLS)
+static void tunnelmpls4_err(struct sk_buff *skb, u32 info)
+{
+ struct xfrm_tunnel *handler;
+
+ for_each_tunnel_rcu(tunnelmpls4_handlers, handler)
+ if (!handler->err_handler(skb, info))
+ break;
+}
+#endif
+
static const struct net_protocol tunnel4_protocol = {
.handler = tunnel4_rcv,
.err_handler = tunnel4_err,
@@ -161,24 +196,47 @@ static const struct net_protocol tunnel64_protocol = {
};
#endif
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct net_protocol tunnelmpls4_protocol = {
+ .handler = tunnelmpls4_rcv,
+ .err_handler = tunnelmpls4_err,
+ .no_policy = 1,
+ .netns_ok = 1,
+};
+#endif
+
static int __init tunnel4_init(void)
{
- if (inet_add_protocol(&tunnel4_protocol, IPPROTO_IPIP)) {
- pr_err("%s: can't add protocol\n", __func__);
- return -EAGAIN;
- }
+ if (inet_add_protocol(&tunnel4_protocol, IPPROTO_IPIP))
+ goto err;
#if IS_ENABLED(CONFIG_IPV6)
if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) {
- pr_err("tunnel64 init: can't add protocol\n");
inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
- return -EAGAIN;
+ goto err;
+ }
+#endif
+#if IS_ENABLED(CONFIG_MPLS)
+ if (inet_add_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS)) {
+ inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
+#if IS_ENABLED(CONFIG_IPV6)
+ inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6);
+#endif
+ goto err;
}
#endif
return 0;
+
+err:
+ pr_err("%s: can't add protocol\n", __func__);
+ return -EAGAIN;
}
static void __exit tunnel4_fini(void)
{
+#if IS_ENABLED(CONFIG_MPLS)
+ if (inet_del_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS))
+ pr_err("tunnelmpls4 close: can't remove protocol\n");
+#endif
#if IS_ENABLED(CONFIG_IPV6)
if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6))
pr_err("tunnel64 close: can't remove protocol\n");
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a1f6b7b31531..24f1b0898e40 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -547,7 +547,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC);
+ skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_KERNEL);
if (!skb)
goto errout;
@@ -559,7 +559,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_KERNEL);
return;
errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1bcef2369d64..771be1fa4176 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -177,6 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
}
}
+ free_percpu(non_pcpu_rt->rt6i_pcpu);
non_pcpu_rt->rt6i_pcpu = NULL;
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 487ef3bc7bbc..7adce139d92a 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1500,6 +1500,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
c->mf6c_parent = mfc->mf6cc_parent;
+ c->mfc_un.res.lastuse = jiffies;
ip6mr_update_thresholds(mrt, c, ttls);
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
@@ -1592,14 +1593,15 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
if (likely(mrt->mroute6_sk == NULL)) {
mrt->mroute6_sk = sk;
net->ipv6.devconf_all->mc_forwarding++;
- inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
- NETCONFA_IFINDEX_ALL,
- net->ipv6.devconf_all);
- }
- else
+ } else {
err = -EADDRINUSE;
+ }
write_unlock_bh(&mrt_lock);
+ if (!err)
+ inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
rtnl_unlock();
return err;
@@ -1617,11 +1619,11 @@ int ip6mr_sk_done(struct sock *sk)
write_lock_bh(&mrt_lock);
mrt->mroute6_sk = NULL;
net->ipv6.devconf_all->mc_forwarding--;
+ write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net,
NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
- write_unlock_bh(&mrt_lock);
mroute_clean_tables(mrt, false);
err = 0;
@@ -2091,6 +2093,7 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
vif = cache->mf6c_parent;
cache->mfc_un.res.pkt++;
cache->mfc_un.res.bytes += skb->len;
+ cache->mfc_un.res.lastuse = jiffies;
if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
@@ -2233,10 +2236,10 @@ int ip6_mr_input(struct sk_buff *skb)
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
struct mfc6_cache *c, struct rtmsg *rtm)
{
- int ct;
- struct rtnexthop *nhp;
- struct nlattr *mp_attr;
struct rta_mfc_stats mfcs;
+ struct nlattr *mp_attr;
+ struct rtnexthop *nhp;
+ int ct;
/* If cache is unresolved, don't try to parse IIF and OIF */
if (c->mf6c_parent >= MAXMIFS)
@@ -2269,7 +2272,10 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
mfcs.mfcs_packets = c->mfc_un.res.pkt;
mfcs.mfcs_bytes = c->mfc_un.res.bytes;
mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) < 0)
+ if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+ nla_put_u64_64bit(skb, RTA_EXPIRES,
+ jiffies_to_clock_t(c->mfc_un.res.lastuse),
+ RTA_PAD))
return -EMSGSIZE;
rtm->rtm_type = RTN_MULTICAST;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 63e06c3dd319..61ed95054efa 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -73,22 +73,22 @@ ip6_packet_match(const struct sk_buff *skb,
unsigned long ret;
const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
-#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg)))
-
- if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
- &ip6info->src), IP6T_INV_SRCIP) ||
- FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
- &ip6info->dst), IP6T_INV_DSTIP))
+ if (NF_INVF(ip6info, IP6T_INV_SRCIP,
+ ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
+ &ip6info->src)) ||
+ NF_INVF(ip6info, IP6T_INV_DSTIP,
+ ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
+ &ip6info->dst)))
return false;
ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask);
- if (FWINV(ret != 0, IP6T_INV_VIA_IN))
+ if (NF_INVF(ip6info, IP6T_INV_VIA_IN, ret != 0))
return false;
ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask);
- if (FWINV(ret != 0, IP6T_INV_VIA_OUT))
+ if (NF_INVF(ip6info, IP6T_INV_VIA_OUT, ret != 0))
return false;
/* ... might want to do something with class and flowlabel here ... */
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index cb2b28883252..2b1a9dcdbcb3 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -83,10 +83,6 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
{
if (state->hook == NF_INET_LOCAL_OUT)
return ip6t_mangle_out(skb, state);
- if (state->hook == NF_INET_POST_ROUTING)
- return ip6t_do_table(skb, state,
- state->net->ipv6.ip6table_mangle);
- /* INPUT/FORWARD */
return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 917a5cd4b8fc..182b6a9be29d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -688,12 +688,19 @@ out:
return 0;
}
-static const struct tnl_ptk_info tpi = {
+static const struct tnl_ptk_info ipip_tpi = {
/* no tunnel info required for ipip. */
.proto = htons(ETH_P_IP),
};
-static int ipip_rcv(struct sk_buff *skb)
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct tnl_ptk_info mplsip_tpi = {
+ /* no tunnel info required for mplsip. */
+ .proto = htons(ETH_P_MPLS_UC),
+};
+#endif
+
+static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
const struct iphdr *iph;
struct ip_tunnel *tunnel;
@@ -702,15 +709,23 @@ static int ipip_rcv(struct sk_buff *skb)
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
iph->saddr, iph->daddr);
if (tunnel) {
- if (tunnel->parms.iph.protocol != IPPROTO_IPIP &&
+ const struct tnl_ptk_info *tpi;
+
+ if (tunnel->parms.iph.protocol != ipproto &&
tunnel->parms.iph.protocol != 0)
goto drop;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
- if (iptunnel_pull_header(skb, 0, tpi.proto, false))
+#if IS_ENABLED(CONFIG_MPLS)
+ if (ipproto == IPPROTO_MPLS)
+ tpi = &mplsip_tpi;
+ else
+#endif
+ tpi = &ipip_tpi;
+ if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
}
return 1;
@@ -720,6 +735,18 @@ drop:
return 0;
}
+static int ipip_rcv(struct sk_buff *skb)
+{
+ return sit_tunnel_rcv(skb, IPPROTO_IPIP);
+}
+
+#if IS_ENABLED(CONFIG_MPLS)
+static int mplsip_rcv(struct sk_buff *skb)
+{
+ return sit_tunnel_rcv(skb, IPPROTO_MPLS);
+}
+#endif
+
/*
* If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
* stores the embedded IPv4 address in v4dst and returns true.
@@ -958,7 +985,8 @@ tx_error:
return NETDEV_TX_OK;
}
-static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t sit_tunnel_xmit__(struct sk_buff *skb,
+ struct net_device *dev, u8 ipproto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
@@ -966,9 +994,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
goto tx_error;
- skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+ skb_set_inner_ipproto(skb, ipproto);
- ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
+ ip_tunnel_xmit(skb, dev, tiph, ipproto);
return NETDEV_TX_OK;
tx_error:
kfree_skb(skb);
@@ -981,11 +1009,16 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
{
switch (skb->protocol) {
case htons(ETH_P_IP):
- ipip_tunnel_xmit(skb, dev);
+ sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP);
break;
case htons(ETH_P_IPV6):
ipip6_tunnel_xmit(skb, dev);
break;
+#if IS_ENABLED(CONFIG_MPLS)
+ case htons(ETH_P_MPLS_UC):
+ sit_tunnel_xmit__(skb, dev, IPPROTO_MPLS);
+ break;
+#endif
default:
goto tx_err;
}
@@ -1093,6 +1126,16 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
}
#endif
+bool ipip6_valid_ip_proto(u8 ipproto)
+{
+ return ipproto == IPPROTO_IPV6 ||
+ ipproto == IPPROTO_IPIP ||
+#if IS_ENABLED(CONFIG_MPLS)
+ ipproto == IPPROTO_MPLS ||
+#endif
+ ipproto == 0;
+}
+
static int
ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -1152,9 +1195,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
goto done;
err = -EINVAL;
- if (p.iph.protocol != IPPROTO_IPV6 &&
- p.iph.protocol != IPPROTO_IPIP &&
- p.iph.protocol != 0)
+ if (!ipip6_valid_ip_proto(p.iph.protocol))
goto done;
if (p.iph.version != 4 ||
p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
@@ -1379,9 +1420,7 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
return 0;
proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
- if (proto != IPPROTO_IPV6 &&
- proto != IPPROTO_IPIP &&
- proto != 0)
+ if (!ipip6_valid_ip_proto(proto))
return -EINVAL;
return 0;
@@ -1723,6 +1762,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
.priority = 2,
};
+#if IS_ENABLED(CONFIG_MPLS)
+static struct xfrm_tunnel mplsip_handler __read_mostly = {
+ .handler = mplsip_rcv,
+ .err_handler = ipip6_err,
+ .priority = 2,
+};
+#endif
+
static void __net_exit sit_destroy_tunnels(struct net *net,
struct list_head *head)
{
@@ -1818,6 +1865,9 @@ static void __exit sit_cleanup(void)
rtnl_link_unregister(&sit_link_ops);
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+#if IS_ENABLED(CONFIG_MPLS)
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
+#endif
unregister_pernet_device(&sit_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
@@ -1827,7 +1877,7 @@ static int __init sit_init(void)
{
int err;
- pr_info("IPv6 over IPv4 tunneling driver\n");
+ pr_info("IPv6, IPv4 and MPLS over IPv4 tunneling driver\n");
err = register_pernet_device(&sit_net_ops);
if (err < 0)
@@ -1842,6 +1892,13 @@ static int __init sit_init(void)
pr_info("%s: can't register ip4ip4\n", __func__);
goto xfrm_tunnel4_failed;
}
+#if IS_ENABLED(CONFIG_MPLS)
+ err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
+ if (err < 0) {
+ pr_info("%s: can't register mplsip\n", __func__);
+ goto xfrm_tunnel_mpls_failed;
+ }
+#endif
err = rtnl_link_register(&sit_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -1850,6 +1907,10 @@ out:
return err;
rtnl_link_failed:
+#if IS_ENABLED(CONFIG_MPLS)
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
+xfrm_tunnel_mpls_failed:
+#endif
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
xfrm_tunnel4_failed:
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 3a8f881b22f1..a9aff6079c42 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -306,6 +306,24 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
mutex_lock(&sta->ampdu_mlme.mtx);
if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
+ tid_agg_rx = rcu_dereference_protected(
+ sta->ampdu_mlme.tid_rx[tid],
+ lockdep_is_held(&sta->ampdu_mlme.mtx));
+
+ if (tid_agg_rx->dialog_token == dialog_token) {
+ ht_dbg_ratelimited(sta->sdata,
+ "updated AddBA Req from %pM on tid %u\n",
+ sta->sta.addr, tid);
+ /* We have no API to update the timeout value in the
+ * driver so reject the timeout update.
+ */
+ status = WLAN_STATUS_REQUEST_DECLINED;
+ ieee80211_send_addba_resp(sta->sdata, sta->sta.addr,
+ tid, dialog_token, status,
+ 1, buf_size, timeout);
+ goto end;
+ }
+
ht_dbg_ratelimited(sta->sdata,
"unexpected AddBA Req from %pM on tid %u\n",
sta->sta.addr, tid);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 0c12e4001f19..47e99ab8d97a 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -997,6 +997,7 @@ static void sta_apply_mesh_params(struct ieee80211_local *local,
if (sta->mesh->plink_state != NL80211_PLINK_ESTAB)
changed = mesh_plink_inc_estab_count(sdata);
sta->mesh->plink_state = params->plink_state;
+ sta->mesh->aid = params->peer_aid;
ieee80211_mps_sta_status_update(sta);
changed |= ieee80211_mps_set_sta_local_pm(sta,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 54edfb6fc1d1..f56d342c31b8 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1250,6 +1250,7 @@ struct ieee80211_local {
int scan_channel_idx;
int scan_ies_len;
int hw_scan_ies_bufsize;
+ struct cfg80211_scan_info scan_info;
struct work_struct sched_scan_stopped_work;
struct ieee80211_sub_if_data __rcu *sched_scan_sdata;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 6a1603bcdced..c66411df9863 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -148,25 +148,7 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
void mesh_sta_cleanup(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- u32 changed = 0;
-
- /*
- * maybe userspace handles peer allocation and peering, but in either
- * case the beacon is still generated by the kernel and we might need
- * an update.
- */
- if (sdata->u.mesh.user_mpm &&
- sta->mesh->plink_state == NL80211_PLINK_ESTAB)
- changed |= mesh_plink_dec_estab_count(sdata);
- changed |= mesh_accept_plinks_update(sdata);
- if (!sdata->u.mesh.user_mpm) {
- changed |= mesh_plink_deactivate(sta);
- del_timer_sync(&sta->mesh->plink_timer);
- }
-
- /* make sure no readers can access nexthop sta from here on */
- mesh_path_flush_by_nexthop(sta);
- synchronize_net();
+ u32 changed = mesh_plink_deactivate(sta);
if (changed)
ieee80211_mbss_info_change_notify(sdata, changed);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 79f2a0a13db8..7fcdcf622655 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -370,13 +370,21 @@ u32 mesh_plink_deactivate(struct sta_info *sta)
spin_lock_bh(&sta->mesh->plink_lock);
changed = __mesh_plink_deactivate(sta);
- sta->mesh->reason = WLAN_REASON_MESH_PEER_CANCELED;
- mesh_plink_frame_tx(sdata, sta, WLAN_SP_MESH_PEERING_CLOSE,
- sta->sta.addr, sta->mesh->llid, sta->mesh->plid,
- sta->mesh->reason);
+
+ if (!sdata->u.mesh.user_mpm) {
+ sta->mesh->reason = WLAN_REASON_MESH_PEER_CANCELED;
+ mesh_plink_frame_tx(sdata, sta, WLAN_SP_MESH_PEERING_CLOSE,
+ sta->sta.addr, sta->mesh->llid,
+ sta->mesh->plid, sta->mesh->reason);
+ }
spin_unlock_bh(&sta->mesh->plink_lock);
+ if (!sdata->u.mesh.user_mpm)
+ del_timer_sync(&sta->mesh->plink_timer);
mesh_path_flush_by_nexthop(sta);
+ /* make sure no readers can access nexthop sta from here on */
+ synchronize_net();
+
return changed;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 9a1eb70cb120..2e8a9024625a 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1624,8 +1624,13 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
if (mmie_keyidx < NUM_DEFAULT_KEYS ||
mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
return RX_DROP_MONITOR; /* unexpected BIP keyidx */
- if (rx->sta)
+ if (rx->sta) {
+ if (ieee80211_is_group_privacy_action(skb) &&
+ test_sta_flag(rx->sta, WLAN_STA_MFP))
+ return RX_DROP_MONITOR;
+
rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]);
+ }
if (!rx->key)
rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
} else if (!ieee80211_has_protected(fc)) {
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index f9648ef9e31f..070b40f15850 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -7,6 +7,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
+ * Copyright 2016 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -70,6 +71,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
.boottime_ns = rx_status->boottime_ns,
};
bool signal_valid;
+ struct ieee80211_sub_if_data *scan_sdata;
if (ieee80211_hw_check(&local->hw, SIGNAL_DBM))
bss_meta.signal = rx_status->signal * 100;
@@ -83,6 +85,20 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_10;
bss_meta.chan = channel;
+
+ rcu_read_lock();
+ scan_sdata = rcu_dereference(local->scan_sdata);
+ if (scan_sdata && scan_sdata->vif.type == NL80211_IFTYPE_STATION &&
+ scan_sdata->vif.bss_conf.assoc &&
+ ieee80211_have_rx_timestamp(rx_status)) {
+ bss_meta.parent_tsf =
+ ieee80211_calculate_rx_timestamp(local, rx_status,
+ len + FCS_LEN, 24);
+ ether_addr_copy(bss_meta.parent_bssid,
+ scan_sdata->vif.bss_conf.bssid);
+ }
+ rcu_read_unlock();
+
cbss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta,
mgmt, len, GFP_ATOMIC);
if (!cbss)
@@ -345,6 +361,12 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
if (rc == 0)
return;
+
+ /* HW scan failed and is going to be reported as aborted,
+ * so clear old scan info.
+ */
+ memset(&local->scan_info, 0, sizeof(local->scan_info));
+ aborted = true;
}
kfree(local->hw_scan_req);
@@ -353,8 +375,10 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
scan_req = rcu_dereference_protected(local->scan_req,
lockdep_is_held(&local->mtx));
- if (scan_req != local->int_scan_req)
- cfg80211_scan_done(scan_req, aborted);
+ if (scan_req != local->int_scan_req) {
+ local->scan_info.aborted = aborted;
+ cfg80211_scan_done(scan_req, &local->scan_info);
+ }
RCU_INIT_POINTER(local->scan_req, NULL);
scan_sdata = rcu_dereference_protected(local->scan_sdata,
@@ -391,15 +415,19 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
ieee80211_start_next_roc(local);
}
-void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
+void ieee80211_scan_completed(struct ieee80211_hw *hw,
+ struct cfg80211_scan_info *info)
{
struct ieee80211_local *local = hw_to_local(hw);
- trace_api_scan_completed(local, aborted);
+ trace_api_scan_completed(local, info);
set_bit(SCAN_COMPLETED, &local->scanning);
- if (aborted)
+ if (info->aborted)
set_bit(SCAN_ABORTED, &local->scanning);
+
+ memcpy(&local->scan_info, info, sizeof(*info));
+
ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
}
EXPORT_SYMBOL(ieee80211_scan_completed);
@@ -566,6 +594,9 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
local->hw_scan_req->req.ie = ies;
local->hw_scan_req->req.flags = req->flags;
eth_broadcast_addr(local->hw_scan_req->req.bssid);
+ local->hw_scan_req->req.duration = req->duration;
+ local->hw_scan_req->req.duration_mandatory =
+ req->duration_mandatory;
local->hw_scan_band = 0;
@@ -1073,6 +1104,7 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
*/
cancel_delayed_work(&local->scan_work);
/* and clean up */
+ memset(&local->scan_info, 0, sizeof(local->scan_info));
__ieee80211_scan_completed(&local->hw, true);
out:
mutex_unlock(&local->mtx);
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 2ddc661f0988..97f4c9d6b54c 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -129,42 +129,31 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
}
if (wide_bw_chansw_ie) {
- new_vht_chandef.chan = new_chan;
- new_vht_chandef.center_freq1 =
- ieee80211_channel_to_frequency(
+ struct ieee80211_vht_operation vht_oper = {
+ .chan_width =
+ wide_bw_chansw_ie->new_channel_width,
+ .center_freq_seg1_idx =
wide_bw_chansw_ie->new_center_freq_seg0,
- new_band);
-
- switch (wide_bw_chansw_ie->new_channel_width) {
- default:
- /* hmmm, ignore VHT and use HT if present */
- case IEEE80211_VHT_CHANWIDTH_USE_HT:
+ .center_freq_seg2_idx =
+ wide_bw_chansw_ie->new_center_freq_seg1,
+ /* .basic_mcs_set doesn't matter */
+ };
+
+ /* default, for the case of IEEE80211_VHT_CHANWIDTH_USE_HT,
+ * to the previously parsed chandef
+ */
+ new_vht_chandef = csa_ie->chandef;
+
+ /* ignore if parsing fails */
+ if (!ieee80211_chandef_vht_oper(&vht_oper, &new_vht_chandef))
new_vht_chandef.chan = NULL;
- break;
- case IEEE80211_VHT_CHANWIDTH_80MHZ:
- new_vht_chandef.width = NL80211_CHAN_WIDTH_80;
- break;
- case IEEE80211_VHT_CHANWIDTH_160MHZ:
- new_vht_chandef.width = NL80211_CHAN_WIDTH_160;
- break;
- case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
- /* field is otherwise reserved */
- new_vht_chandef.center_freq2 =
- ieee80211_channel_to_frequency(
- wide_bw_chansw_ie->new_center_freq_seg1,
- new_band);
- new_vht_chandef.width = NL80211_CHAN_WIDTH_80P80;
- break;
- }
+
if (sta_flags & IEEE80211_STA_DISABLE_80P80MHZ &&
new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80)
ieee80211_chandef_downgrade(&new_vht_chandef);
if (sta_flags & IEEE80211_STA_DISABLE_160MHZ &&
new_vht_chandef.width == NL80211_CHAN_WIDTH_160)
ieee80211_chandef_downgrade(&new_vht_chandef);
- if (sta_flags & IEEE80211_STA_DISABLE_40MHZ &&
- new_vht_chandef.width > NL80211_CHAN_WIDTH_20)
- ieee80211_chandef_downgrade(&new_vht_chandef);
}
/* if VHT data is there validate & use it */
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 1c7d45a6d93e..b5d28f14b9cf 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1747,6 +1747,7 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata,
goto out;
}
+ ret = 0;
call_drv:
drv_tdls_recv_channel_switch(sdata->local, sdata, &params);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 44ec605a5682..91461c415525 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -593,6 +593,9 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
else if (tx->sta &&
(key = rcu_dereference(tx->sta->ptk[tx->sta->ptk_idx])))
tx->key = key;
+ else if (ieee80211_is_group_privacy_action(tx->skb) &&
+ (key = rcu_dereference(tx->sdata->default_multicast_key)))
+ tx->key = key;
else if (ieee80211_is_mgmt(hdr->frame_control) &&
is_multicast_ether_addr(hdr->addr1) &&
ieee80211_is_robust_mgmt_frame(tx->skb) &&
@@ -625,7 +628,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
case WLAN_CIPHER_SUITE_GCMP_256:
if (!ieee80211_is_data_present(hdr->frame_control) &&
!ieee80211_use_mfp(hdr->frame_control, tx->sta,
- tx->skb))
+ tx->skb) &&
+ !ieee80211_is_group_privacy_action(tx->skb))
tx->key = NULL;
else
skip_hw = (tx->key->conf.flags &
@@ -1445,7 +1449,9 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local)
local->cvars = kcalloc(fq->flows_cnt, sizeof(local->cvars[0]),
GFP_KERNEL);
if (!local->cvars) {
+ spin_lock_bh(&fq->lock);
fq_reset(fq, fq_skb_free_func);
+ spin_unlock_bh(&fq->lock);
return -ENOMEM;
}
@@ -1465,7 +1471,9 @@ void ieee80211_txq_teardown_flows(struct ieee80211_local *local)
kfree(local->cvars);
local->cvars = NULL;
+ spin_lock_bh(&fq->lock);
fq_reset(fq, fq_skb_free_func);
+ spin_unlock_bh(&fq->lock);
}
struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index e9beaa58573c..5c161e7759b5 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1009,10 +1009,12 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
unsigned int flags;
if (event == NETDEV_REGISTER) {
- /* For now just support Ethernet and IPGRE devices */
+ /* For now just support Ethernet, IPGRE, SIT and IPIP devices */
if (dev->type == ARPHRD_ETHER ||
dev->type == ARPHRD_LOOPBACK ||
- dev->type == ARPHRD_IPGRE) {
+ dev->type == ARPHRD_IPGRE ||
+ dev->type == ARPHRD_SIT ||
+ dev->type == ARPHRD_TUNNEL) {
mdev = mpls_add_dev(dev);
if (IS_ERR(mdev))
return notifier_from_errno(PTR_ERR(mdev));
diff --git a/net/ncsi/Kconfig b/net/ncsi/Kconfig
new file mode 100644
index 000000000000..08a8a6031fd7
--- /dev/null
+++ b/net/ncsi/Kconfig
@@ -0,0 +1,12 @@
+#
+# Configuration for NCSI support
+#
+
+config NET_NCSI
+ bool "NCSI interface support"
+ depends on INET
+ ---help---
+ This module provides NCSI (Network Controller Sideband Interface)
+ support. Enable this only if your system connects to a network
+ device via NCSI and the ethernet driver you're using supports
+ the protocol explicitly.
diff --git a/net/ncsi/Makefile b/net/ncsi/Makefile
new file mode 100644
index 000000000000..dd12b564f2e7
--- /dev/null
+++ b/net/ncsi/Makefile
@@ -0,0 +1,4 @@
+#
+# Makefile for NCSI API
+#
+obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
new file mode 100644
index 000000000000..33738c060547
--- /dev/null
+++ b/net/ncsi/internal.h
@@ -0,0 +1,328 @@
+/*
+ * Copyright Gavin Shan, IBM Corporation 2016.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __NCSI_INTERNAL_H__
+#define __NCSI_INTERNAL_H__
+
+enum {
+ NCSI_CAP_BASE = 0,
+ NCSI_CAP_GENERIC = 0,
+ NCSI_CAP_BC,
+ NCSI_CAP_MC,
+ NCSI_CAP_BUFFER,
+ NCSI_CAP_AEN,
+ NCSI_CAP_VLAN,
+ NCSI_CAP_MAX
+};
+
+enum {
+ NCSI_CAP_GENERIC_HWA = 0x01, /* HW arbitration */
+ NCSI_CAP_GENERIC_HDS = 0x02, /* HNC driver status change */
+ NCSI_CAP_GENERIC_FC = 0x04, /* HNC to MC flow control */
+ NCSI_CAP_GENERIC_FC1 = 0x08, /* MC to HNC flow control */
+ NCSI_CAP_GENERIC_MC = 0x10, /* Global MC filtering */
+ NCSI_CAP_GENERIC_HWA_UNKNOWN = 0x00, /* Unknown HW arbitration */
+ NCSI_CAP_GENERIC_HWA_SUPPORT = 0x20, /* Supported HW arbitration */
+ NCSI_CAP_GENERIC_HWA_NOT_SUPPORT = 0x40, /* No HW arbitration */
+ NCSI_CAP_GENERIC_HWA_RESERVED = 0x60, /* Reserved HW arbitration */
+ NCSI_CAP_GENERIC_HWA_MASK = 0x60, /* Mask for HW arbitration */
+ NCSI_CAP_GENERIC_MASK = 0x7f,
+ NCSI_CAP_BC_ARP = 0x01, /* ARP packet filtering */
+ NCSI_CAP_BC_DHCPC = 0x02, /* DHCP client filtering */
+ NCSI_CAP_BC_DHCPS = 0x04, /* DHCP server filtering */
+ NCSI_CAP_BC_NETBIOS = 0x08, /* NetBIOS packet filtering */
+ NCSI_CAP_BC_MASK = 0x0f,
+ NCSI_CAP_MC_IPV6_NEIGHBOR = 0x01, /* IPv6 neighbor filtering */
+ NCSI_CAP_MC_IPV6_ROUTER = 0x02, /* IPv6 router filering */
+ NCSI_CAP_MC_DHCPV6_RELAY = 0x04, /* DHCPv6 relay / server MC */
+ NCSI_CAP_MC_DHCPV6_WELL_KNOWN = 0x08, /* DHCPv6 well-known MC */
+ NCSI_CAP_MC_IPV6_MLD = 0x10, /* IPv6 MLD filtering */
+ NCSI_CAP_MC_IPV6_NEIGHBOR_S = 0x20, /* IPv6 neighbour filtering */
+ NCSI_CAP_MC_MASK = 0x3f,
+ NCSI_CAP_AEN_LSC = 0x01, /* Link status change */
+ NCSI_CAP_AEN_CR = 0x02, /* Configuration required */
+ NCSI_CAP_AEN_HDS = 0x04, /* HNC driver status */
+ NCSI_CAP_AEN_MASK = 0x07,
+ NCSI_CAP_VLAN_ONLY = 0x01, /* Filter VLAN packet only */
+ NCSI_CAP_VLAN_NO = 0x02, /* Filter VLAN and non-VLAN */
+ NCSI_CAP_VLAN_ANY = 0x04, /* Filter Any-and-non-VLAN */
+ NCSI_CAP_VLAN_MASK = 0x07
+};
+
+enum {
+ NCSI_MODE_BASE = 0,
+ NCSI_MODE_ENABLE = 0,
+ NCSI_MODE_TX_ENABLE,
+ NCSI_MODE_LINK,
+ NCSI_MODE_VLAN,
+ NCSI_MODE_BC,
+ NCSI_MODE_MC,
+ NCSI_MODE_AEN,
+ NCSI_MODE_FC,
+ NCSI_MODE_MAX
+};
+
+enum {
+ NCSI_FILTER_BASE = 0,
+ NCSI_FILTER_VLAN = 0,
+ NCSI_FILTER_UC,
+ NCSI_FILTER_MC,
+ NCSI_FILTER_MIXED,
+ NCSI_FILTER_MAX
+};
+
+struct ncsi_channel_version {
+ u32 version; /* Supported BCD encoded NCSI version */
+ u32 alpha2; /* Supported BCD encoded NCSI version */
+ u8 fw_name[12]; /* Firware name string */
+ u32 fw_version; /* Firmware version */
+ u16 pci_ids[4]; /* PCI identification */
+ u32 mf_id; /* Manufacture ID */
+};
+
+struct ncsi_channel_cap {
+ u32 index; /* Index of channel capabilities */
+ u32 cap; /* NCSI channel capability */
+};
+
+struct ncsi_channel_mode {
+ u32 index; /* Index of channel modes */
+ u32 enable; /* Enabled or disabled */
+ u32 size; /* Valid entries in ncm_data[] */
+ u32 data[8]; /* Data entries */
+};
+
+struct ncsi_channel_filter {
+ u32 index; /* Index of channel filters */
+ u32 total; /* Total entries in the filter table */
+ u64 bitmap; /* Bitmap of valid entries */
+ u32 data[]; /* Data for the valid entries */
+};
+
+struct ncsi_channel_stats {
+ u32 hnc_cnt_hi; /* Counter cleared */
+ u32 hnc_cnt_lo; /* Counter cleared */
+ u32 hnc_rx_bytes; /* Rx bytes */
+ u32 hnc_tx_bytes; /* Tx bytes */
+ u32 hnc_rx_uc_pkts; /* Rx UC packets */
+ u32 hnc_rx_mc_pkts; /* Rx MC packets */
+ u32 hnc_rx_bc_pkts; /* Rx BC packets */
+ u32 hnc_tx_uc_pkts; /* Tx UC packets */
+ u32 hnc_tx_mc_pkts; /* Tx MC packets */
+ u32 hnc_tx_bc_pkts; /* Tx BC packets */
+ u32 hnc_fcs_err; /* FCS errors */
+ u32 hnc_align_err; /* Alignment errors */
+ u32 hnc_false_carrier; /* False carrier detection */
+ u32 hnc_runt_pkts; /* Rx runt packets */
+ u32 hnc_jabber_pkts; /* Rx jabber packets */
+ u32 hnc_rx_pause_xon; /* Rx pause XON frames */
+ u32 hnc_rx_pause_xoff; /* Rx XOFF frames */
+ u32 hnc_tx_pause_xon; /* Tx XON frames */
+ u32 hnc_tx_pause_xoff; /* Tx XOFF frames */
+ u32 hnc_tx_s_collision; /* Single collision frames */
+ u32 hnc_tx_m_collision; /* Multiple collision frames */
+ u32 hnc_l_collision; /* Late collision frames */
+ u32 hnc_e_collision; /* Excessive collision frames */
+ u32 hnc_rx_ctl_frames; /* Rx control frames */
+ u32 hnc_rx_64_frames; /* Rx 64-bytes frames */
+ u32 hnc_rx_127_frames; /* Rx 65-127 bytes frames */
+ u32 hnc_rx_255_frames; /* Rx 128-255 bytes frames */
+ u32 hnc_rx_511_frames; /* Rx 256-511 bytes frames */
+ u32 hnc_rx_1023_frames; /* Rx 512-1023 bytes frames */
+ u32 hnc_rx_1522_frames; /* Rx 1024-1522 bytes frames */
+ u32 hnc_rx_9022_frames; /* Rx 1523-9022 bytes frames */
+ u32 hnc_tx_64_frames; /* Tx 64-bytes frames */
+ u32 hnc_tx_127_frames; /* Tx 65-127 bytes frames */
+ u32 hnc_tx_255_frames; /* Tx 128-255 bytes frames */
+ u32 hnc_tx_511_frames; /* Tx 256-511 bytes frames */
+ u32 hnc_tx_1023_frames; /* Tx 512-1023 bytes frames */
+ u32 hnc_tx_1522_frames; /* Tx 1024-1522 bytes frames */
+ u32 hnc_tx_9022_frames; /* Tx 1523-9022 bytes frames */
+ u32 hnc_rx_valid_bytes; /* Rx valid bytes */
+ u32 hnc_rx_runt_pkts; /* Rx error runt packets */
+ u32 hnc_rx_jabber_pkts; /* Rx error jabber packets */
+ u32 ncsi_rx_cmds; /* Rx NCSI commands */
+ u32 ncsi_dropped_cmds; /* Dropped commands */
+ u32 ncsi_cmd_type_errs; /* Command type errors */
+ u32 ncsi_cmd_csum_errs; /* Command checksum errors */
+ u32 ncsi_rx_pkts; /* Rx NCSI packets */
+ u32 ncsi_tx_pkts; /* Tx NCSI packets */
+ u32 ncsi_tx_aen_pkts; /* Tx AEN packets */
+ u32 pt_tx_pkts; /* Tx packets */
+ u32 pt_tx_dropped; /* Tx dropped packets */
+ u32 pt_tx_channel_err; /* Tx channel errors */
+ u32 pt_tx_us_err; /* Tx undersize errors */
+ u32 pt_rx_pkts; /* Rx packets */
+ u32 pt_rx_dropped; /* Rx dropped packets */
+ u32 pt_rx_channel_err; /* Rx channel errors */
+ u32 pt_rx_us_err; /* Rx undersize errors */
+ u32 pt_rx_os_err; /* Rx oversize errors */
+};
+
+struct ncsi_dev_priv;
+struct ncsi_package;
+
+#define NCSI_PACKAGE_SHIFT 5
+#define NCSI_PACKAGE_INDEX(c) (((c) >> NCSI_PACKAGE_SHIFT) & 0x7)
+#define NCSI_CHANNEL_INDEX(c) ((c) & ((1 << NCSI_PACKAGE_SHIFT) - 1))
+#define NCSI_TO_CHANNEL(p, c) (((p) << NCSI_PACKAGE_SHIFT) | (c))
+
+struct ncsi_channel {
+ unsigned char id;
+ int state;
+#define NCSI_CHANNEL_INACTIVE 1
+#define NCSI_CHANNEL_ACTIVE 2
+#define NCSI_CHANNEL_INVISIBLE 3
+ spinlock_t lock; /* Protect filters etc */
+ struct ncsi_package *package;
+ struct ncsi_channel_version version;
+ struct ncsi_channel_cap caps[NCSI_CAP_MAX];
+ struct ncsi_channel_mode modes[NCSI_MODE_MAX];
+ struct ncsi_channel_filter *filters[NCSI_FILTER_MAX];
+ struct ncsi_channel_stats stats;
+ struct timer_list timer; /* Link monitor timer */
+ bool enabled; /* Timer is enabled */
+ unsigned int timeout; /* Times of timeout */
+ struct list_head node;
+ struct list_head link;
+};
+
+struct ncsi_package {
+ unsigned char id; /* NCSI 3-bits package ID */
+ unsigned char uuid[16]; /* UUID */
+ struct ncsi_dev_priv *ndp; /* NCSI device */
+ spinlock_t lock; /* Protect the package */
+ unsigned int channel_num; /* Number of channels */
+ struct list_head channels; /* List of chanels */
+ struct list_head node; /* Form list of packages */
+};
+
+struct ncsi_request {
+ unsigned char id; /* Request ID - 0 to 255 */
+ bool used; /* Request that has been assigned */
+ bool driven; /* Drive state machine */
+ struct ncsi_dev_priv *ndp; /* Associated NCSI device */
+ struct sk_buff *cmd; /* Associated NCSI command packet */
+ struct sk_buff *rsp; /* Associated NCSI response packet */
+ struct timer_list timer; /* Timer on waiting for response */
+ bool enabled; /* Time has been enabled or not */
+};
+
+enum {
+ ncsi_dev_state_major = 0xff00,
+ ncsi_dev_state_minor = 0x00ff,
+ ncsi_dev_state_probe_deselect = 0x0201,
+ ncsi_dev_state_probe_package,
+ ncsi_dev_state_probe_channel,
+ ncsi_dev_state_probe_cis,
+ ncsi_dev_state_probe_gvi,
+ ncsi_dev_state_probe_gc,
+ ncsi_dev_state_probe_gls,
+ ncsi_dev_state_probe_dp,
+ ncsi_dev_state_config_sp = 0x0301,
+ ncsi_dev_state_config_cis,
+ ncsi_dev_state_config_sma,
+ ncsi_dev_state_config_ebf,
+#if IS_ENABLED(CONFIG_IPV6)
+ ncsi_dev_state_config_egmf,
+#endif
+ ncsi_dev_state_config_ecnt,
+ ncsi_dev_state_config_ec,
+ ncsi_dev_state_config_ae,
+ ncsi_dev_state_config_gls,
+ ncsi_dev_state_config_done,
+ ncsi_dev_state_suspend_select = 0x0401,
+ ncsi_dev_state_suspend_dcnt,
+ ncsi_dev_state_suspend_dc,
+ ncsi_dev_state_suspend_deselect,
+ ncsi_dev_state_suspend_done
+};
+
+struct ncsi_dev_priv {
+ struct ncsi_dev ndev; /* Associated NCSI device */
+ unsigned int flags; /* NCSI device flags */
+#define NCSI_DEV_PROBED 1 /* Finalized NCSI topology */
+#define NCSI_DEV_HWA 2 /* Enabled HW arbitration */
+#define NCSI_DEV_RESHUFFLE 4
+ spinlock_t lock; /* Protect the NCSI device */
+#if IS_ENABLED(CONFIG_IPV6)
+ unsigned int inet6_addr_num; /* Number of IPv6 addresses */
+#endif
+ unsigned int package_num; /* Number of packages */
+ struct list_head packages; /* List of packages */
+ struct ncsi_request requests[256]; /* Request table */
+ unsigned int request_id; /* Last used request ID */
+ unsigned int pending_req_num; /* Number of pending requests */
+ struct ncsi_package *active_package; /* Currently handled package */
+ struct ncsi_channel *active_channel; /* Currently handled channel */
+ struct list_head channel_queue; /* Config queue of channels */
+ struct work_struct work; /* For channel management */
+ struct packet_type ptype; /* NCSI packet Rx handler */
+ struct list_head node; /* Form NCSI device list */
+};
+
+struct ncsi_cmd_arg {
+ struct ncsi_dev_priv *ndp; /* Associated NCSI device */
+ unsigned char type; /* Command in the NCSI packet */
+ unsigned char id; /* Request ID (sequence number) */
+ unsigned char package; /* Destination package ID */
+ unsigned char channel; /* Detination channel ID or 0x1f */
+ unsigned short payload; /* Command packet payload length */
+ bool driven; /* Drive the state machine? */
+ union {
+ unsigned char bytes[16]; /* Command packet specific data */
+ unsigned short words[8];
+ unsigned int dwords[4];
+ };
+};
+
+extern struct list_head ncsi_dev_list;
+extern spinlock_t ncsi_dev_lock;
+
+#define TO_NCSI_DEV_PRIV(nd) \
+ container_of(nd, struct ncsi_dev_priv, ndev)
+#define NCSI_FOR_EACH_DEV(ndp) \
+ list_for_each_entry_rcu(ndp, &ncsi_dev_list, node)
+#define NCSI_FOR_EACH_PACKAGE(ndp, np) \
+ list_for_each_entry_rcu(np, &ndp->packages, node)
+#define NCSI_FOR_EACH_CHANNEL(np, nc) \
+ list_for_each_entry_rcu(nc, &np->channels, node)
+
+/* Resources */
+int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data);
+int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data);
+int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index);
+void ncsi_start_channel_monitor(struct ncsi_channel *nc);
+void ncsi_stop_channel_monitor(struct ncsi_channel *nc);
+struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
+ unsigned char id);
+struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np,
+ unsigned char id);
+struct ncsi_package *ncsi_find_package(struct ncsi_dev_priv *ndp,
+ unsigned char id);
+struct ncsi_package *ncsi_add_package(struct ncsi_dev_priv *ndp,
+ unsigned char id);
+void ncsi_remove_package(struct ncsi_package *np);
+void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp,
+ unsigned char id,
+ struct ncsi_package **np,
+ struct ncsi_channel **nc);
+struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven);
+void ncsi_free_request(struct ncsi_request *nr);
+struct ncsi_dev *ncsi_find_dev(struct net_device *dev);
+int ncsi_process_next_channel(struct ncsi_dev_priv *ndp);
+
+/* Packet handlers */
+u32 ncsi_calculate_checksum(unsigned char *data, int len);
+int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca);
+int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev);
+int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb);
+
+#endif /* __NCSI_INTERNAL_H__ */
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
new file mode 100644
index 000000000000..d463468442ae
--- /dev/null
+++ b/net/ncsi/ncsi-aen.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright Gavin Shan, IBM Corporation 2016.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include <net/ncsi.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+#include "internal.h"
+#include "ncsi-pkt.h"
+
+static int ncsi_validate_aen_pkt(struct ncsi_aen_pkt_hdr *h,
+ const unsigned short payload)
+{
+ u32 checksum;
+ __be32 *pchecksum;
+
+ if (h->common.revision != NCSI_PKT_REVISION)
+ return -EINVAL;
+ if (ntohs(h->common.length) != payload)
+ return -EINVAL;
+
+ /* Validate checksum, which might be zeroes if the
+ * sender doesn't support checksum according to NCSI
+ * specification.
+ */
+ pchecksum = (__be32 *)((void *)(h + 1) + payload - 4);
+ if (ntohl(*pchecksum) == 0)
+ return 0;
+
+ checksum = ncsi_calculate_checksum((unsigned char *)h,
+ sizeof(*h) + payload - 4);
+ if (*pchecksum != htonl(checksum))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
+ struct ncsi_aen_pkt_hdr *h)
+{
+ struct ncsi_aen_lsc_pkt *lsc;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+ unsigned long old_data;
+ unsigned long flags;
+
+ /* Find the NCSI channel */
+ ncsi_find_package_and_channel(ndp, h->common.channel, NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Update the link status */
+ ncm = &nc->modes[NCSI_MODE_LINK];
+ lsc = (struct ncsi_aen_lsc_pkt *)h;
+ old_data = ncm->data[2];
+ ncm->data[2] = ntohl(lsc->status);
+ ncm->data[4] = ntohl(lsc->oem_status);
+ if (!((old_data ^ ncm->data[2]) & 0x1) ||
+ !list_empty(&nc->link))
+ return 0;
+ if (!(nc->state == NCSI_CHANNEL_INACTIVE && (ncm->data[2] & 0x1)) &&
+ !(nc->state == NCSI_CHANNEL_ACTIVE && !(ncm->data[2] & 0x1)))
+ return 0;
+
+ if (!(ndp->flags & NCSI_DEV_HWA) &&
+ nc->state == NCSI_CHANNEL_ACTIVE)
+ ndp->flags |= NCSI_DEV_RESHUFFLE;
+
+ ncsi_stop_channel_monitor(nc);
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ return ncsi_process_next_channel(ndp);
+}
+
+static int ncsi_aen_handler_cr(struct ncsi_dev_priv *ndp,
+ struct ncsi_aen_pkt_hdr *h)
+{
+ struct ncsi_channel *nc;
+ unsigned long flags;
+
+ /* Find the NCSI channel */
+ ncsi_find_package_and_channel(ndp, h->common.channel, NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ if (!list_empty(&nc->link) ||
+ nc->state != NCSI_CHANNEL_ACTIVE)
+ return 0;
+
+ ncsi_stop_channel_monitor(nc);
+ spin_lock_irqsave(&ndp->lock, flags);
+ xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
+ list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ return ncsi_process_next_channel(ndp);
+}
+
+static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp,
+ struct ncsi_aen_pkt_hdr *h)
+{
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+ struct ncsi_aen_hncdsc_pkt *hncdsc;
+ unsigned long flags;
+
+ /* Find the NCSI channel */
+ ncsi_find_package_and_channel(ndp, h->common.channel, NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* If the channel is active one, we need reconfigure it */
+ ncm = &nc->modes[NCSI_MODE_LINK];
+ hncdsc = (struct ncsi_aen_hncdsc_pkt *)h;
+ ncm->data[3] = ntohl(hncdsc->status);
+ if (!list_empty(&nc->link) ||
+ nc->state != NCSI_CHANNEL_ACTIVE ||
+ (ncm->data[3] & 0x1))
+ return 0;
+
+ if (ndp->flags & NCSI_DEV_HWA)
+ ndp->flags |= NCSI_DEV_RESHUFFLE;
+
+ /* If this channel is the active one and the link doesn't
+ * work, we have to choose another channel to be active one.
+ * The logic here is exactly similar to what we do when link
+ * is down on the active channel.
+ */
+ ncsi_stop_channel_monitor(nc);
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ ncsi_process_next_channel(ndp);
+
+ return 0;
+}
+
+static struct ncsi_aen_handler {
+ unsigned char type;
+ int payload;
+ int (*handler)(struct ncsi_dev_priv *ndp,
+ struct ncsi_aen_pkt_hdr *h);
+} ncsi_aen_handlers[] = {
+ { NCSI_PKT_AEN_LSC, 12, ncsi_aen_handler_lsc },
+ { NCSI_PKT_AEN_CR, 4, ncsi_aen_handler_cr },
+ { NCSI_PKT_AEN_HNCDSC, 4, ncsi_aen_handler_hncdsc }
+};
+
+int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb)
+{
+ struct ncsi_aen_pkt_hdr *h;
+ struct ncsi_aen_handler *nah = NULL;
+ int i, ret;
+
+ /* Find the handler */
+ h = (struct ncsi_aen_pkt_hdr *)skb_network_header(skb);
+ for (i = 0; i < ARRAY_SIZE(ncsi_aen_handlers); i++) {
+ if (ncsi_aen_handlers[i].type == h->type) {
+ nah = &ncsi_aen_handlers[i];
+ break;
+ }
+ }
+
+ if (!nah) {
+ netdev_warn(ndp->ndev.dev, "Invalid AEN (0x%x) received\n",
+ h->type);
+ return -ENOENT;
+ }
+
+ ret = ncsi_validate_aen_pkt(h, nah->payload);
+ if (ret)
+ goto out;
+
+ ret = nah->handler(ndp, h);
+out:
+ consume_skb(skb);
+ return ret;
+}
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
new file mode 100644
index 000000000000..21057a8ceeac
--- /dev/null
+++ b/net/ncsi/ncsi-cmd.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright Gavin Shan, IBM Corporation 2016.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include <net/ncsi.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+#include "internal.h"
+#include "ncsi-pkt.h"
+
+u32 ncsi_calculate_checksum(unsigned char *data, int len)
+{
+ u32 checksum = 0;
+ int i;
+
+ for (i = 0; i < len; i += 2)
+ checksum += (((u32)data[i] << 8) | data[i + 1]);
+
+ checksum = (~checksum + 1);
+ return checksum;
+}
+
+/* This function should be called after the data area has been
+ * populated completely.
+ */
+static void ncsi_cmd_build_header(struct ncsi_pkt_hdr *h,
+ struct ncsi_cmd_arg *nca)
+{
+ u32 checksum;
+ __be32 *pchecksum;
+
+ h->mc_id = 0;
+ h->revision = NCSI_PKT_REVISION;
+ h->reserved = 0;
+ h->id = nca->id;
+ h->type = nca->type;
+ h->channel = NCSI_TO_CHANNEL(nca->package,
+ nca->channel);
+ h->length = htons(nca->payload);
+ h->reserved1[0] = 0;
+ h->reserved1[1] = 0;
+
+ /* Fill with calculated checksum */
+ checksum = ncsi_calculate_checksum((unsigned char *)h,
+ sizeof(*h) + nca->payload);
+ pchecksum = (__be32 *)((void *)h + sizeof(struct ncsi_pkt_hdr) +
+ nca->payload);
+ *pchecksum = htonl(checksum);
+}
+
+static int ncsi_cmd_handler_default(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_cmd_pkt *cmd;
+
+ cmd = (struct ncsi_cmd_pkt *)skb_put(skb, sizeof(*cmd));
+ memset(cmd, 0, sizeof(*cmd));
+ ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+ return 0;
+}
+
+static int ncsi_cmd_handler_sp(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_cmd_sp_pkt *cmd;
+
+ cmd = (struct ncsi_cmd_sp_pkt *)skb_put(skb, sizeof(*cmd));
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hw_arbitration = nca->bytes[0];
+ ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+ return 0;
+}
+
+static int ncsi_cmd_handler_dc(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_cmd_dc_pkt *cmd;
+
+ cmd = (struct ncsi_cmd_dc_pkt *)skb_put(skb, sizeof(*cmd));
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->ald = nca->bytes[0];
+ ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+ return 0;
+}
+
+static int ncsi_cmd_handler_rc(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_cmd_rc_pkt *cmd;
+
+ cmd = (struct ncsi_cmd_rc_pkt *)skb_put(skb, sizeof(*cmd));
+ memset(cmd, 0, sizeof(*cmd));
+ ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+ return 0;
+}
+
+static int ncsi_cmd_handler_ae(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_cmd_ae_pkt *cmd;
+
+ cmd = (struct ncsi_cmd_ae_pkt *)skb_put(skb, sizeof(*cmd));
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->mc_id = nca->bytes[0];
+ cmd->mode = htonl(nca->dwords[1]);
+ ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+ return 0;
+}
+
+static int ncsi_cmd_handler_sl(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_cmd_sl_pkt *cmd;
+
+ cmd = (struct ncsi_cmd_sl_pkt *)skb_put(skb, sizeof(*cmd));
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->mode = htonl(nca->dwords[0]);
+ cmd->oem_mode = htonl(nca->dwords[1]);
+ ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+ return 0;
+}
+
+static int ncsi_cmd_handler_svf(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_cmd_svf_pkt *cmd;
+
+ cmd = (struct ncsi_cmd_svf_pkt *)skb_put(skb, sizeof(*cmd));
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->vlan = htons(nca->words[0]);
+ cmd->index = nca->bytes[2];
+ cmd->enable = nca->bytes[3];
+ ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+ return 0;
+}
+
+static int ncsi_cmd_handler_ev(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_cmd_ev_pkt *cmd;
+
+ cmd = (struct ncsi_cmd_ev_pkt *)skb_put(skb, sizeof(*cmd));
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->mode = nca->bytes[0];
+ ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+ return 0;
+}
+
+static int ncsi_cmd_handler_sma(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_cmd_sma_pkt *cmd;
+ int i;
+
+ cmd = (struct ncsi_cmd_sma_pkt *)skb_put(skb, sizeof(*cmd));
+ memset(cmd, 0, sizeof(*cmd));
+ for (i = 0; i < 6; i++)
+ cmd->mac[i] = nca->bytes[i];
+ cmd->index = nca->bytes[6];
+ cmd->at_e = nca->bytes[7];
+ ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+ return 0;
+}
+
+static int ncsi_cmd_handler_ebf(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_cmd_ebf_pkt *cmd;
+
+ cmd = (struct ncsi_cmd_ebf_pkt *)skb_put(skb, sizeof(*cmd));
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->mode = htonl(nca->dwords[0]);
+ ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+ return 0;
+}
+
+static int ncsi_cmd_handler_egmf(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_cmd_egmf_pkt *cmd;
+
+ cmd = (struct ncsi_cmd_egmf_pkt *)skb_put(skb, sizeof(*cmd));
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->mode = htonl(nca->dwords[0]);
+ ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+ return 0;
+}
+
+static int ncsi_cmd_handler_snfc(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_cmd_snfc_pkt *cmd;
+
+ cmd = (struct ncsi_cmd_snfc_pkt *)skb_put(skb, sizeof(*cmd));
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->mode = nca->bytes[0];
+ ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+ return 0;
+}
+
+static struct ncsi_cmd_handler {
+ unsigned char type;
+ int payload;
+ int (*handler)(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca);
+} ncsi_cmd_handlers[] = {
+ { NCSI_PKT_CMD_CIS, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_SP, 4, ncsi_cmd_handler_sp },
+ { NCSI_PKT_CMD_DP, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_EC, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_DC, 4, ncsi_cmd_handler_dc },
+ { NCSI_PKT_CMD_RC, 4, ncsi_cmd_handler_rc },
+ { NCSI_PKT_CMD_ECNT, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_DCNT, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_AE, 8, ncsi_cmd_handler_ae },
+ { NCSI_PKT_CMD_SL, 8, ncsi_cmd_handler_sl },
+ { NCSI_PKT_CMD_GLS, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_SVF, 4, ncsi_cmd_handler_svf },
+ { NCSI_PKT_CMD_EV, 4, ncsi_cmd_handler_ev },
+ { NCSI_PKT_CMD_DV, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_SMA, 8, ncsi_cmd_handler_sma },
+ { NCSI_PKT_CMD_EBF, 4, ncsi_cmd_handler_ebf },
+ { NCSI_PKT_CMD_DBF, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_EGMF, 4, ncsi_cmd_handler_egmf },
+ { NCSI_PKT_CMD_DGMF, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_SNFC, 4, ncsi_cmd_handler_snfc },
+ { NCSI_PKT_CMD_GVI, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_GC, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_GP, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_GCPS, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_GNS, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_GNPTS, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_GPS, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_OEM, 0, NULL },
+ { NCSI_PKT_CMD_PLDM, 0, NULL },
+ { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default }
+};
+
+static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_dev_priv *ndp = nca->ndp;
+ struct ncsi_dev *nd = &ndp->ndev;
+ struct net_device *dev = nd->dev;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
+ int len = hlen + tlen;
+ struct sk_buff *skb;
+ struct ncsi_request *nr;
+
+ nr = ncsi_alloc_request(ndp, nca->driven);
+ if (!nr)
+ return NULL;
+
+ /* NCSI command packet has 16-bytes header, payload, 4 bytes checksum.
+ * The packet needs padding if its payload is less than 26 bytes to
+ * meet 64 bytes minimal ethernet frame length.
+ */
+ len += sizeof(struct ncsi_cmd_pkt_hdr) + 4;
+ if (nca->payload < 26)
+ len += 26;
+ else
+ len += nca->payload;
+
+ /* Allocate skb */
+ skb = alloc_skb(len, GFP_ATOMIC);
+ if (!skb) {
+ ncsi_free_request(nr);
+ return NULL;
+ }
+
+ nr->cmd = skb;
+ skb_reserve(skb, hlen);
+ skb_reset_network_header(skb);
+
+ skb->dev = dev;
+ skb->protocol = htons(ETH_P_NCSI);
+
+ return nr;
+}
+
+int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_request *nr;
+ struct ethhdr *eh;
+ struct ncsi_cmd_handler *nch = NULL;
+ int i, ret;
+
+ /* Search for the handler */
+ for (i = 0; i < ARRAY_SIZE(ncsi_cmd_handlers); i++) {
+ if (ncsi_cmd_handlers[i].type == nca->type) {
+ if (ncsi_cmd_handlers[i].handler)
+ nch = &ncsi_cmd_handlers[i];
+ else
+ nch = NULL;
+
+ break;
+ }
+ }
+
+ if (!nch) {
+ netdev_err(nca->ndp->ndev.dev,
+ "Cannot send packet with type 0x%02x\n", nca->type);
+ return -ENOENT;
+ }
+
+ /* Get packet payload length and allocate the request */
+ nca->payload = nch->payload;
+ nr = ncsi_alloc_command(nca);
+ if (!nr)
+ return -ENOMEM;
+
+ /* Prepare the packet */
+ nca->id = nr->id;
+ ret = nch->handler(nr->cmd, nca);
+ if (ret) {
+ ncsi_free_request(nr);
+ return ret;
+ }
+
+ /* Fill the ethernet header */
+ eh = (struct ethhdr *)skb_push(nr->cmd, sizeof(*eh));
+ eh->h_proto = htons(ETH_P_NCSI);
+ eth_broadcast_addr(eh->h_dest);
+ eth_broadcast_addr(eh->h_source);
+
+ /* Start the timer for the request that might not have
+ * corresponding response. Given NCSI is an internal
+ * connection a 1 second delay should be sufficient.
+ */
+ nr->enabled = true;
+ mod_timer(&nr->timer, jiffies + 1 * HZ);
+
+ /* Send NCSI packet */
+ skb_get(nr->cmd);
+ ret = dev_queue_xmit(nr->cmd);
+ if (ret < 0) {
+ ncsi_free_request(nr);
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
new file mode 100644
index 000000000000..d627a39ddcd0
--- /dev/null
+++ b/net/ncsi/ncsi-manage.c
@@ -0,0 +1,1199 @@
+/*
+ * Copyright Gavin Shan, IBM Corporation 2016.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+
+#include <net/ncsi.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/if_inet6.h>
+
+#include "internal.h"
+#include "ncsi-pkt.h"
+
+LIST_HEAD(ncsi_dev_list);
+DEFINE_SPINLOCK(ncsi_dev_lock);
+
+static inline int ncsi_filter_size(int table)
+{
+ int sizes[] = { 2, 6, 6, 6 };
+
+ BUILD_BUG_ON(ARRAY_SIZE(sizes) != NCSI_FILTER_MAX);
+ if (table < NCSI_FILTER_BASE || table >= NCSI_FILTER_MAX)
+ return -EINVAL;
+
+ return sizes[table];
+}
+
+int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data)
+{
+ struct ncsi_channel_filter *ncf;
+ void *bitmap;
+ int index, size;
+ unsigned long flags;
+
+ ncf = nc->filters[table];
+ if (!ncf)
+ return -ENXIO;
+
+ size = ncsi_filter_size(table);
+ if (size < 0)
+ return size;
+
+ spin_lock_irqsave(&nc->lock, flags);
+ bitmap = (void *)&ncf->bitmap;
+ index = -1;
+ while ((index = find_next_bit(bitmap, ncf->total, index + 1))
+ < ncf->total) {
+ if (!memcmp(ncf->data + size * index, data, size)) {
+ spin_unlock_irqrestore(&nc->lock, flags);
+ return index;
+ }
+ }
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ return -ENOENT;
+}
+
+int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data)
+{
+ struct ncsi_channel_filter *ncf;
+ int index, size;
+ void *bitmap;
+ unsigned long flags;
+
+ size = ncsi_filter_size(table);
+ if (size < 0)
+ return size;
+
+ index = ncsi_find_filter(nc, table, data);
+ if (index >= 0)
+ return index;
+
+ ncf = nc->filters[table];
+ if (!ncf)
+ return -ENODEV;
+
+ spin_lock_irqsave(&nc->lock, flags);
+ bitmap = (void *)&ncf->bitmap;
+ do {
+ index = find_next_zero_bit(bitmap, ncf->total, 0);
+ if (index >= ncf->total) {
+ spin_unlock_irqrestore(&nc->lock, flags);
+ return -ENOSPC;
+ }
+ } while (test_and_set_bit(index, bitmap));
+
+ memcpy(ncf->data + size * index, data, size);
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ return index;
+}
+
+int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index)
+{
+ struct ncsi_channel_filter *ncf;
+ int size;
+ void *bitmap;
+ unsigned long flags;
+
+ size = ncsi_filter_size(table);
+ if (size < 0)
+ return size;
+
+ ncf = nc->filters[table];
+ if (!ncf || index >= ncf->total)
+ return -ENODEV;
+
+ spin_lock_irqsave(&nc->lock, flags);
+ bitmap = (void *)&ncf->bitmap;
+ if (test_and_clear_bit(index, bitmap))
+ memset(ncf->data + size * index, 0, size);
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ return 0;
+}
+
+static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down)
+{
+ struct ncsi_dev *nd = &ndp->ndev;
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+
+ nd->state = ncsi_dev_state_functional;
+ if (force_down) {
+ nd->link_up = 0;
+ goto report;
+ }
+
+ nd->link_up = 0;
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ if (!list_empty(&nc->link) ||
+ nc->state != NCSI_CHANNEL_ACTIVE)
+ continue;
+
+ if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
+ nd->link_up = 1;
+ goto report;
+ }
+ }
+ }
+
+report:
+ nd->handler(nd);
+}
+
+static void ncsi_channel_monitor(unsigned long data)
+{
+ struct ncsi_channel *nc = (struct ncsi_channel *)data;
+ struct ncsi_package *np = nc->package;
+ struct ncsi_dev_priv *ndp = np->ndp;
+ struct ncsi_cmd_arg nca;
+ bool enabled;
+ unsigned int timeout;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&nc->lock, flags);
+ timeout = nc->timeout;
+ enabled = nc->enabled;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ if (!enabled || !list_empty(&nc->link))
+ return;
+ if (nc->state != NCSI_CHANNEL_INACTIVE &&
+ nc->state != NCSI_CHANNEL_ACTIVE)
+ return;
+
+ if (!(timeout % 2)) {
+ nca.ndp = ndp;
+ nca.package = np->id;
+ nca.channel = nc->id;
+ nca.type = NCSI_PKT_CMD_GLS;
+ nca.driven = false;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret) {
+ netdev_err(ndp->ndev.dev, "Error %d sending GLS\n",
+ ret);
+ return;
+ }
+ }
+
+ if (timeout + 1 >= 3) {
+ if (!(ndp->flags & NCSI_DEV_HWA) &&
+ nc->state == NCSI_CHANNEL_ACTIVE)
+ ncsi_report_link(ndp, true);
+
+ spin_lock_irqsave(&ndp->lock, flags);
+ xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
+ list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ ncsi_process_next_channel(ndp);
+ return;
+ }
+
+ spin_lock_irqsave(&nc->lock, flags);
+ nc->timeout = timeout + 1;
+ nc->enabled = true;
+ spin_unlock_irqrestore(&nc->lock, flags);
+ mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2)));
+}
+
+void ncsi_start_channel_monitor(struct ncsi_channel *nc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&nc->lock, flags);
+ WARN_ON_ONCE(nc->enabled);
+ nc->timeout = 0;
+ nc->enabled = true;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2)));
+}
+
+void ncsi_stop_channel_monitor(struct ncsi_channel *nc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&nc->lock, flags);
+ if (!nc->enabled) {
+ spin_unlock_irqrestore(&nc->lock, flags);
+ return;
+ }
+ nc->enabled = false;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ del_timer_sync(&nc->timer);
+}
+
+struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
+ unsigned char id)
+{
+ struct ncsi_channel *nc;
+
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ if (nc->id == id)
+ return nc;
+ }
+
+ return NULL;
+}
+
+struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id)
+{
+ struct ncsi_channel *nc, *tmp;
+ int index;
+ unsigned long flags;
+
+ nc = kzalloc(sizeof(*nc), GFP_ATOMIC);
+ if (!nc)
+ return NULL;
+
+ nc->id = id;
+ nc->package = np;
+ nc->state = NCSI_CHANNEL_INACTIVE;
+ nc->enabled = false;
+ setup_timer(&nc->timer, ncsi_channel_monitor, (unsigned long)nc);
+ spin_lock_init(&nc->lock);
+ INIT_LIST_HEAD(&nc->link);
+ for (index = 0; index < NCSI_CAP_MAX; index++)
+ nc->caps[index].index = index;
+ for (index = 0; index < NCSI_MODE_MAX; index++)
+ nc->modes[index].index = index;
+
+ spin_lock_irqsave(&np->lock, flags);
+ tmp = ncsi_find_channel(np, id);
+ if (tmp) {
+ spin_unlock_irqrestore(&np->lock, flags);
+ kfree(nc);
+ return tmp;
+ }
+
+ list_add_tail_rcu(&nc->node, &np->channels);
+ np->channel_num++;
+ spin_unlock_irqrestore(&np->lock, flags);
+
+ return nc;
+}
+
+static void ncsi_remove_channel(struct ncsi_channel *nc)
+{
+ struct ncsi_package *np = nc->package;
+ struct ncsi_channel_filter *ncf;
+ unsigned long flags;
+ int i;
+
+ /* Release filters */
+ spin_lock_irqsave(&nc->lock, flags);
+ for (i = 0; i < NCSI_FILTER_MAX; i++) {
+ ncf = nc->filters[i];
+ if (!ncf)
+ continue;
+
+ nc->filters[i] = NULL;
+ kfree(ncf);
+ }
+
+ nc->state = NCSI_CHANNEL_INACTIVE;
+ spin_unlock_irqrestore(&nc->lock, flags);
+ ncsi_stop_channel_monitor(nc);
+
+ /* Remove and free channel */
+ spin_lock_irqsave(&np->lock, flags);
+ list_del_rcu(&nc->node);
+ np->channel_num--;
+ spin_unlock_irqrestore(&np->lock, flags);
+
+ kfree(nc);
+}
+
+struct ncsi_package *ncsi_find_package(struct ncsi_dev_priv *ndp,
+ unsigned char id)
+{
+ struct ncsi_package *np;
+
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ if (np->id == id)
+ return np;
+ }
+
+ return NULL;
+}
+
+struct ncsi_package *ncsi_add_package(struct ncsi_dev_priv *ndp,
+ unsigned char id)
+{
+ struct ncsi_package *np, *tmp;
+ unsigned long flags;
+
+ np = kzalloc(sizeof(*np), GFP_ATOMIC);
+ if (!np)
+ return NULL;
+
+ np->id = id;
+ np->ndp = ndp;
+ spin_lock_init(&np->lock);
+ INIT_LIST_HEAD(&np->channels);
+
+ spin_lock_irqsave(&ndp->lock, flags);
+ tmp = ncsi_find_package(ndp, id);
+ if (tmp) {
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ kfree(np);
+ return tmp;
+ }
+
+ list_add_tail_rcu(&np->node, &ndp->packages);
+ ndp->package_num++;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ return np;
+}
+
+void ncsi_remove_package(struct ncsi_package *np)
+{
+ struct ncsi_dev_priv *ndp = np->ndp;
+ struct ncsi_channel *nc, *tmp;
+ unsigned long flags;
+
+ /* Release all child channels */
+ list_for_each_entry_safe(nc, tmp, &np->channels, node)
+ ncsi_remove_channel(nc);
+
+ /* Remove and free package */
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_del_rcu(&np->node);
+ ndp->package_num--;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ kfree(np);
+}
+
+void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp,
+ unsigned char id,
+ struct ncsi_package **np,
+ struct ncsi_channel **nc)
+{
+ struct ncsi_package *p;
+ struct ncsi_channel *c;
+
+ p = ncsi_find_package(ndp, NCSI_PACKAGE_INDEX(id));
+ c = p ? ncsi_find_channel(p, NCSI_CHANNEL_INDEX(id)) : NULL;
+
+ if (np)
+ *np = p;
+ if (nc)
+ *nc = c;
+}
+
+/* For two consecutive NCSI commands, the packet IDs shouldn't
+ * be same. Otherwise, the bogus response might be replied. So
+ * the available IDs are allocated in round-robin fashion.
+ */
+struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven)
+{
+ struct ncsi_request *nr = NULL;
+ int i, limit = ARRAY_SIZE(ndp->requests);
+ unsigned long flags;
+
+ /* Check if there is one available request until the ceiling */
+ spin_lock_irqsave(&ndp->lock, flags);
+ for (i = ndp->request_id; !nr && i < limit; i++) {
+ if (ndp->requests[i].used)
+ continue;
+
+ nr = &ndp->requests[i];
+ nr->used = true;
+ nr->driven = driven;
+ if (++ndp->request_id >= limit)
+ ndp->request_id = 0;
+ }
+
+ /* Fail back to check from the starting cursor */
+ for (i = 0; !nr && i < ndp->request_id; i++) {
+ if (ndp->requests[i].used)
+ continue;
+
+ nr = &ndp->requests[i];
+ nr->used = true;
+ nr->driven = driven;
+ if (++ndp->request_id >= limit)
+ ndp->request_id = 0;
+ }
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ return nr;
+}
+
+void ncsi_free_request(struct ncsi_request *nr)
+{
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct sk_buff *cmd, *rsp;
+ unsigned long flags;
+ bool driven;
+
+ if (nr->enabled) {
+ nr->enabled = false;
+ del_timer_sync(&nr->timer);
+ }
+
+ spin_lock_irqsave(&ndp->lock, flags);
+ cmd = nr->cmd;
+ rsp = nr->rsp;
+ nr->cmd = NULL;
+ nr->rsp = NULL;
+ nr->used = false;
+ driven = nr->driven;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ if (driven && cmd && --ndp->pending_req_num == 0)
+ schedule_work(&ndp->work);
+
+ /* Release command and response */
+ consume_skb(cmd);
+ consume_skb(rsp);
+}
+
+struct ncsi_dev *ncsi_find_dev(struct net_device *dev)
+{
+ struct ncsi_dev_priv *ndp;
+
+ NCSI_FOR_EACH_DEV(ndp) {
+ if (ndp->ndev.dev == dev)
+ return &ndp->ndev;
+ }
+
+ return NULL;
+}
+
+static void ncsi_request_timeout(unsigned long data)
+{
+ struct ncsi_request *nr = (struct ncsi_request *)data;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ unsigned long flags;
+
+ /* If the request already had associated response,
+ * let the response handler to release it.
+ */
+ spin_lock_irqsave(&ndp->lock, flags);
+ nr->enabled = false;
+ if (nr->rsp || !nr->cmd) {
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ /* Release the request */
+ ncsi_free_request(nr);
+}
+
+static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
+{
+ struct ncsi_dev *nd = &ndp->ndev;
+ struct ncsi_package *np = ndp->active_package;
+ struct ncsi_channel *nc = ndp->active_channel;
+ struct ncsi_cmd_arg nca;
+ int ret;
+
+ nca.ndp = ndp;
+ nca.driven = true;
+ switch (nd->state) {
+ case ncsi_dev_state_suspend:
+ nd->state = ncsi_dev_state_suspend_select;
+ /* Fall through */
+ case ncsi_dev_state_suspend_select:
+ case ncsi_dev_state_suspend_dcnt:
+ case ncsi_dev_state_suspend_dc:
+ case ncsi_dev_state_suspend_deselect:
+ ndp->pending_req_num = 1;
+
+ np = ndp->active_package;
+ nc = ndp->active_channel;
+ nca.package = np->id;
+ if (nd->state == ncsi_dev_state_suspend_select) {
+ nca.type = NCSI_PKT_CMD_SP;
+ nca.channel = 0x1f;
+ if (ndp->flags & NCSI_DEV_HWA)
+ nca.bytes[0] = 0;
+ else
+ nca.bytes[0] = 1;
+ nd->state = ncsi_dev_state_suspend_dcnt;
+ } else if (nd->state == ncsi_dev_state_suspend_dcnt) {
+ nca.type = NCSI_PKT_CMD_DCNT;
+ nca.channel = nc->id;
+ nd->state = ncsi_dev_state_suspend_dc;
+ } else if (nd->state == ncsi_dev_state_suspend_dc) {
+ nca.type = NCSI_PKT_CMD_DC;
+ nca.channel = nc->id;
+ nca.bytes[0] = 1;
+ nd->state = ncsi_dev_state_suspend_deselect;
+ } else if (nd->state == ncsi_dev_state_suspend_deselect) {
+ nca.type = NCSI_PKT_CMD_DP;
+ nca.channel = 0x1f;
+ nd->state = ncsi_dev_state_suspend_done;
+ }
+
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret) {
+ nd->state = ncsi_dev_state_functional;
+ return;
+ }
+
+ break;
+ case ncsi_dev_state_suspend_done:
+ xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
+ ncsi_process_next_channel(ndp);
+
+ break;
+ default:
+ netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n",
+ nd->state);
+ }
+}
+
+static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
+{
+ struct ncsi_dev *nd = &ndp->ndev;
+ struct net_device *dev = nd->dev;
+ struct ncsi_package *np = ndp->active_package;
+ struct ncsi_channel *nc = ndp->active_channel;
+ struct ncsi_cmd_arg nca;
+ unsigned char index;
+ int ret;
+
+ nca.ndp = ndp;
+ nca.driven = true;
+ switch (nd->state) {
+ case ncsi_dev_state_config:
+ case ncsi_dev_state_config_sp:
+ ndp->pending_req_num = 1;
+
+ /* Select the specific package */
+ nca.type = NCSI_PKT_CMD_SP;
+ if (ndp->flags & NCSI_DEV_HWA)
+ nca.bytes[0] = 0;
+ else
+ nca.bytes[0] = 1;
+ nca.package = np->id;
+ nca.channel = 0x1f;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+
+ nd->state = ncsi_dev_state_config_cis;
+ break;
+ case ncsi_dev_state_config_cis:
+ ndp->pending_req_num = 1;
+
+ /* Clear initial state */
+ nca.type = NCSI_PKT_CMD_CIS;
+ nca.package = np->id;
+ nca.channel = nc->id;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+
+ nd->state = ncsi_dev_state_config_sma;
+ break;
+ case ncsi_dev_state_config_sma:
+ case ncsi_dev_state_config_ebf:
+#if IS_ENABLED(CONFIG_IPV6)
+ case ncsi_dev_state_config_egmf:
+#endif
+ case ncsi_dev_state_config_ecnt:
+ case ncsi_dev_state_config_ec:
+ case ncsi_dev_state_config_ae:
+ case ncsi_dev_state_config_gls:
+ ndp->pending_req_num = 1;
+
+ nca.package = np->id;
+ nca.channel = nc->id;
+
+ /* Use first entry in unicast filter table. Note that
+ * the MAC filter table starts from entry 1 instead of
+ * 0.
+ */
+ if (nd->state == ncsi_dev_state_config_sma) {
+ nca.type = NCSI_PKT_CMD_SMA;
+ for (index = 0; index < 6; index++)
+ nca.bytes[index] = dev->dev_addr[index];
+ nca.bytes[6] = 0x1;
+ nca.bytes[7] = 0x1;
+ nd->state = ncsi_dev_state_config_ebf;
+ } else if (nd->state == ncsi_dev_state_config_ebf) {
+ nca.type = NCSI_PKT_CMD_EBF;
+ nca.dwords[0] = nc->caps[NCSI_CAP_BC].cap;
+ nd->state = ncsi_dev_state_config_ecnt;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ndp->inet6_addr_num > 0 &&
+ (nc->caps[NCSI_CAP_GENERIC].cap &
+ NCSI_CAP_GENERIC_MC))
+ nd->state = ncsi_dev_state_config_egmf;
+ else
+ nd->state = ncsi_dev_state_config_ecnt;
+ } else if (nd->state == ncsi_dev_state_config_egmf) {
+ nca.type = NCSI_PKT_CMD_EGMF;
+ nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap;
+ nd->state = ncsi_dev_state_config_ecnt;
+#endif /* CONFIG_IPV6 */
+ } else if (nd->state == ncsi_dev_state_config_ecnt) {
+ nca.type = NCSI_PKT_CMD_ECNT;
+ nd->state = ncsi_dev_state_config_ec;
+ } else if (nd->state == ncsi_dev_state_config_ec) {
+ /* Enable AEN if it's supported */
+ nca.type = NCSI_PKT_CMD_EC;
+ nd->state = ncsi_dev_state_config_ae;
+ if (!(nc->caps[NCSI_CAP_AEN].cap & NCSI_CAP_AEN_MASK))
+ nd->state = ncsi_dev_state_config_gls;
+ } else if (nd->state == ncsi_dev_state_config_ae) {
+ nca.type = NCSI_PKT_CMD_AE;
+ nca.bytes[0] = 0;
+ nca.dwords[1] = nc->caps[NCSI_CAP_AEN].cap;
+ nd->state = ncsi_dev_state_config_gls;
+ } else if (nd->state == ncsi_dev_state_config_gls) {
+ nca.type = NCSI_PKT_CMD_GLS;
+ nd->state = ncsi_dev_state_config_done;
+ }
+
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+ break;
+ case ncsi_dev_state_config_done:
+ if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1)
+ xchg(&nc->state, NCSI_CHANNEL_ACTIVE);
+ else
+ xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
+
+ ncsi_start_channel_monitor(nc);
+ ncsi_process_next_channel(ndp);
+ break;
+ default:
+ netdev_warn(dev, "Wrong NCSI state 0x%x in config\n",
+ nd->state);
+ }
+
+ return;
+
+error:
+ ncsi_report_link(ndp, true);
+}
+
+static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
+{
+ struct ncsi_package *np;
+ struct ncsi_channel *nc, *found;
+ struct ncsi_channel_mode *ncm;
+ unsigned long flags;
+
+ /* The search is done once an inactive channel with up
+ * link is found.
+ */
+ found = NULL;
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ if (!list_empty(&nc->link) ||
+ nc->state != NCSI_CHANNEL_INACTIVE)
+ continue;
+
+ if (!found)
+ found = nc;
+
+ ncm = &nc->modes[NCSI_MODE_LINK];
+ if (ncm->data[2] & 0x1) {
+ found = nc;
+ goto out;
+ }
+ }
+ }
+
+ if (!found) {
+ ncsi_report_link(ndp, true);
+ return -ENODEV;
+ }
+
+out:
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_add_tail_rcu(&found->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ return ncsi_process_next_channel(ndp);
+}
+
+static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
+{
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+ unsigned int cap;
+
+ /* The hardware arbitration is disabled if any one channel
+ * doesn't support explicitly.
+ */
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ cap = nc->caps[NCSI_CAP_GENERIC].cap;
+ if (!(cap & NCSI_CAP_GENERIC_HWA) ||
+ (cap & NCSI_CAP_GENERIC_HWA_MASK) !=
+ NCSI_CAP_GENERIC_HWA_SUPPORT) {
+ ndp->flags &= ~NCSI_DEV_HWA;
+ return false;
+ }
+ }
+ }
+
+ ndp->flags |= NCSI_DEV_HWA;
+ return true;
+}
+
+static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
+{
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+ unsigned long flags;
+
+ /* Move all available channels to processing queue */
+ spin_lock_irqsave(&ndp->lock, flags);
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ WARN_ON_ONCE(nc->state != NCSI_CHANNEL_INACTIVE ||
+ !list_empty(&nc->link));
+ ncsi_stop_channel_monitor(nc);
+ list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+ }
+ }
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ /* We can have no channels in extremely case */
+ if (list_empty(&ndp->channel_queue)) {
+ ncsi_report_link(ndp, false);
+ return -ENOENT;
+ }
+
+ return ncsi_process_next_channel(ndp);
+}
+
+static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
+{
+ struct ncsi_dev *nd = &ndp->ndev;
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+ struct ncsi_cmd_arg nca;
+ unsigned char index;
+ int ret;
+
+ nca.ndp = ndp;
+ nca.driven = true;
+ switch (nd->state) {
+ case ncsi_dev_state_probe:
+ nd->state = ncsi_dev_state_probe_deselect;
+ /* Fall through */
+ case ncsi_dev_state_probe_deselect:
+ ndp->pending_req_num = 8;
+
+ /* Deselect all possible packages */
+ nca.type = NCSI_PKT_CMD_DP;
+ nca.channel = 0x1f;
+ for (index = 0; index < 8; index++) {
+ nca.package = index;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+ }
+
+ nd->state = ncsi_dev_state_probe_package;
+ break;
+ case ncsi_dev_state_probe_package:
+ ndp->pending_req_num = 16;
+
+ /* Select all possible packages */
+ nca.type = NCSI_PKT_CMD_SP;
+ nca.bytes[0] = 1;
+ nca.channel = 0x1f;
+ for (index = 0; index < 8; index++) {
+ nca.package = index;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+ }
+
+ /* Disable all possible packages */
+ nca.type = NCSI_PKT_CMD_DP;
+ for (index = 0; index < 8; index++) {
+ nca.package = index;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+ }
+
+ nd->state = ncsi_dev_state_probe_channel;
+ break;
+ case ncsi_dev_state_probe_channel:
+ if (!ndp->active_package)
+ ndp->active_package = list_first_or_null_rcu(
+ &ndp->packages, struct ncsi_package, node);
+ else if (list_is_last(&ndp->active_package->node,
+ &ndp->packages))
+ ndp->active_package = NULL;
+ else
+ ndp->active_package = list_next_entry(
+ ndp->active_package, node);
+
+ /* All available packages and channels are enumerated. The
+ * enumeration happens for once when the NCSI interface is
+ * started. So we need continue to start the interface after
+ * the enumeration.
+ *
+ * We have to choose an active channel before configuring it.
+ * Note that we possibly don't have active channel in extreme
+ * situation.
+ */
+ if (!ndp->active_package) {
+ ndp->flags |= NCSI_DEV_PROBED;
+ if (ncsi_check_hwa(ndp))
+ ncsi_enable_hwa(ndp);
+ else
+ ncsi_choose_active_channel(ndp);
+ return;
+ }
+
+ /* Select the active package */
+ ndp->pending_req_num = 1;
+ nca.type = NCSI_PKT_CMD_SP;
+ nca.bytes[0] = 1;
+ nca.package = ndp->active_package->id;
+ nca.channel = 0x1f;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+
+ nd->state = ncsi_dev_state_probe_cis;
+ break;
+ case ncsi_dev_state_probe_cis:
+ ndp->pending_req_num = 32;
+
+ /* Clear initial state */
+ nca.type = NCSI_PKT_CMD_CIS;
+ nca.package = ndp->active_package->id;
+ for (index = 0; index < 0x20; index++) {
+ nca.channel = index;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+ }
+
+ nd->state = ncsi_dev_state_probe_gvi;
+ break;
+ case ncsi_dev_state_probe_gvi:
+ case ncsi_dev_state_probe_gc:
+ case ncsi_dev_state_probe_gls:
+ np = ndp->active_package;
+ ndp->pending_req_num = np->channel_num;
+
+ /* Retrieve version, capability or link status */
+ if (nd->state == ncsi_dev_state_probe_gvi)
+ nca.type = NCSI_PKT_CMD_GVI;
+ else if (nd->state == ncsi_dev_state_probe_gc)
+ nca.type = NCSI_PKT_CMD_GC;
+ else
+ nca.type = NCSI_PKT_CMD_GLS;
+
+ nca.package = np->id;
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ nca.channel = nc->id;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+ }
+
+ if (nd->state == ncsi_dev_state_probe_gvi)
+ nd->state = ncsi_dev_state_probe_gc;
+ else if (nd->state == ncsi_dev_state_probe_gc)
+ nd->state = ncsi_dev_state_probe_gls;
+ else
+ nd->state = ncsi_dev_state_probe_dp;
+ break;
+ case ncsi_dev_state_probe_dp:
+ ndp->pending_req_num = 1;
+
+ /* Deselect the active package */
+ nca.type = NCSI_PKT_CMD_DP;
+ nca.package = ndp->active_package->id;
+ nca.channel = 0x1f;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+
+ /* Scan channels in next package */
+ nd->state = ncsi_dev_state_probe_channel;
+ break;
+ default:
+ netdev_warn(nd->dev, "Wrong NCSI state 0x%0x in enumeration\n",
+ nd->state);
+ }
+
+ return;
+error:
+ ncsi_report_link(ndp, true);
+}
+
+static void ncsi_dev_work(struct work_struct *work)
+{
+ struct ncsi_dev_priv *ndp = container_of(work,
+ struct ncsi_dev_priv, work);
+ struct ncsi_dev *nd = &ndp->ndev;
+
+ switch (nd->state & ncsi_dev_state_major) {
+ case ncsi_dev_state_probe:
+ ncsi_probe_channel(ndp);
+ break;
+ case ncsi_dev_state_suspend:
+ ncsi_suspend_channel(ndp);
+ break;
+ case ncsi_dev_state_config:
+ ncsi_configure_channel(ndp);
+ break;
+ default:
+ netdev_warn(nd->dev, "Wrong NCSI state 0x%x in workqueue\n",
+ nd->state);
+ }
+}
+
+int ncsi_process_next_channel(struct ncsi_dev_priv *ndp)
+{
+ struct ncsi_channel *nc;
+ int old_state;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ndp->lock, flags);
+ nc = list_first_or_null_rcu(&ndp->channel_queue,
+ struct ncsi_channel, link);
+ if (nc) {
+ old_state = xchg(&nc->state, NCSI_CHANNEL_INVISIBLE);
+ list_del_init(&nc->link);
+ }
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ ndp->active_channel = nc;
+ ndp->active_package = nc ? nc->package : NULL;
+ if (!nc) {
+ if (ndp->flags & NCSI_DEV_RESHUFFLE) {
+ ndp->flags &= ~NCSI_DEV_RESHUFFLE;
+ return ncsi_choose_active_channel(ndp);
+ }
+
+ ncsi_report_link(ndp, false);
+ return -ENODEV;
+ }
+
+ switch (old_state) {
+ case NCSI_CHANNEL_INACTIVE:
+ ndp->ndev.state = ncsi_dev_state_config;
+ ncsi_configure_channel(ndp);
+ break;
+ case NCSI_CHANNEL_ACTIVE:
+ ndp->ndev.state = ncsi_dev_state_suspend;
+ ncsi_suspend_channel(ndp);
+ break;
+ default:
+ netdev_err(ndp->ndev.dev, "Invalid state 0x%x on %d:%d\n",
+ nc->state, nc->package->id, nc->id);
+ ncsi_report_link(ndp, false);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int ncsi_inet6addr_event(struct notifier_block *this,
+ unsigned long event, void *data)
+{
+ struct inet6_ifaddr *ifa = data;
+ struct net_device *dev = ifa->idev->dev;
+ struct ncsi_dev *nd = ncsi_find_dev(dev);
+ struct ncsi_dev_priv *ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+ struct ncsi_cmd_arg nca;
+ bool action;
+ int ret;
+
+ if (!ndp || (ipv6_addr_type(&ifa->addr) &
+ (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)))
+ return NOTIFY_OK;
+
+ switch (event) {
+ case NETDEV_UP:
+ action = (++ndp->inet6_addr_num) == 1;
+ nca.type = NCSI_PKT_CMD_EGMF;
+ break;
+ case NETDEV_DOWN:
+ action = (--ndp->inet6_addr_num == 0);
+ nca.type = NCSI_PKT_CMD_DGMF;
+ break;
+ default:
+ return NOTIFY_OK;
+ }
+
+ /* We might not have active channel or packages. The IPv6
+ * required multicast will be enabled when active channel
+ * or packages are chosen.
+ */
+ np = ndp->active_package;
+ nc = ndp->active_channel;
+ if (!action || !np || !nc)
+ return NOTIFY_OK;
+
+ /* We needn't enable or disable it if the function isn't supported */
+ if (!(nc->caps[NCSI_CAP_GENERIC].cap & NCSI_CAP_GENERIC_MC))
+ return NOTIFY_OK;
+
+ nca.ndp = ndp;
+ nca.driven = false;
+ nca.package = np->id;
+ nca.channel = nc->id;
+ nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret) {
+ netdev_warn(dev, "Fail to %s global multicast filter (%d)\n",
+ (event == NETDEV_UP) ? "enable" : "disable", ret);
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block ncsi_inet6addr_notifier = {
+ .notifier_call = ncsi_inet6addr_event,
+};
+#endif /* CONFIG_IPV6 */
+
+struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
+ void (*handler)(struct ncsi_dev *ndev))
+{
+ struct ncsi_dev_priv *ndp;
+ struct ncsi_dev *nd;
+ unsigned long flags;
+ int i;
+
+ /* Check if the device has been registered or not */
+ nd = ncsi_find_dev(dev);
+ if (nd)
+ return nd;
+
+ /* Create NCSI device */
+ ndp = kzalloc(sizeof(*ndp), GFP_ATOMIC);
+ if (!ndp)
+ return NULL;
+
+ nd = &ndp->ndev;
+ nd->state = ncsi_dev_state_registered;
+ nd->dev = dev;
+ nd->handler = handler;
+ ndp->pending_req_num = 0;
+ INIT_LIST_HEAD(&ndp->channel_queue);
+ INIT_WORK(&ndp->work, ncsi_dev_work);
+
+ /* Initialize private NCSI device */
+ spin_lock_init(&ndp->lock);
+ INIT_LIST_HEAD(&ndp->packages);
+ ndp->request_id = 0;
+ for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) {
+ ndp->requests[i].id = i;
+ ndp->requests[i].ndp = ndp;
+ setup_timer(&ndp->requests[i].timer,
+ ncsi_request_timeout,
+ (unsigned long)&ndp->requests[i]);
+ }
+
+ spin_lock_irqsave(&ncsi_dev_lock, flags);
+#if IS_ENABLED(CONFIG_IPV6)
+ ndp->inet6_addr_num = 0;
+ if (list_empty(&ncsi_dev_list))
+ register_inet6addr_notifier(&ncsi_inet6addr_notifier);
+#endif
+ list_add_tail_rcu(&ndp->node, &ncsi_dev_list);
+ spin_unlock_irqrestore(&ncsi_dev_lock, flags);
+
+ /* Register NCSI packet Rx handler */
+ ndp->ptype.type = cpu_to_be16(ETH_P_NCSI);
+ ndp->ptype.func = ncsi_rcv_rsp;
+ ndp->ptype.dev = dev;
+ dev_add_pack(&ndp->ptype);
+
+ return nd;
+}
+EXPORT_SYMBOL_GPL(ncsi_register_dev);
+
+int ncsi_start_dev(struct ncsi_dev *nd)
+{
+ struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+ int old_state, ret;
+
+ if (nd->state != ncsi_dev_state_registered &&
+ nd->state != ncsi_dev_state_functional)
+ return -ENOTTY;
+
+ if (!(ndp->flags & NCSI_DEV_PROBED)) {
+ nd->state = ncsi_dev_state_probe;
+ schedule_work(&ndp->work);
+ return 0;
+ }
+
+ /* Reset channel's state and start over */
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ old_state = xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
+ WARN_ON_ONCE(!list_empty(&nc->link) ||
+ old_state == NCSI_CHANNEL_INVISIBLE);
+ }
+ }
+
+ if (ndp->flags & NCSI_DEV_HWA)
+ ret = ncsi_enable_hwa(ndp);
+ else
+ ret = ncsi_choose_active_channel(ndp);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ncsi_start_dev);
+
+void ncsi_unregister_dev(struct ncsi_dev *nd)
+{
+ struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
+ struct ncsi_package *np, *tmp;
+ unsigned long flags;
+
+ dev_remove_pack(&ndp->ptype);
+
+ list_for_each_entry_safe(np, tmp, &ndp->packages, node)
+ ncsi_remove_package(np);
+
+ spin_lock_irqsave(&ncsi_dev_lock, flags);
+ list_del_rcu(&ndp->node);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (list_empty(&ncsi_dev_list))
+ unregister_inet6addr_notifier(&ncsi_inet6addr_notifier);
+#endif
+ spin_unlock_irqrestore(&ncsi_dev_lock, flags);
+
+ kfree(ndp);
+}
+EXPORT_SYMBOL_GPL(ncsi_unregister_dev);
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
new file mode 100644
index 000000000000..3ea49ed0a935
--- /dev/null
+++ b/net/ncsi/ncsi-pkt.h
@@ -0,0 +1,415 @@
+/*
+ * Copyright Gavin Shan, IBM Corporation 2016.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __NCSI_PKT_H__
+#define __NCSI_PKT_H__
+
+struct ncsi_pkt_hdr {
+ unsigned char mc_id; /* Management controller ID */
+ unsigned char revision; /* NCSI version - 0x01 */
+ unsigned char reserved; /* Reserved */
+ unsigned char id; /* Packet sequence number */
+ unsigned char type; /* Packet type */
+ unsigned char channel; /* Network controller ID */
+ __be16 length; /* Payload length */
+ __be32 reserved1[2]; /* Reserved */
+};
+
+struct ncsi_cmd_pkt_hdr {
+ struct ncsi_pkt_hdr common; /* Common NCSI packet header */
+};
+
+struct ncsi_rsp_pkt_hdr {
+ struct ncsi_pkt_hdr common; /* Common NCSI packet header */
+ __be16 code; /* Response code */
+ __be16 reason; /* Response reason */
+};
+
+struct ncsi_aen_pkt_hdr {
+ struct ncsi_pkt_hdr common; /* Common NCSI packet header */
+ unsigned char reserved2[3]; /* Reserved */
+ unsigned char type; /* AEN packet type */
+};
+
+/* NCSI common command packet */
+struct ncsi_cmd_pkt {
+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[26];
+};
+
+struct ncsi_rsp_pkt {
+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[22];
+};
+
+/* Select Package */
+struct ncsi_cmd_sp_pkt {
+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */
+ unsigned char reserved[3]; /* Reserved */
+ unsigned char hw_arbitration; /* HW arbitration */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[22];
+};
+
+/* Disable Channel */
+struct ncsi_cmd_dc_pkt {
+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */
+ unsigned char reserved[3]; /* Reserved */
+ unsigned char ald; /* Allow link down */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[22];
+};
+
+/* Reset Channel */
+struct ncsi_cmd_rc_pkt {
+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */
+ __be32 reserved; /* Reserved */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[22];
+};
+
+/* AEN Enable */
+struct ncsi_cmd_ae_pkt {
+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */
+ unsigned char reserved[3]; /* Reserved */
+ unsigned char mc_id; /* MC ID */
+ __be32 mode; /* AEN working mode */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[18];
+};
+
+/* Set Link */
+struct ncsi_cmd_sl_pkt {
+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */
+ __be32 mode; /* Link working mode */
+ __be32 oem_mode; /* OEM link mode */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[18];
+};
+
+/* Set VLAN Filter */
+struct ncsi_cmd_svf_pkt {
+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */
+ __be16 reserved; /* Reserved */
+ __be16 vlan; /* VLAN ID */
+ __be16 reserved1; /* Reserved */
+ unsigned char index; /* VLAN table index */
+ unsigned char enable; /* Enable or disable */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[14];
+};
+
+/* Enable VLAN */
+struct ncsi_cmd_ev_pkt {
+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */
+ unsigned char reserved[3]; /* Reserved */
+ unsigned char mode; /* VLAN filter mode */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[22];
+};
+
+/* Set MAC Address */
+struct ncsi_cmd_sma_pkt {
+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */
+ unsigned char mac[6]; /* MAC address */
+ unsigned char index; /* MAC table index */
+ unsigned char at_e; /* Addr type and operation */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[18];
+};
+
+/* Enable Broadcast Filter */
+struct ncsi_cmd_ebf_pkt {
+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */
+ __be32 mode; /* Filter mode */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[22];
+};
+
+/* Enable Global Multicast Filter */
+struct ncsi_cmd_egmf_pkt {
+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */
+ __be32 mode; /* Global MC mode */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[22];
+};
+
+/* Set NCSI Flow Control */
+struct ncsi_cmd_snfc_pkt {
+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */
+ unsigned char reserved[3]; /* Reserved */
+ unsigned char mode; /* Flow control mode */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[22];
+};
+
+/* Get Link Status */
+struct ncsi_rsp_gls_pkt {
+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */
+ __be32 status; /* Link status */
+ __be32 other; /* Other indications */
+ __be32 oem_status; /* OEM link status */
+ __be32 checksum;
+ unsigned char pad[10];
+};
+
+/* Get Version ID */
+struct ncsi_rsp_gvi_pkt {
+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */
+ __be32 ncsi_version; /* NCSI version */
+ unsigned char reserved[3]; /* Reserved */
+ unsigned char alpha2; /* NCSI version */
+ unsigned char fw_name[12]; /* f/w name string */
+ __be32 fw_version; /* f/w version */
+ __be16 pci_ids[4]; /* PCI IDs */
+ __be32 mf_id; /* Manufacture ID */
+ __be32 checksum;
+};
+
+/* Get Capabilities */
+struct ncsi_rsp_gc_pkt {
+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */
+ __be32 cap; /* Capabilities */
+ __be32 bc_cap; /* Broadcast cap */
+ __be32 mc_cap; /* Multicast cap */
+ __be32 buf_cap; /* Buffering cap */
+ __be32 aen_cap; /* AEN cap */
+ unsigned char vlan_cnt; /* VLAN filter count */
+ unsigned char mixed_cnt; /* Mix filter count */
+ unsigned char mc_cnt; /* MC filter count */
+ unsigned char uc_cnt; /* UC filter count */
+ unsigned char reserved[2]; /* Reserved */
+ unsigned char vlan_mode; /* VLAN mode */
+ unsigned char channel_cnt; /* Channel count */
+ __be32 checksum; /* Checksum */
+};
+
+/* Get Parameters */
+struct ncsi_rsp_gp_pkt {
+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */
+ unsigned char mac_cnt; /* Number of MAC addr */
+ unsigned char reserved[2]; /* Reserved */
+ unsigned char mac_enable; /* MAC addr enable flags */
+ unsigned char vlan_cnt; /* VLAN tag count */
+ unsigned char reserved1; /* Reserved */
+ __be16 vlan_enable; /* VLAN tag enable flags */
+ __be32 link_mode; /* Link setting */
+ __be32 bc_mode; /* BC filter mode */
+ __be32 valid_modes; /* Valid mode parameters */
+ unsigned char vlan_mode; /* VLAN mode */
+ unsigned char fc_mode; /* Flow control mode */
+ unsigned char reserved2[2]; /* Reserved */
+ __be32 aen_mode; /* AEN mode */
+ unsigned char mac[6]; /* Supported MAC addr */
+ __be16 vlan; /* Supported VLAN tags */
+ __be32 checksum; /* Checksum */
+};
+
+/* Get Controller Packet Statistics */
+struct ncsi_rsp_gcps_pkt {
+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */
+ __be32 cnt_hi; /* Counter cleared */
+ __be32 cnt_lo; /* Counter cleared */
+ __be32 rx_bytes; /* Rx bytes */
+ __be32 tx_bytes; /* Tx bytes */
+ __be32 rx_uc_pkts; /* Rx UC packets */
+ __be32 rx_mc_pkts; /* Rx MC packets */
+ __be32 rx_bc_pkts; /* Rx BC packets */
+ __be32 tx_uc_pkts; /* Tx UC packets */
+ __be32 tx_mc_pkts; /* Tx MC packets */
+ __be32 tx_bc_pkts; /* Tx BC packets */
+ __be32 fcs_err; /* FCS errors */
+ __be32 align_err; /* Alignment errors */
+ __be32 false_carrier; /* False carrier detection */
+ __be32 runt_pkts; /* Rx runt packets */
+ __be32 jabber_pkts; /* Rx jabber packets */
+ __be32 rx_pause_xon; /* Rx pause XON frames */
+ __be32 rx_pause_xoff; /* Rx XOFF frames */
+ __be32 tx_pause_xon; /* Tx XON frames */
+ __be32 tx_pause_xoff; /* Tx XOFF frames */
+ __be32 tx_s_collision; /* Single collision frames */
+ __be32 tx_m_collision; /* Multiple collision frames */
+ __be32 l_collision; /* Late collision frames */
+ __be32 e_collision; /* Excessive collision frames */
+ __be32 rx_ctl_frames; /* Rx control frames */
+ __be32 rx_64_frames; /* Rx 64-bytes frames */
+ __be32 rx_127_frames; /* Rx 65-127 bytes frames */
+ __be32 rx_255_frames; /* Rx 128-255 bytes frames */
+ __be32 rx_511_frames; /* Rx 256-511 bytes frames */
+ __be32 rx_1023_frames; /* Rx 512-1023 bytes frames */
+ __be32 rx_1522_frames; /* Rx 1024-1522 bytes frames */
+ __be32 rx_9022_frames; /* Rx 1523-9022 bytes frames */
+ __be32 tx_64_frames; /* Tx 64-bytes frames */
+ __be32 tx_127_frames; /* Tx 65-127 bytes frames */
+ __be32 tx_255_frames; /* Tx 128-255 bytes frames */
+ __be32 tx_511_frames; /* Tx 256-511 bytes frames */
+ __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */
+ __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */
+ __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */
+ __be32 rx_valid_bytes; /* Rx valid bytes */
+ __be32 rx_runt_pkts; /* Rx error runt packets */
+ __be32 rx_jabber_pkts; /* Rx error jabber packets */
+ __be32 checksum; /* Checksum */
+};
+
+/* Get NCSI Statistics */
+struct ncsi_rsp_gns_pkt {
+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */
+ __be32 rx_cmds; /* Rx NCSI commands */
+ __be32 dropped_cmds; /* Dropped commands */
+ __be32 cmd_type_errs; /* Command type errors */
+ __be32 cmd_csum_errs; /* Command checksum errors */
+ __be32 rx_pkts; /* Rx NCSI packets */
+ __be32 tx_pkts; /* Tx NCSI packets */
+ __be32 tx_aen_pkts; /* Tx AEN packets */
+ __be32 checksum; /* Checksum */
+};
+
+/* Get NCSI Pass-through Statistics */
+struct ncsi_rsp_gnpts_pkt {
+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */
+ __be32 tx_pkts; /* Tx packets */
+ __be32 tx_dropped; /* Tx dropped packets */
+ __be32 tx_channel_err; /* Tx channel errors */
+ __be32 tx_us_err; /* Tx undersize errors */
+ __be32 rx_pkts; /* Rx packets */
+ __be32 rx_dropped; /* Rx dropped packets */
+ __be32 rx_channel_err; /* Rx channel errors */
+ __be32 rx_us_err; /* Rx undersize errors */
+ __be32 rx_os_err; /* Rx oversize errors */
+ __be32 checksum; /* Checksum */
+};
+
+/* Get package status */
+struct ncsi_rsp_gps_pkt {
+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */
+ __be32 status; /* Hardware arbitration status */
+ __be32 checksum;
+};
+
+/* Get package UUID */
+struct ncsi_rsp_gpuuid_pkt {
+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */
+ unsigned char uuid[16]; /* UUID */
+ __be32 checksum;
+};
+
+/* AEN: Link State Change */
+struct ncsi_aen_lsc_pkt {
+ struct ncsi_aen_pkt_hdr aen; /* AEN header */
+ __be32 status; /* Link status */
+ __be32 oem_status; /* OEM link status */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[14];
+};
+
+/* AEN: Configuration Required */
+struct ncsi_aen_cr_pkt {
+ struct ncsi_aen_pkt_hdr aen; /* AEN header */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[22];
+};
+
+/* AEN: Host Network Controller Driver Status Change */
+struct ncsi_aen_hncdsc_pkt {
+ struct ncsi_aen_pkt_hdr aen; /* AEN header */
+ __be32 status; /* Status */
+ __be32 checksum; /* Checksum */
+ unsigned char pad[18];
+};
+
+/* NCSI packet revision */
+#define NCSI_PKT_REVISION 0x01
+
+/* NCSI packet commands */
+#define NCSI_PKT_CMD_CIS 0x00 /* Clear Initial State */
+#define NCSI_PKT_CMD_SP 0x01 /* Select Package */
+#define NCSI_PKT_CMD_DP 0x02 /* Deselect Package */
+#define NCSI_PKT_CMD_EC 0x03 /* Enable Channel */
+#define NCSI_PKT_CMD_DC 0x04 /* Disable Channel */
+#define NCSI_PKT_CMD_RC 0x05 /* Reset Channel */
+#define NCSI_PKT_CMD_ECNT 0x06 /* Enable Channel Network Tx */
+#define NCSI_PKT_CMD_DCNT 0x07 /* Disable Channel Network Tx */
+#define NCSI_PKT_CMD_AE 0x08 /* AEN Enable */
+#define NCSI_PKT_CMD_SL 0x09 /* Set Link */
+#define NCSI_PKT_CMD_GLS 0x0a /* Get Link */
+#define NCSI_PKT_CMD_SVF 0x0b /* Set VLAN Filter */
+#define NCSI_PKT_CMD_EV 0x0c /* Enable VLAN */
+#define NCSI_PKT_CMD_DV 0x0d /* Disable VLAN */
+#define NCSI_PKT_CMD_SMA 0x0e /* Set MAC address */
+#define NCSI_PKT_CMD_EBF 0x10 /* Enable Broadcast Filter */
+#define NCSI_PKT_CMD_DBF 0x11 /* Disable Broadcast Filter */
+#define NCSI_PKT_CMD_EGMF 0x12 /* Enable Global Multicast Filter */
+#define NCSI_PKT_CMD_DGMF 0x13 /* Disable Global Multicast Filter */
+#define NCSI_PKT_CMD_SNFC 0x14 /* Set NCSI Flow Control */
+#define NCSI_PKT_CMD_GVI 0x15 /* Get Version ID */
+#define NCSI_PKT_CMD_GC 0x16 /* Get Capabilities */
+#define NCSI_PKT_CMD_GP 0x17 /* Get Parameters */
+#define NCSI_PKT_CMD_GCPS 0x18 /* Get Controller Packet Statistics */
+#define NCSI_PKT_CMD_GNS 0x19 /* Get NCSI Statistics */
+#define NCSI_PKT_CMD_GNPTS 0x1a /* Get NCSI Pass-throu Statistics */
+#define NCSI_PKT_CMD_GPS 0x1b /* Get package status */
+#define NCSI_PKT_CMD_OEM 0x50 /* OEM */
+#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */
+#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */
+
+/* NCSI packet responses */
+#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80)
+#define NCSI_PKT_RSP_SP (NCSI_PKT_CMD_SP + 0x80)
+#define NCSI_PKT_RSP_DP (NCSI_PKT_CMD_DP + 0x80)
+#define NCSI_PKT_RSP_EC (NCSI_PKT_CMD_EC + 0x80)
+#define NCSI_PKT_RSP_DC (NCSI_PKT_CMD_DC + 0x80)
+#define NCSI_PKT_RSP_RC (NCSI_PKT_CMD_RC + 0x80)
+#define NCSI_PKT_RSP_ECNT (NCSI_PKT_CMD_ECNT + 0x80)
+#define NCSI_PKT_RSP_DCNT (NCSI_PKT_CMD_DCNT + 0x80)
+#define NCSI_PKT_RSP_AE (NCSI_PKT_CMD_AE + 0x80)
+#define NCSI_PKT_RSP_SL (NCSI_PKT_CMD_SL + 0x80)
+#define NCSI_PKT_RSP_GLS (NCSI_PKT_CMD_GLS + 0x80)
+#define NCSI_PKT_RSP_SVF (NCSI_PKT_CMD_SVF + 0x80)
+#define NCSI_PKT_RSP_EV (NCSI_PKT_CMD_EV + 0x80)
+#define NCSI_PKT_RSP_DV (NCSI_PKT_CMD_DV + 0x80)
+#define NCSI_PKT_RSP_SMA (NCSI_PKT_CMD_SMA + 0x80)
+#define NCSI_PKT_RSP_EBF (NCSI_PKT_CMD_EBF + 0x80)
+#define NCSI_PKT_RSP_DBF (NCSI_PKT_CMD_DBF + 0x80)
+#define NCSI_PKT_RSP_EGMF (NCSI_PKT_CMD_EGMF + 0x80)
+#define NCSI_PKT_RSP_DGMF (NCSI_PKT_CMD_DGMF + 0x80)
+#define NCSI_PKT_RSP_SNFC (NCSI_PKT_CMD_SNFC + 0x80)
+#define NCSI_PKT_RSP_GVI (NCSI_PKT_CMD_GVI + 0x80)
+#define NCSI_PKT_RSP_GC (NCSI_PKT_CMD_GC + 0x80)
+#define NCSI_PKT_RSP_GP (NCSI_PKT_CMD_GP + 0x80)
+#define NCSI_PKT_RSP_GCPS (NCSI_PKT_CMD_GCPS + 0x80)
+#define NCSI_PKT_RSP_GNS (NCSI_PKT_CMD_GNS + 0x80)
+#define NCSI_PKT_RSP_GNPTS (NCSI_PKT_CMD_GNPTS + 0x80)
+#define NCSI_PKT_RSP_GPS (NCSI_PKT_CMD_GPS + 0x80)
+#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80)
+#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80)
+#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80)
+
+/* NCSI response code/reason */
+#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */
+#define NCSI_PKT_RSP_C_FAILED 0x0001 /* Command Failed */
+#define NCSI_PKT_RSP_C_UNAVAILABLE 0x0002 /* Command Unavailable */
+#define NCSI_PKT_RSP_C_UNSUPPORTED 0x0003 /* Command Unsupported */
+#define NCSI_PKT_RSP_R_NO_ERROR 0x0000 /* No Error */
+#define NCSI_PKT_RSP_R_INTERFACE 0x0001 /* Interface not ready */
+#define NCSI_PKT_RSP_R_PARAM 0x0002 /* Invalid Parameter */
+#define NCSI_PKT_RSP_R_CHANNEL 0x0003 /* Channel not Ready */
+#define NCSI_PKT_RSP_R_PACKAGE 0x0004 /* Package not Ready */
+#define NCSI_PKT_RSP_R_LENGTH 0x0005 /* Invalid payload length */
+#define NCSI_PKT_RSP_R_UNKNOWN 0x7fff /* Command type unsupported */
+
+/* NCSI AEN packet type */
+#define NCSI_PKT_AEN 0xFF /* AEN Packet */
+#define NCSI_PKT_AEN_LSC 0x00 /* Link status change */
+#define NCSI_PKT_AEN_CR 0x01 /* Configuration required */
+#define NCSI_PKT_AEN_HNCDSC 0x02 /* HNC driver status change */
+
+#endif /* __NCSI_PKT_H__ */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
new file mode 100644
index 000000000000..af84389a6bf1
--- /dev/null
+++ b/net/ncsi/ncsi-rsp.c
@@ -0,0 +1,1035 @@
+/*
+ * Copyright Gavin Shan, IBM Corporation 2016.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include <net/ncsi.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+#include "internal.h"
+#include "ncsi-pkt.h"
+
+static int ncsi_validate_rsp_pkt(struct ncsi_request *nr,
+ unsigned short payload)
+{
+ struct ncsi_rsp_pkt_hdr *h;
+ u32 checksum;
+ __be32 *pchecksum;
+
+ /* Check NCSI packet header. We don't need validate
+ * the packet type, which should have been checked
+ * before calling this function.
+ */
+ h = (struct ncsi_rsp_pkt_hdr *)skb_network_header(nr->rsp);
+ if (h->common.revision != NCSI_PKT_REVISION)
+ return -EINVAL;
+ if (ntohs(h->common.length) != payload)
+ return -EINVAL;
+
+ /* Check on code and reason */
+ if (ntohs(h->code) != NCSI_PKT_RSP_C_COMPLETED ||
+ ntohs(h->reason) != NCSI_PKT_RSP_R_NO_ERROR)
+ return -EINVAL;
+
+ /* Validate checksum, which might be zeroes if the
+ * sender doesn't support checksum according to NCSI
+ * specification.
+ */
+ pchecksum = (__be32 *)((void *)(h + 1) + payload - 4);
+ if (ntohl(*pchecksum) == 0)
+ return 0;
+
+ checksum = ncsi_calculate_checksum((unsigned char *)h,
+ sizeof(*h) + payload - 4);
+ if (*pchecksum != htonl(checksum))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_cis(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+ unsigned char id;
+
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, &np, &nc);
+ if (!nc) {
+ if (ndp->flags & NCSI_DEV_PROBED)
+ return -ENXIO;
+
+ id = NCSI_CHANNEL_INDEX(rsp->rsp.common.channel);
+ nc = ncsi_add_channel(np, id);
+ }
+
+ return nc ? 0 : -ENODEV;
+}
+
+static int ncsi_rsp_handler_sp(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_package *np;
+ unsigned char id;
+
+ /* Add the package if it's not existing. Otherwise,
+ * to change the state of its child channels.
+ */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ &np, NULL);
+ if (!np) {
+ if (ndp->flags & NCSI_DEV_PROBED)
+ return -ENXIO;
+
+ id = NCSI_PACKAGE_INDEX(rsp->rsp.common.channel);
+ np = ncsi_add_package(ndp, id);
+ if (!np)
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_dp(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+ unsigned long flags;
+
+ /* Find the package */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ &np, NULL);
+ if (!np)
+ return -ENODEV;
+
+ /* Change state of all channels attached to the package */
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ spin_lock_irqsave(&nc->lock, flags);
+ nc->state = NCSI_CHANNEL_INACTIVE;
+ spin_unlock_irqrestore(&nc->lock, flags);
+ }
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_ec(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+
+ /* Find the package and channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ ncm = &nc->modes[NCSI_MODE_ENABLE];
+ if (ncm->enable)
+ return -EBUSY;
+
+ ncm->enable = 1;
+ return 0;
+}
+
+static int ncsi_rsp_handler_dc(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+ int ret;
+
+ ret = ncsi_validate_rsp_pkt(nr, 4);
+ if (ret)
+ return ret;
+
+ /* Find the package and channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ ncm = &nc->modes[NCSI_MODE_ENABLE];
+ if (!ncm->enable)
+ return -EBUSY;
+
+ ncm->enable = 0;
+ return 0;
+}
+
+static int ncsi_rsp_handler_rc(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ unsigned long flags;
+
+ /* Find the package and channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Update state for the specified channel */
+ spin_lock_irqsave(&nc->lock, flags);
+ nc->state = NCSI_CHANNEL_INACTIVE;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_ecnt(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+
+ /* Find the package and channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
+ if (ncm->enable)
+ return -EBUSY;
+
+ ncm->enable = 1;
+ return 0;
+}
+
+static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+
+ /* Find the package and channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
+ if (!ncm->enable)
+ return -EBUSY;
+
+ ncm->enable = 1;
+ return 0;
+}
+
+static int ncsi_rsp_handler_ae(struct ncsi_request *nr)
+{
+ struct ncsi_cmd_ae_pkt *cmd;
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+
+ /* Find the package and channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Check if the AEN has been enabled */
+ ncm = &nc->modes[NCSI_MODE_AEN];
+ if (ncm->enable)
+ return -EBUSY;
+
+ /* Update to AEN configuration */
+ cmd = (struct ncsi_cmd_ae_pkt *)skb_network_header(nr->cmd);
+ ncm->enable = 1;
+ ncm->data[0] = cmd->mc_id;
+ ncm->data[1] = ntohl(cmd->mode);
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_sl(struct ncsi_request *nr)
+{
+ struct ncsi_cmd_sl_pkt *cmd;
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+
+ /* Find the package and channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ cmd = (struct ncsi_cmd_sl_pkt *)skb_network_header(nr->cmd);
+ ncm = &nc->modes[NCSI_MODE_LINK];
+ ncm->data[0] = ntohl(cmd->mode);
+ ncm->data[1] = ntohl(cmd->oem_mode);
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_gls(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_gls_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+ unsigned long flags;
+
+ /* Find the package and channel */
+ rsp = (struct ncsi_rsp_gls_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ ncm = &nc->modes[NCSI_MODE_LINK];
+ ncm->data[2] = ntohl(rsp->status);
+ ncm->data[3] = ntohl(rsp->other);
+ ncm->data[4] = ntohl(rsp->oem_status);
+
+ if (nr->driven)
+ return 0;
+
+ /* Reset the channel monitor if it has been enabled */
+ spin_lock_irqsave(&nc->lock, flags);
+ nc->timeout = 0;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_svf(struct ncsi_request *nr)
+{
+ struct ncsi_cmd_svf_pkt *cmd;
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_filter *ncf;
+ unsigned short vlan;
+ int ret;
+
+ /* Find the package and channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ cmd = (struct ncsi_cmd_svf_pkt *)skb_network_header(nr->cmd);
+ ncf = nc->filters[NCSI_FILTER_VLAN];
+ if (!ncf)
+ return -ENOENT;
+ if (cmd->index >= ncf->total)
+ return -ERANGE;
+
+ /* Add or remove the VLAN filter */
+ if (!(cmd->enable & 0x1)) {
+ ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index);
+ } else {
+ vlan = ntohs(cmd->vlan);
+ ret = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan);
+ }
+
+ return ret;
+}
+
+static int ncsi_rsp_handler_ev(struct ncsi_request *nr)
+{
+ struct ncsi_cmd_ev_pkt *cmd;
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+
+ /* Find the package and channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Check if VLAN mode has been enabled */
+ ncm = &nc->modes[NCSI_MODE_VLAN];
+ if (ncm->enable)
+ return -EBUSY;
+
+ /* Update to VLAN mode */
+ cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd);
+ ncm->enable = 1;
+ ncm->data[0] = ntohl(cmd->mode);
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_dv(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+
+ /* Find the package and channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Check if VLAN mode has been enabled */
+ ncm = &nc->modes[NCSI_MODE_VLAN];
+ if (!ncm->enable)
+ return -EBUSY;
+
+ /* Update to VLAN mode */
+ ncm->enable = 0;
+ return 0;
+}
+
+static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
+{
+ struct ncsi_cmd_sma_pkt *cmd;
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_filter *ncf;
+ void *bitmap;
+
+ /* Find the package and channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* According to NCSI spec 1.01, the mixed filter table
+ * isn't supported yet.
+ */
+ cmd = (struct ncsi_cmd_sma_pkt *)skb_network_header(nr->cmd);
+ switch (cmd->at_e >> 5) {
+ case 0x0: /* UC address */
+ ncf = nc->filters[NCSI_FILTER_UC];
+ break;
+ case 0x1: /* MC address */
+ ncf = nc->filters[NCSI_FILTER_MC];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Sanity check on the filter */
+ if (!ncf)
+ return -ENOENT;
+ else if (cmd->index >= ncf->total)
+ return -ERANGE;
+
+ bitmap = &ncf->bitmap;
+ if (cmd->at_e & 0x1) {
+ if (test_and_set_bit(cmd->index, bitmap))
+ return -EBUSY;
+ memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6);
+ } else {
+ if (!test_and_clear_bit(cmd->index, bitmap))
+ return -EBUSY;
+
+ memset(ncf->data + 6 * cmd->index, 0, 6);
+ }
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_ebf(struct ncsi_request *nr)
+{
+ struct ncsi_cmd_ebf_pkt *cmd;
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+
+ /* Find the package and channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Check if broadcast filter has been enabled */
+ ncm = &nc->modes[NCSI_MODE_BC];
+ if (ncm->enable)
+ return -EBUSY;
+
+ /* Update to broadcast filter mode */
+ cmd = (struct ncsi_cmd_ebf_pkt *)skb_network_header(nr->cmd);
+ ncm->enable = 1;
+ ncm->data[0] = ntohl(cmd->mode);
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_dbf(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Check if broadcast filter isn't enabled */
+ ncm = &nc->modes[NCSI_MODE_BC];
+ if (!ncm->enable)
+ return -EBUSY;
+
+ /* Update to broadcast filter mode */
+ ncm->enable = 0;
+ ncm->data[0] = 0;
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_egmf(struct ncsi_request *nr)
+{
+ struct ncsi_cmd_egmf_pkt *cmd;
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+
+ /* Find the channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Check if multicast filter has been enabled */
+ ncm = &nc->modes[NCSI_MODE_MC];
+ if (ncm->enable)
+ return -EBUSY;
+
+ /* Update to multicast filter mode */
+ cmd = (struct ncsi_cmd_egmf_pkt *)skb_network_header(nr->cmd);
+ ncm->enable = 1;
+ ncm->data[0] = ntohl(cmd->mode);
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_dgmf(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Check if multicast filter has been enabled */
+ ncm = &nc->modes[NCSI_MODE_MC];
+ if (!ncm->enable)
+ return -EBUSY;
+
+ /* Update to multicast filter mode */
+ ncm->enable = 0;
+ ncm->data[0] = 0;
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
+{
+ struct ncsi_cmd_snfc_pkt *cmd;
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_mode *ncm;
+
+ /* Find the channel */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Check if flow control has been enabled */
+ ncm = &nc->modes[NCSI_MODE_FC];
+ if (ncm->enable)
+ return -EBUSY;
+
+ /* Update to flow control mode */
+ cmd = (struct ncsi_cmd_snfc_pkt *)skb_network_header(nr->cmd);
+ ncm->enable = 1;
+ ncm->data[0] = cmd->mode;
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_gvi(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_gvi_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_version *ncv;
+ int i;
+
+ /* Find the channel */
+ rsp = (struct ncsi_rsp_gvi_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Update to channel's version info */
+ ncv = &nc->version;
+ ncv->version = ntohl(rsp->ncsi_version);
+ ncv->alpha2 = rsp->alpha2;
+ memcpy(ncv->fw_name, rsp->fw_name, 12);
+ ncv->fw_version = ntohl(rsp->fw_version);
+ for (i = 0; i < ARRAY_SIZE(ncv->pci_ids); i++)
+ ncv->pci_ids[i] = ntohs(rsp->pci_ids[i]);
+ ncv->mf_id = ntohl(rsp->mf_id);
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_gc_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_filter *ncf;
+ size_t size, entry_size;
+ int cnt, i;
+
+ /* Find the channel */
+ rsp = (struct ncsi_rsp_gc_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Update channel's capabilities */
+ nc->caps[NCSI_CAP_GENERIC].cap = ntohl(rsp->cap) &
+ NCSI_CAP_GENERIC_MASK;
+ nc->caps[NCSI_CAP_BC].cap = ntohl(rsp->bc_cap) &
+ NCSI_CAP_BC_MASK;
+ nc->caps[NCSI_CAP_MC].cap = ntohl(rsp->mc_cap) &
+ NCSI_CAP_MC_MASK;
+ nc->caps[NCSI_CAP_BUFFER].cap = ntohl(rsp->buf_cap);
+ nc->caps[NCSI_CAP_AEN].cap = ntohl(rsp->aen_cap) &
+ NCSI_CAP_AEN_MASK;
+ nc->caps[NCSI_CAP_VLAN].cap = rsp->vlan_mode &
+ NCSI_CAP_VLAN_MASK;
+
+ /* Build filters */
+ for (i = 0; i < NCSI_FILTER_MAX; i++) {
+ switch (i) {
+ case NCSI_FILTER_VLAN:
+ cnt = rsp->vlan_cnt;
+ entry_size = 2;
+ break;
+ case NCSI_FILTER_MIXED:
+ cnt = rsp->mixed_cnt;
+ entry_size = 6;
+ break;
+ case NCSI_FILTER_MC:
+ cnt = rsp->mc_cnt;
+ entry_size = 6;
+ break;
+ case NCSI_FILTER_UC:
+ cnt = rsp->uc_cnt;
+ entry_size = 6;
+ break;
+ default:
+ continue;
+ }
+
+ if (!cnt || nc->filters[i])
+ continue;
+
+ size = sizeof(*ncf) + cnt * entry_size;
+ ncf = kzalloc(size, GFP_ATOMIC);
+ if (!ncf) {
+ pr_warn("%s: Cannot alloc filter table (%d)\n",
+ __func__, i);
+ return -ENOMEM;
+ }
+
+ ncf->index = i;
+ ncf->total = cnt;
+ ncf->bitmap = 0x0ul;
+ nc->filters[i] = ncf;
+ }
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_gp(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_gp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ unsigned short enable, vlan;
+ unsigned char *pdata;
+ int table, i;
+
+ /* Find the channel */
+ rsp = (struct ncsi_rsp_gp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Modes with explicit enabled indications */
+ if (ntohl(rsp->valid_modes) & 0x1) { /* BC filter mode */
+ nc->modes[NCSI_MODE_BC].enable = 1;
+ nc->modes[NCSI_MODE_BC].data[0] = ntohl(rsp->bc_mode);
+ }
+ if (ntohl(rsp->valid_modes) & 0x2) /* Channel enabled */
+ nc->modes[NCSI_MODE_ENABLE].enable = 1;
+ if (ntohl(rsp->valid_modes) & 0x4) /* Channel Tx enabled */
+ nc->modes[NCSI_MODE_TX_ENABLE].enable = 1;
+ if (ntohl(rsp->valid_modes) & 0x8) /* MC filter mode */
+ nc->modes[NCSI_MODE_MC].enable = 1;
+
+ /* Modes without explicit enabled indications */
+ nc->modes[NCSI_MODE_LINK].enable = 1;
+ nc->modes[NCSI_MODE_LINK].data[0] = ntohl(rsp->link_mode);
+ nc->modes[NCSI_MODE_VLAN].enable = 1;
+ nc->modes[NCSI_MODE_VLAN].data[0] = rsp->vlan_mode;
+ nc->modes[NCSI_MODE_FC].enable = 1;
+ nc->modes[NCSI_MODE_FC].data[0] = rsp->fc_mode;
+ nc->modes[NCSI_MODE_AEN].enable = 1;
+ nc->modes[NCSI_MODE_AEN].data[0] = ntohl(rsp->aen_mode);
+
+ /* MAC addresses filter table */
+ pdata = (unsigned char *)rsp + 48;
+ enable = rsp->mac_enable;
+ for (i = 0; i < rsp->mac_cnt; i++, pdata += 6) {
+ if (i >= (nc->filters[NCSI_FILTER_UC]->total +
+ nc->filters[NCSI_FILTER_MC]->total))
+ table = NCSI_FILTER_MIXED;
+ else if (i >= nc->filters[NCSI_FILTER_UC]->total)
+ table = NCSI_FILTER_MC;
+ else
+ table = NCSI_FILTER_UC;
+
+ if (!(enable & (0x1 << i)))
+ continue;
+
+ if (ncsi_find_filter(nc, table, pdata) >= 0)
+ continue;
+
+ ncsi_add_filter(nc, table, pdata);
+ }
+
+ /* VLAN filter table */
+ enable = ntohs(rsp->vlan_enable);
+ for (i = 0; i < rsp->vlan_cnt; i++, pdata += 2) {
+ if (!(enable & (0x1 << i)))
+ continue;
+
+ vlan = ntohs(*(__be16 *)pdata);
+ if (ncsi_find_filter(nc, NCSI_FILTER_VLAN, &vlan) >= 0)
+ continue;
+
+ ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan);
+ }
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_gcps(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_gcps_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_stats *ncs;
+
+ /* Find the channel */
+ rsp = (struct ncsi_rsp_gcps_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Update HNC's statistics */
+ ncs = &nc->stats;
+ ncs->hnc_cnt_hi = ntohl(rsp->cnt_hi);
+ ncs->hnc_cnt_lo = ntohl(rsp->cnt_lo);
+ ncs->hnc_rx_bytes = ntohl(rsp->rx_bytes);
+ ncs->hnc_tx_bytes = ntohl(rsp->tx_bytes);
+ ncs->hnc_rx_uc_pkts = ntohl(rsp->rx_uc_pkts);
+ ncs->hnc_rx_mc_pkts = ntohl(rsp->rx_mc_pkts);
+ ncs->hnc_rx_bc_pkts = ntohl(rsp->rx_bc_pkts);
+ ncs->hnc_tx_uc_pkts = ntohl(rsp->tx_uc_pkts);
+ ncs->hnc_tx_mc_pkts = ntohl(rsp->tx_mc_pkts);
+ ncs->hnc_tx_bc_pkts = ntohl(rsp->tx_bc_pkts);
+ ncs->hnc_fcs_err = ntohl(rsp->fcs_err);
+ ncs->hnc_align_err = ntohl(rsp->align_err);
+ ncs->hnc_false_carrier = ntohl(rsp->false_carrier);
+ ncs->hnc_runt_pkts = ntohl(rsp->runt_pkts);
+ ncs->hnc_jabber_pkts = ntohl(rsp->jabber_pkts);
+ ncs->hnc_rx_pause_xon = ntohl(rsp->rx_pause_xon);
+ ncs->hnc_rx_pause_xoff = ntohl(rsp->rx_pause_xoff);
+ ncs->hnc_tx_pause_xon = ntohl(rsp->tx_pause_xon);
+ ncs->hnc_tx_pause_xoff = ntohl(rsp->tx_pause_xoff);
+ ncs->hnc_tx_s_collision = ntohl(rsp->tx_s_collision);
+ ncs->hnc_tx_m_collision = ntohl(rsp->tx_m_collision);
+ ncs->hnc_l_collision = ntohl(rsp->l_collision);
+ ncs->hnc_e_collision = ntohl(rsp->e_collision);
+ ncs->hnc_rx_ctl_frames = ntohl(rsp->rx_ctl_frames);
+ ncs->hnc_rx_64_frames = ntohl(rsp->rx_64_frames);
+ ncs->hnc_rx_127_frames = ntohl(rsp->rx_127_frames);
+ ncs->hnc_rx_255_frames = ntohl(rsp->rx_255_frames);
+ ncs->hnc_rx_511_frames = ntohl(rsp->rx_511_frames);
+ ncs->hnc_rx_1023_frames = ntohl(rsp->rx_1023_frames);
+ ncs->hnc_rx_1522_frames = ntohl(rsp->rx_1522_frames);
+ ncs->hnc_rx_9022_frames = ntohl(rsp->rx_9022_frames);
+ ncs->hnc_tx_64_frames = ntohl(rsp->tx_64_frames);
+ ncs->hnc_tx_127_frames = ntohl(rsp->tx_127_frames);
+ ncs->hnc_tx_255_frames = ntohl(rsp->tx_255_frames);
+ ncs->hnc_tx_511_frames = ntohl(rsp->tx_511_frames);
+ ncs->hnc_tx_1023_frames = ntohl(rsp->tx_1023_frames);
+ ncs->hnc_tx_1522_frames = ntohl(rsp->tx_1522_frames);
+ ncs->hnc_tx_9022_frames = ntohl(rsp->tx_9022_frames);
+ ncs->hnc_rx_valid_bytes = ntohl(rsp->rx_valid_bytes);
+ ncs->hnc_rx_runt_pkts = ntohl(rsp->rx_runt_pkts);
+ ncs->hnc_rx_jabber_pkts = ntohl(rsp->rx_jabber_pkts);
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_gns(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_gns_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_stats *ncs;
+
+ /* Find the channel */
+ rsp = (struct ncsi_rsp_gns_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Update HNC's statistics */
+ ncs = &nc->stats;
+ ncs->ncsi_rx_cmds = ntohl(rsp->rx_cmds);
+ ncs->ncsi_dropped_cmds = ntohl(rsp->dropped_cmds);
+ ncs->ncsi_cmd_type_errs = ntohl(rsp->cmd_type_errs);
+ ncs->ncsi_cmd_csum_errs = ntohl(rsp->cmd_csum_errs);
+ ncs->ncsi_rx_pkts = ntohl(rsp->rx_pkts);
+ ncs->ncsi_tx_pkts = ntohl(rsp->tx_pkts);
+ ncs->ncsi_tx_aen_pkts = ntohl(rsp->tx_aen_pkts);
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_gnpts(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_gnpts_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
+ struct ncsi_channel_stats *ncs;
+
+ /* Find the channel */
+ rsp = (struct ncsi_rsp_gnpts_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ NULL, &nc);
+ if (!nc)
+ return -ENODEV;
+
+ /* Update HNC's statistics */
+ ncs = &nc->stats;
+ ncs->pt_tx_pkts = ntohl(rsp->tx_pkts);
+ ncs->pt_tx_dropped = ntohl(rsp->tx_dropped);
+ ncs->pt_tx_channel_err = ntohl(rsp->tx_channel_err);
+ ncs->pt_tx_us_err = ntohl(rsp->tx_us_err);
+ ncs->pt_rx_pkts = ntohl(rsp->rx_pkts);
+ ncs->pt_rx_dropped = ntohl(rsp->rx_dropped);
+ ncs->pt_rx_channel_err = ntohl(rsp->rx_channel_err);
+ ncs->pt_rx_us_err = ntohl(rsp->rx_us_err);
+ ncs->pt_rx_os_err = ntohl(rsp->rx_os_err);
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_gps(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_gps_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_package *np;
+
+ /* Find the package */
+ rsp = (struct ncsi_rsp_gps_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ &np, NULL);
+ if (!np)
+ return -ENODEV;
+
+ return 0;
+}
+
+static int ncsi_rsp_handler_gpuuid(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_gpuuid_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_package *np;
+
+ /* Find the package */
+ rsp = (struct ncsi_rsp_gpuuid_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ &np, NULL);
+ if (!np)
+ return -ENODEV;
+
+ memcpy(np->uuid, rsp->uuid, sizeof(rsp->uuid));
+
+ return 0;
+}
+
+static struct ncsi_rsp_handler {
+ unsigned char type;
+ int payload;
+ int (*handler)(struct ncsi_request *nr);
+} ncsi_rsp_handlers[] = {
+ { NCSI_PKT_RSP_CIS, 4, ncsi_rsp_handler_cis },
+ { NCSI_PKT_RSP_SP, 4, ncsi_rsp_handler_sp },
+ { NCSI_PKT_RSP_DP, 4, ncsi_rsp_handler_dp },
+ { NCSI_PKT_RSP_EC, 4, ncsi_rsp_handler_ec },
+ { NCSI_PKT_RSP_DC, 4, ncsi_rsp_handler_dc },
+ { NCSI_PKT_RSP_RC, 4, ncsi_rsp_handler_rc },
+ { NCSI_PKT_RSP_ECNT, 4, ncsi_rsp_handler_ecnt },
+ { NCSI_PKT_RSP_DCNT, 4, ncsi_rsp_handler_dcnt },
+ { NCSI_PKT_RSP_AE, 4, ncsi_rsp_handler_ae },
+ { NCSI_PKT_RSP_SL, 4, ncsi_rsp_handler_sl },
+ { NCSI_PKT_RSP_GLS, 16, ncsi_rsp_handler_gls },
+ { NCSI_PKT_RSP_SVF, 4, ncsi_rsp_handler_svf },
+ { NCSI_PKT_RSP_EV, 4, ncsi_rsp_handler_ev },
+ { NCSI_PKT_RSP_DV, 4, ncsi_rsp_handler_dv },
+ { NCSI_PKT_RSP_SMA, 4, ncsi_rsp_handler_sma },
+ { NCSI_PKT_RSP_EBF, 4, ncsi_rsp_handler_ebf },
+ { NCSI_PKT_RSP_DBF, 4, ncsi_rsp_handler_dbf },
+ { NCSI_PKT_RSP_EGMF, 4, ncsi_rsp_handler_egmf },
+ { NCSI_PKT_RSP_DGMF, 4, ncsi_rsp_handler_dgmf },
+ { NCSI_PKT_RSP_SNFC, 4, ncsi_rsp_handler_snfc },
+ { NCSI_PKT_RSP_GVI, 36, ncsi_rsp_handler_gvi },
+ { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc },
+ { NCSI_PKT_RSP_GP, -1, ncsi_rsp_handler_gp },
+ { NCSI_PKT_RSP_GCPS, 172, ncsi_rsp_handler_gcps },
+ { NCSI_PKT_RSP_GNS, 172, ncsi_rsp_handler_gns },
+ { NCSI_PKT_RSP_GNPTS, 172, ncsi_rsp_handler_gnpts },
+ { NCSI_PKT_RSP_GPS, 8, ncsi_rsp_handler_gps },
+ { NCSI_PKT_RSP_OEM, 0, NULL },
+ { NCSI_PKT_RSP_PLDM, 0, NULL },
+ { NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid }
+};
+
+int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+{
+ struct ncsi_rsp_handler *nrh = NULL;
+ struct ncsi_dev *nd;
+ struct ncsi_dev_priv *ndp;
+ struct ncsi_request *nr;
+ struct ncsi_pkt_hdr *hdr;
+ unsigned long flags;
+ int payload, i, ret;
+
+ /* Find the NCSI device */
+ nd = ncsi_find_dev(dev);
+ ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
+ if (!ndp)
+ return -ENODEV;
+
+ /* Check if it is AEN packet */
+ hdr = (struct ncsi_pkt_hdr *)skb_network_header(skb);
+ if (hdr->type == NCSI_PKT_AEN)
+ return ncsi_aen_handler(ndp, skb);
+
+ /* Find the handler */
+ for (i = 0; i < ARRAY_SIZE(ncsi_rsp_handlers); i++) {
+ if (ncsi_rsp_handlers[i].type == hdr->type) {
+ if (ncsi_rsp_handlers[i].handler)
+ nrh = &ncsi_rsp_handlers[i];
+ else
+ nrh = NULL;
+
+ break;
+ }
+ }
+
+ if (!nrh) {
+ netdev_err(nd->dev, "Received unrecognized packet (0x%x)\n",
+ hdr->type);
+ return -ENOENT;
+ }
+
+ /* Associate with the request */
+ spin_lock_irqsave(&ndp->lock, flags);
+ nr = &ndp->requests[hdr->id];
+ if (!nr->used) {
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return -ENODEV;
+ }
+
+ nr->rsp = skb;
+ if (!nr->enabled) {
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ /* Validate the packet */
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ payload = nrh->payload;
+ if (payload < 0)
+ payload = ntohs(hdr->length);
+ ret = ncsi_validate_rsp_pkt(nr, payload);
+ if (ret)
+ goto out;
+
+ /* Process the packet */
+ ret = nrh->handler(nr);
+out:
+ ncsi_free_request(nr);
+ return ret;
+}
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 95e757c377f9..9266ceebd112 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -609,9 +609,8 @@ config NETFILTER_XT_MARK
The target allows you to create rules in the "mangle" table which alter
the netfilter mark (nfmark) field associated with the packet.
- Prior to routing, the nfmark can influence the routing method (see
- "Use netfilter MARK value as routing key") and can also be used by
- other subsystems to change their behavior.
+ Prior to routing, the nfmark can influence the routing method and can
+ also be used by other subsystems to change their behavior.
config NETFILTER_XT_CONNMARK
tristate 'ctmark target and match support'
@@ -753,9 +752,8 @@ config NETFILTER_XT_TARGET_HMARK
The target allows you to create rules in the "raw" and "mangle" tables
which set the skbuff mark by means of hash calculation within a given
- range. The nfmark can influence the routing method (see "Use netfilter
- MARK value as routing key") and can also be used by other subsystems to
- change their behaviour.
+ range. The nfmark can influence the routing method and can also be used
+ by other subsystems to change their behaviour.
To compile it as a module, choose M here. If unsure, say N.
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f204274a9b6b..153e33ffeeaa 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -327,16 +327,10 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
tmpl->status = IPS_TEMPLATE;
write_pnet(&tmpl->ct_net, net);
-
- if (nf_ct_zone_add(tmpl, flags, zone) < 0)
- goto out_free;
-
+ nf_ct_zone_add(tmpl, zone);
atomic_set(&tmpl->ct_general.use, 0);
return tmpl;
-out_free:
- kfree(tmpl);
- return NULL;
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
@@ -929,16 +923,13 @@ __nf_conntrack_alloc(struct net *net,
offsetof(struct nf_conn, proto) -
offsetof(struct nf_conn, __nfct_init_offset[0]));
- if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0)
- goto out_free;
+ nf_ct_zone_add(ct, zone);
/* Because we use RCU lookups, we set ct_general.use to zero before
* this is inserted in any list.
*/
atomic_set(&ct->ct_general.use, 0);
return ct;
-out_free:
- kmem_cache_free(nf_conntrack_cachep, ct);
out:
atomic_dec(&net->ct.count);
return ERR_PTR(-ENOMEM);
@@ -1342,14 +1333,6 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
}
EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
-#ifdef CONFIG_NF_CONNTRACK_ZONES
-static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
- .len = sizeof(struct nf_conntrack_zone),
- .align = __alignof__(struct nf_conntrack_zone),
- .id = NF_CT_EXT_ZONE,
-};
-#endif
-
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
@@ -1532,9 +1515,6 @@ void nf_conntrack_cleanup_end(void)
nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- nf_ct_extend_unregister(&nf_ct_zone_extend);
-#endif
nf_conntrack_proto_fini();
nf_conntrack_seqadj_fini();
nf_conntrack_labels_fini();
@@ -1617,24 +1597,14 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
}
EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
-int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+int nf_conntrack_hash_resize(unsigned int hashsize)
{
- int i, bucket, rc;
- unsigned int hashsize, old_size;
+ int i, bucket;
+ unsigned int old_size;
struct hlist_nulls_head *hash, *old_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
- if (current->nsproxy->net_ns != &init_net)
- return -EOPNOTSUPP;
-
- /* On boot, we can set this without any fancy locking. */
- if (!nf_conntrack_htable_size)
- return param_set_uint(val, kp);
-
- rc = kstrtouint(val, 0, &hashsize);
- if (rc)
- return rc;
if (!hashsize)
return -EINVAL;
@@ -1642,6 +1612,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
if (!hash)
return -ENOMEM;
+ old_size = nf_conntrack_htable_size;
+ if (old_size == hashsize) {
+ nf_ct_free_hashtable(hash, hashsize);
+ return 0;
+ }
+
local_bh_disable();
nf_conntrack_all_lock();
write_seqcount_begin(&nf_conntrack_generation);
@@ -1677,6 +1653,25 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
nf_ct_free_hashtable(old_hash, old_size);
return 0;
}
+
+int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+{
+ unsigned int hashsize;
+ int rc;
+
+ if (current->nsproxy->net_ns != &init_net)
+ return -EOPNOTSUPP;
+
+ /* On boot, we can set this without any fancy locking. */
+ if (!nf_conntrack_htable_size)
+ return param_set_uint(val, kp);
+
+ rc = kstrtouint(val, 0, &hashsize);
+ if (rc)
+ return rc;
+
+ return nf_conntrack_hash_resize(hashsize);
+}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
@@ -1733,7 +1728,7 @@ int nf_conntrack_init_start(void)
nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
sizeof(struct nf_conn), 0,
- SLAB_DESTROY_BY_RCU, NULL);
+ SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
if (!nf_conntrack_cachep)
goto err_cachep;
@@ -1773,11 +1768,6 @@ int nf_conntrack_init_start(void)
if (ret < 0)
goto err_seqadj;
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- ret = nf_ct_extend_register(&nf_ct_zone_extend);
- if (ret < 0)
- goto err_extend;
-#endif
ret = nf_conntrack_proto_init();
if (ret < 0)
goto err_proto;
@@ -1793,10 +1783,6 @@ int nf_conntrack_init_start(void)
return 0;
err_proto:
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- nf_ct_extend_unregister(&nf_ct_zone_extend);
-err_extend:
-#endif
nf_conntrack_seqadj_fini();
err_seqadj:
nf_conntrack_labels_fini();
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 196cb39649e1..3a1a88b9bafa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -389,11 +389,38 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
struct net *net)
{
struct nf_conntrack_tuple_hash *h;
+ const struct hlist_nulls_node *nn;
+ int cpu;
+
+ /* Get rid of expecteds, set helpers to NULL. */
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_bh(&pcpu->lock);
+ hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
+ unhelp(h, me);
+ spin_unlock_bh(&pcpu->lock);
+ }
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
+ struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_expect *exp;
const struct hlist_node *next;
const struct hlist_nulls_node *nn;
+ struct net *net;
unsigned int i;
- int cpu;
+
+ mutex_lock(&nf_ct_helper_mutex);
+ hlist_del_rcu(&me->hnode);
+ nf_ct_helper_count--;
+ mutex_unlock(&nf_ct_helper_mutex);
+
+ /* Make sure every nothing is still using the helper unless its a
+ * connection in the hash.
+ */
+ synchronize_rcu();
/* Get rid of expectations */
spin_lock_bh(&nf_conntrack_expect_lock);
@@ -413,15 +440,11 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
}
spin_unlock_bh(&nf_conntrack_expect_lock);
- /* Get rid of expecteds, set helpers to NULL. */
- for_each_possible_cpu(cpu) {
- struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+ rtnl_lock();
+ for_each_net(net)
+ __nf_conntrack_helper_unregister(me, net);
+ rtnl_unlock();
- spin_lock_bh(&pcpu->lock);
- hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
- unhelp(h, me);
- spin_unlock_bh(&pcpu->lock);
- }
local_bh_disable();
for (i = 0; i < nf_conntrack_htable_size; i++) {
nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
@@ -433,26 +456,6 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
}
local_bh_enable();
}
-
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
-{
- struct net *net;
-
- mutex_lock(&nf_ct_helper_mutex);
- hlist_del_rcu(&me->hnode);
- nf_ct_helper_count--;
- mutex_unlock(&nf_ct_helper_mutex);
-
- /* Make sure every nothing is still using the helper unless its a
- * connection in the hash.
- */
- synchronize_rcu();
-
- rtnl_lock();
- for_each_net(net)
- __nf_conntrack_helper_unregister(me, net);
- rtnl_unlock();
-}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
static struct nf_ct_ext_type helper_extend __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index c026c472ea80..2aaa188ee961 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -434,8 +434,29 @@ static void nf_conntrack_standalone_fini_proc(struct net *net)
#ifdef CONFIG_SYSCTL
/* Log invalid packets of a given protocol */
-static int log_invalid_proto_min = 0;
-static int log_invalid_proto_max = 255;
+static int log_invalid_proto_min __read_mostly;
+static int log_invalid_proto_max __read_mostly = 255;
+
+/* size the user *wants to set */
+static unsigned int nf_conntrack_htable_size_user __read_mostly;
+
+static int
+nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (ret < 0 || !write)
+ return ret;
+
+ /* update ret, we might not be able to satisfy request */
+ ret = nf_conntrack_hash_resize(nf_conntrack_htable_size_user);
+
+ /* update it to the actual value used by conntrack */
+ nf_conntrack_htable_size_user = nf_conntrack_htable_size;
+ return ret;
+}
static struct ctl_table_header *nf_ct_netfilter_header;
@@ -456,10 +477,10 @@ static struct ctl_table nf_ct_sysctl_table[] = {
},
{
.procname = "nf_conntrack_buckets",
- .data = &nf_conntrack_htable_size,
+ .data = &nf_conntrack_htable_size_user,
.maxlen = sizeof(unsigned int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
+ .mode = 0644,
+ .proc_handler = nf_conntrack_hash_sysctl,
},
{
.procname = "nf_conntrack_checksum",
@@ -515,6 +536,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
if (net->user_ns != &init_user_ns)
table[0].procname = NULL;
+ if (!net_eq(&init_net, net))
+ table[2].mode = 0444;
+
net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
if (!net->ct.sysctl_header)
goto out_unregister_netfilter;
@@ -604,6 +628,8 @@ static int __init nf_conntrack_standalone_init(void)
ret = -ENOMEM;
goto out_sysctl;
}
+
+ nf_conntrack_htable_size_user = nf_conntrack_htable_size;
#endif
ret = register_pernet_subsys(&nf_conntrack_net_ops);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index a5d41dfa9f05..aa5847a16713 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -159,6 +159,20 @@ int nf_logger_find_get(int pf, enum nf_log_type type)
struct nf_logger *logger;
int ret = -ENOENT;
+ if (pf == NFPROTO_INET) {
+ ret = nf_logger_find_get(NFPROTO_IPV4, type);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_logger_find_get(NFPROTO_IPV6, type);
+ if (ret < 0) {
+ nf_logger_put(NFPROTO_IPV4, type);
+ return ret;
+ }
+
+ return 0;
+ }
+
if (rcu_access_pointer(loggers[pf][type]) == NULL)
request_module("nf-logger-%u-%u", pf, type);
@@ -167,7 +181,7 @@ int nf_logger_find_get(int pf, enum nf_log_type type)
if (logger == NULL)
goto out;
- if (logger && try_module_get(logger->me))
+ if (try_module_get(logger->me))
ret = 0;
out:
rcu_read_unlock();
@@ -179,6 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type)
{
struct nf_logger *logger;
+ if (pf == NFPROTO_INET) {
+ nf_logger_put(NFPROTO_IPV4, type);
+ nf_logger_put(NFPROTO_IPV6, type);
+ return;
+ }
+
BUG_ON(loggers[pf][type] == NULL);
rcu_read_lock();
@@ -398,16 +418,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
{
const struct nf_logger *logger;
char buf[NFLOGGER_NAME_LEN];
- size_t size = *lenp;
int r = 0;
int tindex = (unsigned long)table->extra1;
struct net *net = current->nsproxy->net_ns;
if (write) {
- if (size > sizeof(buf))
- size = sizeof(buf);
- if (copy_from_user(buf, buffer, size))
- return -EFAULT;
+ struct ctl_table tmp = *table;
+
+ tmp.data = buf;
+ r = proc_dostring(&tmp, write, buffer, lenp, ppos);
+ if (r)
+ return r;
if (!strcmp(buf, "NONE")) {
nf_log_unbind_pf(net, tindex);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2c881871db38..18b7f8578ee0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -131,29 +131,8 @@ static void nft_trans_destroy(struct nft_trans *trans)
kfree(trans);
}
-static int nft_register_basechain(struct nft_base_chain *basechain,
- unsigned int hook_nops)
-{
- struct net *net = read_pnet(&basechain->pnet);
-
- if (basechain->flags & NFT_BASECHAIN_DISABLED)
- return 0;
-
- return nf_register_net_hooks(net, basechain->ops, hook_nops);
-}
-
-static void nft_unregister_basechain(struct nft_base_chain *basechain,
- unsigned int hook_nops)
-{
- struct net *net = read_pnet(&basechain->pnet);
-
- if (basechain->flags & NFT_BASECHAIN_DISABLED)
- return;
-
- nf_unregister_net_hooks(net, basechain->ops, hook_nops);
-}
-
-static int nf_tables_register_hooks(const struct nft_table *table,
+static int nf_tables_register_hooks(struct net *net,
+ const struct nft_table *table,
struct nft_chain *chain,
unsigned int hook_nops)
{
@@ -161,10 +140,12 @@ static int nf_tables_register_hooks(const struct nft_table *table,
!(chain->flags & NFT_BASE_CHAIN))
return 0;
- return nft_register_basechain(nft_base_chain(chain), hook_nops);
+ return nf_register_net_hooks(net, nft_base_chain(chain)->ops,
+ hook_nops);
}
-static void nf_tables_unregister_hooks(const struct nft_table *table,
+static void nf_tables_unregister_hooks(struct net *net,
+ const struct nft_table *table,
struct nft_chain *chain,
unsigned int hook_nops)
{
@@ -172,12 +153,9 @@ static void nf_tables_unregister_hooks(const struct nft_table *table,
!(chain->flags & NFT_BASE_CHAIN))
return;
- nft_unregister_basechain(nft_base_chain(chain), hook_nops);
+ nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops);
}
-/* Internal table flags */
-#define NFT_TABLE_INACTIVE (1 << 15)
-
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
{
struct nft_trans *trans;
@@ -187,7 +165,7 @@ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
return -ENOMEM;
if (msg_type == NFT_MSG_NEWTABLE)
- ctx->table->flags |= NFT_TABLE_INACTIVE;
+ nft_activate_next(ctx->net, ctx->table);
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
@@ -201,7 +179,7 @@ static int nft_deltable(struct nft_ctx *ctx)
if (err < 0)
return err;
- list_del_rcu(&ctx->table->list);
+ nft_deactivate_next(ctx->net, ctx->table);
return err;
}
@@ -214,7 +192,7 @@ static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
return -ENOMEM;
if (msg_type == NFT_MSG_NEWCHAIN)
- ctx->chain->flags |= NFT_CHAIN_INACTIVE;
+ nft_activate_next(ctx->net, ctx->chain);
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
@@ -229,47 +207,17 @@ static int nft_delchain(struct nft_ctx *ctx)
return err;
ctx->table->use--;
- list_del_rcu(&ctx->chain->list);
+ nft_deactivate_next(ctx->net, ctx->chain);
return err;
}
-static inline bool
-nft_rule_is_active(struct net *net, const struct nft_rule *rule)
-{
- return (rule->genmask & nft_genmask_cur(net)) == 0;
-}
-
-static inline int
-nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
-{
- return (rule->genmask & nft_genmask_next(net)) == 0;
-}
-
-static inline void
-nft_rule_activate_next(struct net *net, struct nft_rule *rule)
-{
- /* Now inactive, will be active in the future */
- rule->genmask = nft_genmask_cur(net);
-}
-
-static inline void
-nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
-{
- rule->genmask = nft_genmask_next(net);
-}
-
-static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
-{
- rule->genmask &= ~nft_genmask_next(net);
-}
-
static int
nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
{
/* You cannot delete the same rule twice */
- if (nft_rule_is_active_next(ctx->net, rule)) {
- nft_rule_deactivate_next(ctx->net, rule);
+ if (nft_is_active_next(ctx->net, rule)) {
+ nft_deactivate_next(ctx->net, rule);
ctx->chain->use--;
return 0;
}
@@ -322,9 +270,6 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
return 0;
}
-/* Internal set flag */
-#define NFT_SET_INACTIVE (1 << 15)
-
static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
struct nft_set *set)
{
@@ -337,7 +282,7 @@ static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
nft_trans_set_id(trans) =
ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
- set->flags |= NFT_SET_INACTIVE;
+ nft_activate_next(ctx->net, set);
}
nft_trans_set(trans) = set;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
@@ -353,7 +298,7 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
if (err < 0)
return err;
- list_del_rcu(&set->list);
+ nft_deactivate_next(ctx->net, set);
ctx->table->use--;
return err;
@@ -364,26 +309,29 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
*/
static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ u8 genmask)
{
struct nft_table *table;
list_for_each_entry(table, &afi->tables, list) {
- if (!nla_strcmp(nla, table->name))
+ if (!nla_strcmp(nla, table->name) &&
+ nft_active_genmask(table, genmask))
return table;
}
return NULL;
}
static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ u8 genmask)
{
struct nft_table *table;
if (nla == NULL)
return ERR_PTR(-EINVAL);
- table = nft_table_lookup(afi, nla);
+ table = nft_table_lookup(afi, nla, genmask);
if (table != NULL)
return table;
@@ -524,6 +472,8 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
+ if (!nft_is_active(net, table))
+ continue;
if (nf_tables_fill_table_info(skb, net,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
@@ -548,6 +498,7 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_cur(net);
const struct nft_af_info *afi;
const struct nft_table *table;
struct sk_buff *skb2;
@@ -565,11 +516,9 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -588,17 +537,21 @@ err:
return err;
}
-static int nf_tables_table_enable(const struct nft_af_info *afi,
+static int nf_tables_table_enable(struct net *net,
+ const struct nft_af_info *afi,
struct nft_table *table)
{
struct nft_chain *chain;
int err, i = 0;
list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_active_next(net, chain))
+ continue;
if (!(chain->flags & NFT_BASE_CHAIN))
continue;
- err = nft_register_basechain(nft_base_chain(chain), afi->nops);
+ err = nf_register_net_hooks(net, nft_base_chain(chain)->ops,
+ afi->nops);
if (err < 0)
goto err;
@@ -607,26 +560,34 @@ static int nf_tables_table_enable(const struct nft_af_info *afi,
return 0;
err:
list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_active_next(net, chain))
+ continue;
if (!(chain->flags & NFT_BASE_CHAIN))
continue;
if (i-- <= 0)
break;
- nft_unregister_basechain(nft_base_chain(chain), afi->nops);
+ nf_unregister_net_hooks(net, nft_base_chain(chain)->ops,
+ afi->nops);
}
return err;
}
-static void nf_tables_table_disable(const struct nft_af_info *afi,
+static void nf_tables_table_disable(struct net *net,
+ const struct nft_af_info *afi,
struct nft_table *table)
{
struct nft_chain *chain;
list_for_each_entry(chain, &table->chains, list) {
- if (chain->flags & NFT_BASE_CHAIN)
- nft_unregister_basechain(nft_base_chain(chain),
- afi->nops);
+ if (!nft_is_active_next(net, chain))
+ continue;
+ if (!(chain->flags & NFT_BASE_CHAIN))
+ continue;
+
+ nf_unregister_net_hooks(net, nft_base_chain(chain)->ops,
+ afi->nops);
}
}
@@ -656,7 +617,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
nft_trans_table_enable(trans) = false;
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
ctx->table->flags & NFT_TABLE_F_DORMANT) {
- ret = nf_tables_table_enable(ctx->afi, ctx->table);
+ ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table);
if (ret >= 0) {
ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
nft_trans_table_enable(trans) = true;
@@ -678,6 +639,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
const struct nlattr *name;
struct nft_af_info *afi;
struct nft_table *table;
@@ -691,7 +653,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
return PTR_ERR(afi);
name = nla[NFTA_TABLE_NAME];
- table = nf_tables_table_lookup(afi, name);
+ table = nf_tables_table_lookup(afi, name, genmask);
if (IS_ERR(table)) {
if (PTR_ERR(table) != -ENOENT)
return PTR_ERR(table);
@@ -699,8 +661,6 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
}
if (table != NULL) {
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
@@ -752,6 +712,9 @@ static int nft_flush_table(struct nft_ctx *ctx)
struct nft_set *set, *ns;
list_for_each_entry(chain, &ctx->table->chains, list) {
+ if (!nft_is_active_next(ctx->net, chain))
+ continue;
+
ctx->chain = chain;
err = nft_delrule_by_chain(ctx);
@@ -760,6 +723,9 @@ static int nft_flush_table(struct nft_ctx *ctx)
}
list_for_each_entry_safe(set, ns, &ctx->table->sets, list) {
+ if (!nft_is_active_next(ctx->net, set))
+ continue;
+
if (set->flags & NFT_SET_ANONYMOUS &&
!list_empty(&set->bindings))
continue;
@@ -770,6 +736,9 @@ static int nft_flush_table(struct nft_ctx *ctx)
}
list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) {
+ if (!nft_is_active_next(ctx->net, chain))
+ continue;
+
ctx->chain = chain;
err = nft_delchain(ctx);
@@ -795,6 +764,9 @@ static int nft_flush(struct nft_ctx *ctx, int family)
ctx->afi = afi;
list_for_each_entry_safe(table, nt, &afi->tables, list) {
+ if (!nft_is_active_next(ctx->net, table))
+ continue;
+
if (nla[NFTA_TABLE_NAME] &&
nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
continue;
@@ -815,6 +787,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
struct nft_af_info *afi;
struct nft_table *table;
int family = nfmsg->nfgen_family;
@@ -828,7 +801,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -875,12 +848,14 @@ EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
*/
static struct nft_chain *
-nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
+nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle,
+ u8 genmask)
{
struct nft_chain *chain;
list_for_each_entry(chain, &table->chains, list) {
- if (chain->handle == handle)
+ if (chain->handle == handle &&
+ nft_active_genmask(chain, genmask))
return chain;
}
@@ -888,7 +863,8 @@ nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
}
static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ u8 genmask)
{
struct nft_chain *chain;
@@ -896,7 +872,8 @@ static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
return ERR_PTR(-EINVAL);
list_for_each_entry(chain, &table->chains, list) {
- if (!nla_strcmp(nla, chain->name))
+ if (!nla_strcmp(nla, chain->name) &&
+ nft_active_genmask(chain, genmask))
return chain;
}
@@ -1079,6 +1056,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
+ if (!nft_is_active(net, chain))
+ continue;
if (nf_tables_fill_chain_info(skb, net,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
@@ -1104,6 +1083,7 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_cur(net);
const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
@@ -1122,17 +1102,13 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
- chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+ chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
- if (chain->flags & NFT_CHAIN_INACTIVE)
- return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -1231,6 +1207,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct nft_chain *chain;
struct nft_base_chain *basechain = NULL;
struct nlattr *ha[NFTA_HOOK_MAX + 1];
+ u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
struct net_device *dev = NULL;
u8 policy = NF_ACCEPT;
@@ -1247,7 +1224,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -1256,11 +1233,11 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (nla[NFTA_CHAIN_HANDLE]) {
handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
- chain = nf_tables_chain_lookup_byhandle(table, handle);
+ chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
} else {
- chain = nf_tables_chain_lookup(table, name);
+ chain = nf_tables_chain_lookup(table, name, genmask);
if (IS_ERR(chain)) {
if (PTR_ERR(chain) != -ENOENT)
return PTR_ERR(chain);
@@ -1291,16 +1268,20 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct nft_stats *stats = NULL;
struct nft_trans *trans;
- if (chain->flags & NFT_CHAIN_INACTIVE)
- return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- if (nla[NFTA_CHAIN_HANDLE] && name &&
- !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME])))
- return -EEXIST;
+ if (nla[NFTA_CHAIN_HANDLE] && name) {
+ struct nft_chain *chain2;
+
+ chain2 = nf_tables_chain_lookup(table,
+ nla[NFTA_CHAIN_NAME],
+ genmask);
+ if (IS_ERR(chain2))
+ return PTR_ERR(chain2);
+ }
if (nla[NFTA_CHAIN_COUNTERS]) {
if (!(chain->flags & NFT_BASE_CHAIN))
@@ -1455,7 +1436,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
chain->table = table;
nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
- err = nf_tables_register_hooks(table, chain, afi->nops);
+ err = nf_tables_register_hooks(net, table, chain, afi->nops);
if (err < 0)
goto err1;
@@ -1468,7 +1449,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
list_add_tail_rcu(&chain->list, &table->chains);
return 0;
err2:
- nf_tables_unregister_hooks(table, chain, afi->nops);
+ nf_tables_unregister_hooks(net, table, chain, afi->nops);
err1:
nf_tables_chain_destroy(chain);
return err;
@@ -1479,6 +1460,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
@@ -1489,11 +1471,11 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+ chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
if (chain->use > 0)
@@ -1898,7 +1880,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
list_for_each_entry_rcu(table, &afi->tables, list) {
list_for_each_entry_rcu(chain, &table->chains, list) {
list_for_each_entry_rcu(rule, &chain->rules, list) {
- if (!nft_rule_is_active(net, rule))
+ if (!nft_is_active(net, rule))
goto cont;
if (idx < s_idx)
goto cont;
@@ -1931,6 +1913,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_cur(net);
const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
@@ -1950,17 +1933,13 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
- chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+ chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
- if (chain->flags & NFT_CHAIN_INACTIVE)
- return -ENOENT;
rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule))
@@ -2009,6 +1988,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
@@ -2029,11 +2009,11 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+ chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
@@ -2102,7 +2082,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
if (rule == NULL)
goto err1;
- nft_rule_activate_next(net, rule);
+ nft_activate_next(net, rule);
rule->handle = handle;
rule->dlen = size;
@@ -2124,14 +2104,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
}
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
- if (nft_rule_is_active_next(net, old_rule)) {
+ if (nft_is_active_next(net, old_rule)) {
trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
old_rule);
if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
- nft_rule_deactivate_next(net, old_rule);
+ nft_deactivate_next(net, old_rule);
chain->use--;
list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
@@ -2174,6 +2154,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain = NULL;
@@ -2185,12 +2166,13 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
if (nla[NFTA_RULE_CHAIN]) {
- chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+ chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN],
+ genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
}
@@ -2210,6 +2192,9 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
}
} else {
list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_active_next(net, chain))
+ continue;
+
ctx.chain = chain;
err = nft_delrule_by_chain(&ctx);
if (err < 0)
@@ -2339,7 +2324,8 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ u8 genmask)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi = NULL;
@@ -2355,7 +2341,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
if (afi == NULL)
return -EAFNOSUPPORT;
- table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE],
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
}
@@ -2365,7 +2352,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
}
struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
- const struct nlattr *nla)
+ const struct nlattr *nla, u8 genmask)
{
struct nft_set *set;
@@ -2373,22 +2360,27 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
return ERR_PTR(-EINVAL);
list_for_each_entry(set, &table->sets, list) {
- if (!nla_strcmp(nla, set->name))
+ if (!nla_strcmp(nla, set->name) &&
+ nft_active_genmask(set, genmask))
return set;
}
return ERR_PTR(-ENOENT);
}
struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ u8 genmask)
{
struct nft_trans *trans;
u32 id = ntohl(nla_get_be32(nla));
list_for_each_entry(trans, &net->nft.commit_list, list) {
+ struct nft_set *set = nft_trans_set(trans);
+
if (trans->msg_type == NFT_MSG_NEWSET &&
- id == nft_trans_set_id(trans))
- return nft_trans_set(trans);
+ id == nft_trans_set_id(trans) &&
+ nft_active_genmask(set, genmask))
+ return set;
}
return ERR_PTR(-ENOENT);
}
@@ -2413,6 +2405,8 @@ cont:
list_for_each_entry(i, &ctx->table->sets, list) {
int tmp;
+ if (!nft_is_active_next(ctx->net, set))
+ continue;
if (!sscanf(i->name, name, &tmp))
continue;
if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE)
@@ -2432,6 +2426,8 @@ cont:
snprintf(set->name, sizeof(set->name), name, min + n);
list_for_each_entry(i, &ctx->table->sets, list) {
+ if (!nft_is_active_next(ctx->net, i))
+ continue;
if (!strcmp(set->name, i->name))
return -ENFILE;
}
@@ -2580,6 +2576,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
list_for_each_entry_rcu(set, &table->sets, list) {
if (idx < s_idx)
goto cont;
+ if (!nft_is_active(net, set))
+ goto cont;
ctx_set = *ctx;
ctx_set.table = table;
@@ -2616,6 +2614,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ u8 genmask = nft_genmask_cur(net);
const struct nft_set *set;
struct nft_ctx ctx;
struct sk_buff *skb2;
@@ -2623,7 +2622,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
int err;
/* Verify existence before starting dump */
- err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask);
if (err < 0)
return err;
@@ -2650,11 +2649,9 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
if (!nla[NFTA_SET_TABLE])
return -EINVAL;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (set->flags & NFT_SET_INACTIVE)
- return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb2 == NULL)
@@ -2693,6 +2690,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
const struct nft_set_ops *ops;
struct nft_af_info *afi;
struct nft_table *table;
@@ -2790,13 +2788,13 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
- set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]);
+ set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set)) {
if (PTR_ERR(set) != -ENOENT)
return PTR_ERR(set);
@@ -2895,6 +2893,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
struct nft_set *set;
struct nft_ctx ctx;
int err;
@@ -2904,11 +2903,11 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
if (nla[NFTA_SET_TABLE] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask);
if (err < 0)
return err;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
if (!list_empty(&set->bindings))
@@ -2973,7 +2972,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
list_del_rcu(&binding->list);
if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
- !(set->flags & NFT_SET_INACTIVE))
+ nft_is_active(ctx->net, set))
nf_tables_set_destroy(ctx, set);
}
@@ -3029,7 +3028,8 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ u8 genmask)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
@@ -3039,7 +3039,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE],
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -3136,6 +3137,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ u8 genmask = nft_genmask_cur(net);
const struct nft_set *set;
struct nft_set_dump_args args;
struct nft_ctx ctx;
@@ -3152,17 +3154,14 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
return err;
err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh,
- (void *)nla);
+ (void *)nla, genmask);
if (err < 0)
return err;
- if (ctx.table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+ genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (set->flags & NFT_SET_INACTIVE)
- return -ENOENT;
event = NFT_MSG_NEWSETELEM;
event |= NFNL_SUBSYS_NFTABLES << 8;
@@ -3216,21 +3215,19 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ u8 genmask = nft_genmask_cur(net);
const struct nft_set *set;
struct nft_ctx ctx;
int err;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
if (err < 0)
return err;
- if (ctx.table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+ genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (set->flags & NFT_SET_INACTIVE)
- return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -3548,6 +3545,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ u8 genmask = nft_genmask_next(net);
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
@@ -3556,15 +3554,17 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
if (err < 0)
return err;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+ genmask);
if (IS_ERR(set)) {
if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
set = nf_tables_set_lookup_byid(net,
- nla[NFTA_SET_ELEM_LIST_SET_ID]);
+ nla[NFTA_SET_ELEM_LIST_SET_ID],
+ genmask);
}
if (IS_ERR(set))
return PTR_ERR(set);
@@ -3670,6 +3670,7 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ u8 genmask = nft_genmask_next(net);
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
@@ -3678,11 +3679,12 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
if (err < 0)
return err;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+ genmask);
if (IS_ERR(set))
return PTR_ERR(set);
if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
@@ -3950,36 +3952,40 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
if (!nft_trans_table_enable(trans)) {
- nf_tables_table_disable(trans->ctx.afi,
+ nf_tables_table_disable(net,
+ trans->ctx.afi,
trans->ctx.table);
trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
}
} else {
- trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE;
+ nft_clear(net, trans->ctx.table);
}
nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
nft_trans_destroy(trans);
break;
case NFT_MSG_DELTABLE:
+ list_del_rcu(&trans->ctx.table->list);
nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans))
nft_chain_commit_update(trans);
else
- trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE;
+ nft_clear(net, trans->ctx.chain);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
nft_trans_destroy(trans);
break;
case NFT_MSG_DELCHAIN:
+ list_del_rcu(&trans->ctx.chain->list);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
- nf_tables_unregister_hooks(trans->ctx.table,
+ nf_tables_unregister_hooks(trans->ctx.net,
+ trans->ctx.table,
trans->ctx.chain,
trans->ctx.afi->nops);
break;
case NFT_MSG_NEWRULE:
- nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+ nft_clear(trans->ctx.net, nft_trans_rule(trans));
nf_tables_rule_notify(&trans->ctx,
nft_trans_rule(trans),
NFT_MSG_NEWRULE);
@@ -3992,7 +3998,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
NFT_MSG_DELRULE);
break;
case NFT_MSG_NEWSET:
- nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE;
+ nft_clear(net, nft_trans_set(trans));
/* This avoids hitting -EBUSY when deleting the table
* from the transaction.
*/
@@ -4005,6 +4011,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSET:
+ list_del_rcu(&nft_trans_set(trans)->list);
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
NFT_MSG_DELSET, GFP_KERNEL);
break;
@@ -4076,7 +4083,8 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
if (nft_trans_table_enable(trans)) {
- nf_tables_table_disable(trans->ctx.afi,
+ nf_tables_table_disable(net,
+ trans->ctx.afi,
trans->ctx.table);
trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
}
@@ -4086,8 +4094,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
}
break;
case NFT_MSG_DELTABLE:
- list_add_tail_rcu(&trans->ctx.table->list,
- &trans->ctx.afi->tables);
+ nft_clear(trans->ctx.net, trans->ctx.table);
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWCHAIN:
@@ -4098,15 +4105,15 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
} else {
trans->ctx.table->use--;
list_del_rcu(&trans->ctx.chain->list);
- nf_tables_unregister_hooks(trans->ctx.table,
+ nf_tables_unregister_hooks(trans->ctx.net,
+ trans->ctx.table,
trans->ctx.chain,
trans->ctx.afi->nops);
}
break;
case NFT_MSG_DELCHAIN:
trans->ctx.table->use++;
- list_add_tail_rcu(&trans->ctx.chain->list,
- &trans->ctx.table->chains);
+ nft_clear(trans->ctx.net, trans->ctx.chain);
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWRULE:
@@ -4115,7 +4122,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELRULE:
trans->ctx.chain->use++;
- nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+ nft_clear(trans->ctx.net, nft_trans_rule(trans));
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSET:
@@ -4124,8 +4131,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELSET:
trans->ctx.table->use++;
- list_add_tail_rcu(&nft_trans_set(trans)->list,
- &trans->ctx.table->sets);
+ nft_clear(trans->ctx.net, nft_trans_set(trans));
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
@@ -4272,6 +4278,8 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
}
list_for_each_entry(set, &ctx->table->sets, list) {
+ if (!nft_is_active_next(ctx->net, set))
+ continue;
if (!(set->flags & NFT_SET_MAP) ||
set->dtype != NFT_DATA_VERDICT)
continue;
@@ -4430,6 +4438,7 @@ static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
struct nft_data_desc *desc, const struct nlattr *nla)
{
+ u8 genmask = nft_genmask_next(ctx->net);
struct nlattr *tb[NFTA_VERDICT_MAX + 1];
struct nft_chain *chain;
int err;
@@ -4462,7 +4471,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (!tb[NFTA_VERDICT_CHAIN])
return -EINVAL;
chain = nf_tables_chain_lookup(ctx->table,
- tb[NFTA_VERDICT_CHAIN]);
+ tb[NFTA_VERDICT_CHAIN], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
if (chain->flags & NFT_BASE_CHAIN)
@@ -4640,7 +4649,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN));
- nf_tables_unregister_hooks(ctx->chain->table, ctx->chain,
+ nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain,
ctx->afi->nops);
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
list_del(&rule->list);
@@ -4669,7 +4678,8 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
list_for_each_entry_safe(table, nt, &afi->tables, list) {
list_for_each_entry(chain, &table->chains, list)
- nf_tables_unregister_hooks(table, chain, afi->nops);
+ nf_tables_unregister_hooks(net, table, chain,
+ afi->nops);
/* No packets are walking on these chains anymore. */
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 11f81c8385fc..cbcfdfb586a6 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -700,10 +700,13 @@ nfulnl_log_packet(struct net *net,
break;
case NFULNL_COPY_PACKET:
- if (inst->copy_range > skb->len)
+ data_len = inst->copy_range;
+ if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) &&
+ (li->u.ulog.copy_len < data_len))
+ data_len = li->u.ulog.copy_len;
+
+ if (data_len > skb->len)
data_len = skb->len;
- else
- data_len = inst->copy_range;
size += nla_total_size(data_len);
break;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 78d4914fb39c..0af26699bf04 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -103,6 +103,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_dynset *priv = nft_expr_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
struct nft_set *set;
u64 timeout;
int err;
@@ -112,11 +113,13 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
tb[NFTA_DYNSET_SREG_KEY] == NULL)
return -EINVAL;
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]);
+ set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME],
+ genmask);
if (IS_ERR(set)) {
if (tb[NFTA_DYNSET_SET_ID])
set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_DYNSET_SET_ID]);
+ tb[NFTA_DYNSET_SET_ID],
+ genmask);
if (IS_ERR(set))
return PTR_ERR(set);
}
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index f39c53a159eb..ea924816b7b8 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -153,9 +153,10 @@ static void *nft_hash_deactivate(const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
+ struct net *net = read_pnet(&set->pnet);
struct nft_hash_elem *he;
struct nft_hash_cmp_arg arg = {
- .genmask = nft_genmask_next(read_pnet(&set->pnet)),
+ .genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
};
@@ -163,7 +164,8 @@ static void *nft_hash_deactivate(const struct nft_set *set,
rcu_read_lock();
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
if (he != NULL) {
- if (!nft_set_elem_mark_busy(&he->ext))
+ if (!nft_set_elem_mark_busy(&he->ext) ||
+ !nft_is_active(net, &he->ext))
nft_set_elem_change_active(set, &he->ext);
else
he = NULL;
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 319c22b4bca2..713d66837705 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -52,7 +52,6 @@ static int nft_log_init(const struct nft_ctx *ctx,
struct nft_log *priv = nft_expr_priv(expr);
struct nf_loginfo *li = &priv->loginfo;
const struct nlattr *nla;
- int ret;
nla = tb[NFTA_LOG_PREFIX];
if (nla != NULL) {
@@ -97,19 +96,6 @@ static int nft_log_init(const struct nft_ctx *ctx,
break;
}
- if (ctx->afi->family == NFPROTO_INET) {
- ret = nf_logger_find_get(NFPROTO_IPV4, li->type);
- if (ret < 0)
- return ret;
-
- ret = nf_logger_find_get(NFPROTO_IPV6, li->type);
- if (ret < 0) {
- nf_logger_put(NFPROTO_IPV4, li->type);
- return ret;
- }
- return 0;
- }
-
return nf_logger_find_get(ctx->afi->family, li->type);
}
@@ -122,12 +108,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx,
if (priv->prefix != nft_log_null_prefix)
kfree(priv->prefix);
- if (ctx->afi->family == NFPROTO_INET) {
- nf_logger_put(NFPROTO_IPV4, li->type);
- nf_logger_put(NFPROTO_IPV6, li->type);
- } else {
- nf_logger_put(ctx->afi->family, li->type);
- }
+ nf_logger_put(ctx->afi->family, li->type);
}
static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index b3c31ef8015d..b8d18f598569 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -22,6 +22,7 @@ struct nft_lookup {
struct nft_set *set;
enum nft_registers sreg:8;
enum nft_registers dreg:8;
+ bool invert;
struct nft_set_binding binding;
};
@@ -32,14 +33,20 @@ static void nft_lookup_eval(const struct nft_expr *expr,
const struct nft_lookup *priv = nft_expr_priv(expr);
const struct nft_set *set = priv->set;
const struct nft_set_ext *ext;
+ bool found;
- if (set->ops->lookup(set, &regs->data[priv->sreg], &ext)) {
- if (set->flags & NFT_SET_MAP)
- nft_data_copy(&regs->data[priv->dreg],
- nft_set_ext_data(ext), set->dlen);
+ found = set->ops->lookup(set, &regs->data[priv->sreg], &ext) ^
+ priv->invert;
+
+ if (!found) {
+ regs->verdict.code = NFT_BREAK;
return;
}
- regs->verdict.code = NFT_BREAK;
+
+ if (found && set->flags & NFT_SET_MAP)
+ nft_data_copy(&regs->data[priv->dreg],
+ nft_set_ext_data(ext), set->dlen);
+
}
static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
@@ -47,6 +54,7 @@ static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
[NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 },
[NFTA_LOOKUP_SREG] = { .type = NLA_U32 },
[NFTA_LOOKUP_DREG] = { .type = NLA_U32 },
+ [NFTA_LOOKUP_FLAGS] = { .type = NLA_U32 },
};
static int nft_lookup_init(const struct nft_ctx *ctx,
@@ -54,18 +62,21 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_lookup *priv = nft_expr_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
struct nft_set *set;
+ u32 flags;
int err;
if (tb[NFTA_LOOKUP_SET] == NULL ||
tb[NFTA_LOOKUP_SREG] == NULL)
return -EINVAL;
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
+ set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET], genmask);
if (IS_ERR(set)) {
if (tb[NFTA_LOOKUP_SET_ID]) {
set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_LOOKUP_SET_ID]);
+ tb[NFTA_LOOKUP_SET_ID],
+ genmask);
}
if (IS_ERR(set))
return PTR_ERR(set);
@@ -79,7 +90,22 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
+ if (tb[NFTA_LOOKUP_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS]));
+
+ if (flags & ~NFT_LOOKUP_F_INV)
+ return -EINVAL;
+
+ if (flags & NFT_LOOKUP_F_INV) {
+ if (set->flags & NFT_SET_MAP)
+ return -EINVAL;
+ priv->invert = true;
+ }
+ }
+
if (tb[NFTA_LOOKUP_DREG] != NULL) {
+ if (priv->invert)
+ return -EINVAL;
if (!(set->flags & NFT_SET_MAP))
return -EINVAL;
@@ -112,6 +138,7 @@ static void nft_lookup_destroy(const struct nft_ctx *ctx,
static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_lookup *priv = nft_expr_priv(expr);
+ u32 flags = priv->invert ? NFT_LOOKUP_F_INV : 0;
if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name))
goto nla_put_failure;
@@ -120,6 +147,8 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (priv->set->flags & NFT_SET_MAP)
if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg))
goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 16c50b0dd426..03e5e33b5c39 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -199,13 +199,6 @@ err:
}
EXPORT_SYMBOL_GPL(nft_meta_get_eval);
-/* don't change or set _LOOPBACK, _USER, etc. */
-static bool pkt_type_ok(u32 p)
-{
- return p == PACKET_HOST || p == PACKET_BROADCAST ||
- p == PACKET_MULTICAST || p == PACKET_OTHERHOST;
-}
-
void nft_meta_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -223,7 +216,7 @@ void nft_meta_set_eval(const struct nft_expr *expr,
break;
case NFT_META_PKTTYPE:
if (skb->pkt_type != value &&
- pkt_type_ok(value) && pkt_type_ok(skb->pkt_type))
+ skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type))
skb->pkt_type = value;
break;
case NFT_META_NFTRACE:
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index 7201d57b5a93..c0f638745adc 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -170,7 +170,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set,
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
struct nft_rbtree_elem *rbe, *this = elem->priv;
- u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
+ u8 genmask = nft_genmask_next(read_pnet(&set->pnet));
int d;
while (parent != NULL) {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 2675d580c490..fe0e2db632c7 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1460,6 +1460,9 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
uint8_t hooknum;
struct nf_hook_ops *ops;
+ if (!num_hooks)
+ return ERR_PTR(-EINVAL);
+
ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL);
if (ops == NULL)
return ERR_PTR(-ENOMEM);
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index a1fa2c800cb9..018eed7e1ff1 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -33,6 +33,9 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
li.u.ulog.group = info->group;
li.u.ulog.qthreshold = info->threshold;
+ if (info->flags & XT_NFLOG_F_COPY_LEN)
+ li.u.ulog.flags |= NF_LOG_F_COPY_LEN;
+
nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in,
par->out, &li, info->prefix);
return XT_CONTINUE;
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index df48967af382..858d189a1303 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -4,12 +4,23 @@
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
MODULE_DESCRIPTION("Xtables: packet flow tracing");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_TRACE");
MODULE_ALIAS("ip6t_TRACE");
+static int trace_tg_check(const struct xt_tgchk_param *par)
+{
+ return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+}
+
+static void trace_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_logger_put(par->family, NF_LOG_TYPE_LOG);
+}
+
static unsigned int
trace_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -18,12 +29,14 @@ trace_tg(struct sk_buff *skb, const struct xt_action_param *par)
}
static struct xt_target trace_tg_reg __read_mostly = {
- .name = "TRACE",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .table = "raw",
- .target = trace_tg,
- .me = THIS_MODULE,
+ .name = "TRACE",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .table = "raw",
+ .target = trace_tg,
+ .checkentry = trace_tg_check,
+ .destroy = trace_tg_destroy,
+ .me = THIS_MODULE,
};
static int __init trace_tg_init(void)
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 1302b475abcb..a20e731b5b6c 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -21,11 +21,39 @@
static int owner_check(const struct xt_mtchk_param *par)
{
struct xt_owner_match_info *info = par->matchinfo;
+ struct net *net = par->net;
- /* For now only allow adding matches from the initial user namespace */
+ /* Only allow the common case where the userns of the writer
+ * matches the userns of the network namespace.
+ */
if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) &&
- (current_user_ns() != &init_user_ns))
+ (current_user_ns() != net->user_ns))
return -EINVAL;
+
+ /* Ensure the uids are valid */
+ if (info->match & XT_OWNER_UID) {
+ kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
+ kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
+
+ if (!uid_valid(uid_min) || !uid_valid(uid_max) ||
+ (info->uid_max < info->uid_min) ||
+ uid_lt(uid_max, uid_min)) {
+ return -EINVAL;
+ }
+ }
+
+ /* Ensure the gids are valid */
+ if (info->match & XT_OWNER_GID) {
+ kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
+ kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
+
+ if (!gid_valid(gid_min) || !gid_valid(gid_max) ||
+ (info->gid_max < info->gid_min) ||
+ gid_lt(gid_max, gid_min)) {
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -35,6 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct xt_owner_match_info *info = par->matchinfo;
const struct file *filp;
struct sock *sk = skb_to_full_sk(skb);
+ struct net *net = par->net;
if (sk == NULL || sk->sk_socket == NULL)
return (info->match ^ info->invert) == 0;
@@ -51,8 +80,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
(XT_OWNER_UID | XT_OWNER_GID)) == 0;
if (info->match & XT_OWNER_UID) {
- kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min);
- kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max);
+ kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
+ kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
if ((uid_gte(filp->f_cred->fsuid, uid_min) &&
uid_lte(filp->f_cred->fsuid, uid_max)) ^
!(info->invert & XT_OWNER_UID))
@@ -60,8 +89,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
}
if (info->match & XT_OWNER_GID) {
- kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min);
- kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max);
+ kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
+ kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
gid_lte(filp->f_cred->fsgid, gid_max)) ^
!(info->invert & XT_OWNER_GID))
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index c14d4645daa3..ade024c90f4f 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -83,8 +83,6 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
return false;
}
-#define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg)))
-
th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
if (th == NULL) {
/* We've been asked to examine this packet, and we
@@ -102,9 +100,8 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
ntohs(th->dest),
!!(tcpinfo->invflags & XT_TCP_INV_DSTPT)))
return false;
- if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
- == tcpinfo->flg_cmp,
- XT_TCP_INV_FLAGS))
+ if (!NF_INVF(tcpinfo, XT_TCP_INV_FLAGS,
+ (((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp))
return false;
if (tcpinfo->option) {
if (th->doff * 4 < sizeof(_tcph)) {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 48b58957adf4..9d92c4c46871 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1341,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int num)
{
- return reciprocal_scale(skb_get_hash(skb), num);
+ return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
}
static unsigned int fanout_demux_lb(struct packet_fanout *f,
diff --git a/net/rds/bind.c b/net/rds/bind.c
index b22ea956522b..095f6ce583fe 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -81,6 +81,8 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
if (*port != 0) {
rover = be16_to_cpu(*port);
+ if (rover == RDS_FLAG_PROBE_PORT)
+ return -EINVAL;
last = rover;
} else {
rover = max_t(u16, prandom_u32(), 2);
@@ -91,12 +93,16 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
if (rover == 0)
rover++;
+ if (rover == RDS_FLAG_PROBE_PORT)
+ continue;
key = ((u64)addr << 32) | cpu_to_be16(rover);
if (rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms))
continue;
rs->rs_bound_key = key;
rs->rs_bound_addr = addr;
+ net_get_random_once(&rs->rs_hash_initval,
+ sizeof(rs->rs_hash_initval));
rs->rs_bound_port = cpu_to_be16(rover);
rs->rs_bound_node.next = NULL;
rds_sock_addref(rs);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 19a4fee5f4dd..f5058559bb08 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -155,7 +155,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
struct hlist_head *head = rds_conn_bucket(laddr, faddr);
struct rds_transport *loop_trans;
unsigned long flags;
- int ret;
+ int ret, i;
rcu_read_lock();
conn = rds_conn_lookup(net, head, laddr, faddr, trans);
@@ -211,6 +211,12 @@ static struct rds_connection *__rds_conn_create(struct net *net,
conn->c_trans = trans;
+ init_waitqueue_head(&conn->c_hs_waitq);
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ __rds_conn_path_init(conn, &conn->c_path[i],
+ is_outgoing);
+ conn->c_path[i].cp_index = i;
+ }
ret = trans->conn_alloc(conn, gfp);
if (ret) {
kmem_cache_free(rds_conn_slab, conn);
@@ -263,14 +269,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
kmem_cache_free(rds_conn_slab, conn);
conn = found;
} else {
- int i;
-
- for (i = 0; i < RDS_MPATH_WORKERS; i++) {
- __rds_conn_path_init(conn, &conn->c_path[i],
- is_outgoing);
- conn->c_path[i].cp_index = i;
- }
-
hlist_add_head_rcu(&conn->c_hash_node, head);
rds_cong_add_conn(conn);
rds_conn_count++;
@@ -668,6 +666,7 @@ EXPORT_SYMBOL_GPL(rds_conn_path_drop);
void rds_conn_drop(struct rds_connection *conn)
{
+ WARN_ON(conn->c_trans->t_mp_capable);
rds_conn_path_drop(&conn->c_path[0]);
}
EXPORT_SYMBOL_GPL(rds_conn_drop);
diff --git a/net/rds/message.c b/net/rds/message.c
index 756c73729126..6cb91061556a 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -41,6 +41,7 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
[RDS_EXTHDR_VERSION] = sizeof(struct rds_ext_header_version),
[RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma),
[RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest),
+[RDS_EXTHDR_NPATHS] = sizeof(u16),
};
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 6ef07bd27227..b2d17f0fafa8 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -85,7 +85,9 @@ enum {
#define RDS_RECV_REFILL 3
/* Max number of multipaths per RDS connection. Must be a power of 2 */
-#define RDS_MPATH_WORKERS 1
+#define RDS_MPATH_WORKERS 8
+#define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \
+ (rs)->rs_hash_initval) & ((n) - 1))
/* Per mpath connection state */
struct rds_conn_path {
@@ -131,7 +133,8 @@ struct rds_connection {
__be32 c_laddr;
__be32 c_faddr;
unsigned int c_loopback:1,
- c_pad_to_32:31;
+ c_ping_triggered:1,
+ c_pad_to_32:30;
int c_npaths;
struct rds_connection *c_passive;
struct rds_transport *c_trans;
@@ -147,6 +150,7 @@ struct rds_connection {
unsigned long c_map_queued;
struct rds_conn_path c_path[RDS_MPATH_WORKERS];
+ wait_queue_head_t c_hs_waitq; /* handshake waitq */
};
static inline
@@ -166,6 +170,17 @@ void rds_conn_net_set(struct rds_connection *conn, struct net *net)
#define RDS_FLAG_RETRANSMITTED 0x04
#define RDS_MAX_ADV_CREDIT 255
+/* RDS_FLAG_PROBE_PORT is the reserved sport used for sending a ping
+ * probe to exchange control information before establishing a connection.
+ * Currently the control information that is exchanged is the number of
+ * supported paths. If the peer is a legacy (older kernel revision) peer,
+ * it would return a pong message without additional control information
+ * that would then alert the sender that the peer was an older rev.
+ */
+#define RDS_FLAG_PROBE_PORT 1
+#define RDS_HS_PROBE(sport, dport) \
+ ((sport == RDS_FLAG_PROBE_PORT && dport == 0) || \
+ (sport == 0 && dport == RDS_FLAG_PROBE_PORT))
/*
* Maximum space available for extension headers.
*/
@@ -225,6 +240,11 @@ struct rds_ext_header_rdma_dest {
__be32 h_rdma_offset;
};
+/* Extension header announcing number of paths.
+ * Implicit length = 2 bytes.
+ */
+#define RDS_EXTHDR_NPATHS 4
+
#define __RDS_EXTHDR_MAX 16 /* for now */
struct rds_incoming {
@@ -545,6 +565,7 @@ struct rds_sock {
/* Socket options - in case there will be more */
unsigned char rs_recverr,
rs_cong_monitor;
+ u32 rs_hash_initval;
};
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
diff --git a/net/rds/recv.c b/net/rds/recv.c
index fed53a6c2890..cbfabdf3ff48 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -156,6 +156,67 @@ static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock
}
}
+static void rds_recv_hs_exthdrs(struct rds_header *hdr,
+ struct rds_connection *conn)
+{
+ unsigned int pos = 0, type, len;
+ union {
+ struct rds_ext_header_version version;
+ u16 rds_npaths;
+ } buffer;
+
+ while (1) {
+ len = sizeof(buffer);
+ type = rds_message_next_extension(hdr, &pos, &buffer, &len);
+ if (type == RDS_EXTHDR_NONE)
+ break;
+ /* Process extension header here */
+ switch (type) {
+ case RDS_EXTHDR_NPATHS:
+ conn->c_npaths = min_t(int, RDS_MPATH_WORKERS,
+ buffer.rds_npaths);
+ break;
+ default:
+ pr_warn_ratelimited("ignoring unknown exthdr type "
+ "0x%x\n", type);
+ }
+ }
+ /* if RDS_EXTHDR_NPATHS was not found, default to a single-path */
+ conn->c_npaths = max_t(int, conn->c_npaths, 1);
+}
+
+/* rds_start_mprds() will synchronously start multiple paths when appropriate.
+ * The scheme is based on the following rules:
+ *
+ * 1. rds_sendmsg on first connect attempt sends the probe ping, with the
+ * sender's npaths (s_npaths)
+ * 2. rcvr of probe-ping knows the mprds_paths = min(s_npaths, r_npaths). It
+ * sends back a probe-pong with r_npaths. After that, if rcvr is the
+ * smaller ip addr, it starts rds_conn_path_connect_if_down on all
+ * mprds_paths.
+ * 3. sender gets woken up, and can move to rds_conn_path_connect_if_down.
+ * If it is the smaller ipaddr, rds_conn_path_connect_if_down can be
+ * called after reception of the probe-pong on all mprds_paths.
+ * Otherwise (sender of probe-ping is not the smaller ip addr): just call
+ * rds_conn_path_connect_if_down on the hashed path. (see rule 4)
+ * 4. when cp_index > 0, rds_connect_worker must only trigger
+ * a connection if laddr < faddr.
+ * 5. sender may end up queuing the packet on the cp. will get sent out later.
+ * when connection is completed.
+ */
+static void rds_start_mprds(struct rds_connection *conn)
+{
+ int i;
+ struct rds_conn_path *cp;
+
+ if (conn->c_npaths > 1 && conn->c_laddr < conn->c_faddr) {
+ for (i = 1; i < conn->c_npaths; i++) {
+ cp = &conn->c_path[i];
+ rds_conn_path_connect_if_down(cp);
+ }
+ }
+}
+
/*
* The transport must make sure that this is serialized against other
* rx and conn reset on this specific conn.
@@ -232,6 +293,20 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
}
rds_stats_inc(s_recv_ping);
rds_send_pong(cp, inc->i_hdr.h_sport);
+ /* if this is a handshake ping, start multipath if necessary */
+ if (RDS_HS_PROBE(inc->i_hdr.h_sport, inc->i_hdr.h_dport)) {
+ rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn);
+ rds_start_mprds(cp->cp_conn);
+ }
+ goto out;
+ }
+
+ if (inc->i_hdr.h_dport == RDS_FLAG_PROBE_PORT &&
+ inc->i_hdr.h_sport == 0) {
+ rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn);
+ /* if this is a handshake pong, start multipath if necessary */
+ rds_start_mprds(cp->cp_conn);
+ wake_up(&cp->cp_conn->c_hs_waitq);
goto out;
}
diff --git a/net/rds/send.c b/net/rds/send.c
index 5a9caf1da896..896626b9a0ef 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -963,6 +963,29 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
return ret;
}
+static void rds_send_ping(struct rds_connection *conn);
+
+static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn)
+{
+ int hash;
+
+ if (conn->c_npaths == 0)
+ hash = RDS_MPATH_HASH(rs, RDS_MPATH_WORKERS);
+ else
+ hash = RDS_MPATH_HASH(rs, conn->c_npaths);
+ if (conn->c_npaths == 0 && hash != 0) {
+ rds_send_ping(conn);
+
+ if (conn->c_npaths == 0) {
+ wait_event_interruptible(conn->c_hs_waitq,
+ (conn->c_npaths != 0));
+ }
+ if (conn->c_npaths == 1)
+ hash = 0;
+ }
+ return hash;
+}
+
int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
{
struct sock *sk = sock->sk;
@@ -1075,7 +1098,10 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
goto out;
}
- cpath = &conn->c_path[0];
+ if (conn->c_trans->t_mp_capable)
+ cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)];
+ else
+ cpath = &conn->c_path[0];
rds_conn_path_connect_if_down(cpath);
@@ -1135,10 +1161,16 @@ out:
}
/*
- * Reply to a ping packet.
+ * send out a probe. Can be shared by rds_send_ping,
+ * rds_send_pong, rds_send_hb.
+ * rds_send_hb should use h_flags
+ * RDS_FLAG_HB_PING|RDS_FLAG_ACK_REQUIRED
+ * or
+ * RDS_FLAG_HB_PONG|RDS_FLAG_ACK_REQUIRED
*/
int
-rds_send_pong(struct rds_conn_path *cp, __be16 dport)
+rds_send_probe(struct rds_conn_path *cp, __be16 sport,
+ __be16 dport, u8 h_flags)
{
struct rds_message *rm;
unsigned long flags;
@@ -1166,9 +1198,18 @@ rds_send_pong(struct rds_conn_path *cp, __be16 dport)
rm->m_inc.i_conn = cp->cp_conn;
rm->m_inc.i_conn_path = cp;
- rds_message_populate_header(&rm->m_inc.i_hdr, 0, dport,
+ rds_message_populate_header(&rm->m_inc.i_hdr, sport, dport,
cp->cp_next_tx_seq);
+ rm->m_inc.i_hdr.h_flags |= h_flags;
cp->cp_next_tx_seq++;
+
+ if (RDS_HS_PROBE(sport, dport) && cp->cp_conn->c_trans->t_mp_capable) {
+ u16 npaths = RDS_MPATH_WORKERS;
+
+ rds_message_add_extension(&rm->m_inc.i_hdr,
+ RDS_EXTHDR_NPATHS, &npaths,
+ sizeof(npaths));
+ }
spin_unlock_irqrestore(&cp->cp_lock, flags);
rds_stats_inc(s_send_queued);
@@ -1185,3 +1226,25 @@ out:
rds_message_put(rm);
return ret;
}
+
+int
+rds_send_pong(struct rds_conn_path *cp, __be16 dport)
+{
+ return rds_send_probe(cp, 0, dport, 0);
+}
+
+void
+rds_send_ping(struct rds_connection *conn)
+{
+ unsigned long flags;
+ struct rds_conn_path *cp = &conn->c_path[0];
+
+ spin_lock_irqsave(&cp->cp_lock, flags);
+ if (conn->c_ping_triggered) {
+ spin_unlock_irqrestore(&cp->cp_lock, flags);
+ return;
+ }
+ conn->c_ping_triggered = 1;
+ spin_unlock_irqrestore(&cp->cp_lock, flags);
+ rds_send_probe(&conn->c_path[0], RDS_FLAG_PROBE_PORT, 0, 0);
+}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index d278432f080b..fcddacc92e01 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -38,7 +38,6 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
-#include "rds_single_path.h"
#include "rds.h"
#include "tcp.h"
@@ -168,35 +167,21 @@ void rds_tcp_reset_callbacks(struct socket *sock,
wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags));
lock_sock(osock->sk);
/* reset receive side state for rds_tcp_data_recv() for osock */
+ cancel_delayed_work_sync(&cp->cp_send_w);
+ cancel_delayed_work_sync(&cp->cp_recv_w);
if (tc->t_tinc) {
rds_inc_put(&tc->t_tinc->ti_inc);
tc->t_tinc = NULL;
}
tc->t_tinc_hdr_rem = sizeof(struct rds_header);
tc->t_tinc_data_rem = 0;
- tc->t_sock = NULL;
-
- write_lock_bh(&osock->sk->sk_callback_lock);
-
- osock->sk->sk_user_data = NULL;
- osock->sk->sk_data_ready = tc->t_orig_data_ready;
- osock->sk->sk_write_space = tc->t_orig_write_space;
- osock->sk->sk_state_change = tc->t_orig_state_change;
- write_unlock_bh(&osock->sk->sk_callback_lock);
+ rds_tcp_restore_callbacks(osock, tc);
release_sock(osock->sk);
sock_release(osock);
newsock:
rds_send_path_reset(cp);
lock_sock(sock->sk);
- write_lock_bh(&sock->sk->sk_callback_lock);
- tc->t_sock = sock;
- tc->t_cpath = cp;
- sock->sk->sk_user_data = cp;
- sock->sk->sk_data_ready = rds_tcp_data_ready;
- sock->sk->sk_write_space = rds_tcp_write_space;
- sock->sk->sk_state_change = rds_tcp_state_change;
-
- write_unlock_bh(&sock->sk->sk_callback_lock);
+ rds_tcp_set_callbacks(sock, cp);
release_sock(sock->sk);
}
@@ -372,6 +357,7 @@ struct rds_transport rds_tcp_transport = {
.t_name = "tcp",
.t_type = RDS_TRANS_TCP,
.t_prefer_loopback = 1,
+ .t_mp_capable = 1,
};
static int rds_tcp_netid;
@@ -551,6 +537,13 @@ static void rds_tcp_kill_sock(struct net *net)
}
}
+void *rds_tcp_listen_sock_def_readable(struct net *net)
+{
+ struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+ return rtn->rds_tcp_listen_sock->sk->sk_user_data;
+}
+
static int rds_tcp_dev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -651,7 +644,7 @@ static int rds_tcp_init(void)
ret = rds_tcp_recv_init();
if (ret)
- goto out_slab;
+ goto out_pernet;
ret = rds_trans_register(&rds_tcp_transport);
if (ret)
@@ -663,8 +656,9 @@ static int rds_tcp_init(void)
out_recv:
rds_tcp_recv_exit();
-out_slab:
+out_pernet:
unregister_pernet_subsys(&rds_tcp_net_ops);
+out_slab:
kmem_cache_destroy(rds_tcp_conn_slab);
out:
return ret;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 1c3160faa963..9a1cc8906576 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -70,6 +70,7 @@ void rds_tcp_listen_stop(struct socket *);
void rds_tcp_listen_data_ready(struct sock *sk);
int rds_tcp_accept_one(struct socket *sock);
int rds_tcp_keepalive(struct socket *sock);
+void *rds_tcp_listen_sock_def_readable(struct net *net);
/* tcp_recv.c */
int rds_tcp_recv_init(void);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index c916715fbe61..05f61c533ed3 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -34,7 +34,6 @@
#include <linux/in.h>
#include <net/tcp.h>
-#include "rds_single_path.h"
#include "rds.h"
#include "tcp.h"
@@ -82,6 +81,12 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
struct rds_connection *conn = cp->cp_conn;
struct rds_tcp_connection *tc = cp->cp_transport_data;
+ /* for multipath rds,we only trigger the connection after
+ * the handshake probe has determined the number of paths.
+ */
+ if (cp->cp_index > 0 && cp->cp_conn->c_npaths < 2)
+ return -EAGAIN;
+
mutex_lock(&tc->t_conn_path_lock);
if (rds_conn_path_up(cp)) {
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index ca975a217a49..e0b23fb5b8d5 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -35,7 +35,6 @@
#include <linux/in.h>
#include <net/tcp.h>
-#include "rds_single_path.h"
#include "rds.h"
#include "tcp.h"
@@ -71,6 +70,52 @@ bail:
return ret;
}
+/* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the
+ * client's ipaddr < server's ipaddr. Otherwise, close the accepted
+ * socket and force a reconneect from smaller -> larger ip addr. The reason
+ * we special case cp_index 0 is to allow the rds probe ping itself to itself
+ * get through efficiently.
+ * Since reconnects are only initiated from the node with the numerically
+ * smaller ip address, we recycle conns in RDS_CONN_ERROR on the passive side
+ * by moving them to CONNECTING in this function.
+ */
+struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
+{
+ int i;
+ bool peer_is_smaller = (conn->c_faddr < conn->c_laddr);
+ int npaths = conn->c_npaths;
+
+ if (npaths <= 1) {
+ struct rds_conn_path *cp = &conn->c_path[0];
+ int ret;
+
+ ret = rds_conn_path_transition(cp, RDS_CONN_DOWN,
+ RDS_CONN_CONNECTING);
+ if (!ret)
+ rds_conn_path_transition(cp, RDS_CONN_ERROR,
+ RDS_CONN_CONNECTING);
+ return cp->cp_transport_data;
+ }
+
+ /* for mprds, paths with cp_index > 0 MUST be initiated by the peer
+ * with the smaller address.
+ */
+ if (!peer_is_smaller)
+ return NULL;
+
+ for (i = 1; i < npaths; i++) {
+ struct rds_conn_path *cp = &conn->c_path[i];
+
+ if (rds_conn_path_transition(cp, RDS_CONN_DOWN,
+ RDS_CONN_CONNECTING) ||
+ rds_conn_path_transition(cp, RDS_CONN_ERROR,
+ RDS_CONN_CONNECTING)) {
+ return cp->cp_transport_data;
+ }
+ }
+ return NULL;
+}
+
int rds_tcp_accept_one(struct socket *sock)
{
struct socket *new_sock = NULL;
@@ -120,12 +165,14 @@ int rds_tcp_accept_one(struct socket *sock)
* If the client reboots, this conn will need to be cleaned up.
* rds_tcp_state_change() will do that cleanup
*/
- rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
- cp = &conn->c_path[0];
- rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
+ rs_tcp = rds_tcp_accept_one_path(conn);
+ if (!rs_tcp)
+ goto rst_nsk;
mutex_lock(&rs_tcp->t_conn_path_lock);
- conn_state = rds_conn_state(conn);
- if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP)
+ cp = rs_tcp->t_cpath;
+ conn_state = rds_conn_path_state(cp);
+ if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP &&
+ conn_state != RDS_CONN_ERROR)
goto rst_nsk;
if (rs_tcp->t_sock) {
/* Need to resolve a duelling SYN between peers.
@@ -135,11 +182,11 @@ int rds_tcp_accept_one(struct socket *sock)
* c_transport_data.
*/
if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr) ||
- !conn->c_path[0].cp_outgoing) {
+ !cp->cp_outgoing) {
goto rst_nsk;
} else {
rds_tcp_reset_callbacks(new_sock, cp);
- conn->c_path[0].cp_outgoing = 0;
+ cp->cp_outgoing = 0;
/* rds_connect_path_complete() marks RDS_CONN_UP */
rds_connect_path_complete(cp, RDS_CONN_RESETTING);
}
@@ -183,6 +230,8 @@ void rds_tcp_listen_data_ready(struct sock *sk)
*/
if (sk->sk_state == TCP_LISTEN)
rds_tcp_accept_work(sk);
+ else
+ ready = rds_tcp_listen_sock_def_readable(sock_net(sk));
out:
read_unlock_bh(&sk->sk_callback_lock);
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 57e0f5826406..89d09b481f47 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -81,7 +81,8 @@ static int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len)
int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
unsigned int hdr_off, unsigned int sg, unsigned int off)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_conn_path *cp = rm->m_inc.i_conn_path;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
int done = 0;
int ret = 0;
int more;
@@ -150,10 +151,17 @@ out:
rds_tcp_stats_inc(s_tcp_sndbuf_full);
ret = 0;
} else {
- printk(KERN_WARNING "RDS/tcp: send to %pI4 "
- "returned %d, disconnecting and reconnecting\n",
- &conn->c_faddr, ret);
- rds_conn_drop(conn);
+ /* No need to disconnect/reconnect if path_drop
+ * has already been triggered, because, e.g., of
+ * an incoming RST.
+ */
+ if (rds_conn_path_up(cp)) {
+ pr_warn("RDS/tcp: send to %pI4 on cp [%d]"
+ "returned %d, "
+ "disconnecting and reconnecting\n",
+ &conn->c_faddr, cp->cp_index, ret);
+ rds_conn_path_drop(cp);
+ }
}
}
if (done == 0)
diff --git a/net/rds/threads.c b/net/rds/threads.c
index bc97d67f29cc..e42df11bf30a 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -156,6 +156,8 @@ void rds_connect_worker(struct work_struct *work)
struct rds_connection *conn = cp->cp_conn;
int ret;
+ if (cp->cp_index > 1 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr)
+ return;
clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
if (ret) {
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 6522e50fb750..10f3f48a16a8 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -10,6 +10,7 @@ af-rxrpc-y := \
conn_client.o \
conn_event.o \
conn_object.o \
+ conn_service.o \
input.o \
insecure.o \
key.o \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 5d3e795a7c48..88effadd4b16 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -766,9 +766,9 @@ error_key_type:
error_sock:
proto_unregister(&rxrpc_proto);
error_proto:
- destroy_workqueue(rxrpc_workqueue);
-error_security:
rxrpc_exit_security();
+error_security:
+ destroy_workqueue(rxrpc_workqueue);
error_work_queue:
kmem_cache_destroy(rxrpc_call_jar);
error_call_jar:
@@ -788,27 +788,7 @@ static void __exit af_rxrpc_exit(void)
proto_unregister(&rxrpc_proto);
rxrpc_destroy_all_calls();
rxrpc_destroy_all_connections();
-
ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0);
-
- /* We need to flush the scheduled work twice because the local endpoint
- * records involve a work item in their destruction as they can only be
- * destroyed from process context. However, a connection may have a
- * work item outstanding - and this will pin the local endpoint record
- * until the connection goes away.
- *
- * Peers don't pin locals and calls pin sockets - which prevents the
- * module from being unloaded - so we should only need two flushes.
- */
- _debug("flush scheduled work");
- flush_workqueue(rxrpc_workqueue);
- _debug("flush scheduled work 2");
- flush_workqueue(rxrpc_workqueue);
- _debug("synchronise RCU");
- rcu_barrier();
- _debug("destroy locals");
- ASSERT(idr_is_empty(&rxrpc_client_conn_ids));
- idr_destroy(&rxrpc_client_conn_ids);
rxrpc_destroy_all_locals();
remove_proc_entry("rxrpc_conns", init_net.proc_net);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 702db72196fb..1bb9e7ac9e14 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -10,6 +10,7 @@
*/
#include <linux/atomic.h>
+#include <linux/seqlock.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <rxrpc/packet.h>
@@ -35,7 +36,6 @@ struct rxrpc_crypt {
queue_delayed_work(rxrpc_workqueue, (WS), (D))
#define rxrpc_queue_call(CALL) rxrpc_queue_work(&(CALL)->processor)
-#define rxrpc_queue_conn(CONN) rxrpc_queue_work(&(CONN)->processor)
struct rxrpc_connection;
@@ -141,17 +141,16 @@ struct rxrpc_security {
int (*init_connection_security)(struct rxrpc_connection *);
/* prime a connection's packet security */
- void (*prime_packet_security)(struct rxrpc_connection *);
+ int (*prime_packet_security)(struct rxrpc_connection *);
/* impose security on a packet */
- int (*secure_packet)(const struct rxrpc_call *,
+ int (*secure_packet)(struct rxrpc_call *,
struct sk_buff *,
size_t,
void *);
/* verify the security on a received packet */
- int (*verify_packet)(const struct rxrpc_call *, struct sk_buff *,
- u32 *);
+ int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, u32 *);
/* issue a challenge */
int (*issue_challenge)(struct rxrpc_connection *);
@@ -208,7 +207,7 @@ struct rxrpc_peer {
struct hlist_head error_targets; /* targets for net error distribution */
struct work_struct error_distributor;
struct rb_root service_conns; /* Service connections */
- rwlock_t conn_lock;
+ seqlock_t service_conn_lock;
spinlock_t lock; /* access lock */
unsigned int if_mtu; /* interface MTU for this peer */
unsigned int mtu; /* network MTU for this peer */
@@ -231,18 +230,12 @@ struct rxrpc_peer {
* Keys for matching a connection.
*/
struct rxrpc_conn_proto {
- unsigned long hash_key;
- struct rxrpc_local *local; /* Representation of local endpoint */
- u32 epoch; /* epoch of this connection */
- u32 cid; /* connection ID */
- u8 in_clientflag; /* RXRPC_CLIENT_INITIATED if we are server */
- u8 addr_size; /* Size of the address */
- sa_family_t family; /* Transport protocol */
- __be16 port; /* Peer UDP/UDP6 port */
- union { /* Peer address */
- struct in_addr ipv4_addr;
- struct in6_addr ipv6_addr;
- u32 raw_addr[0];
+ union {
+ struct {
+ u32 epoch; /* epoch of this connection */
+ u32 cid; /* connection ID */
+ };
+ u64 index_key;
};
};
@@ -256,6 +249,37 @@ struct rxrpc_conn_parameters {
};
/*
+ * Bits in the connection flags.
+ */
+enum rxrpc_conn_flag {
+ RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */
+ RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */
+ RXRPC_CONN_IN_CLIENT_CONNS, /* Conn is in local->client_conns */
+};
+
+/*
+ * Events that can be raised upon a connection.
+ */
+enum rxrpc_conn_event {
+ RXRPC_CONN_EV_CHALLENGE, /* Send challenge packet */
+};
+
+/*
+ * The connection protocol state.
+ */
+enum rxrpc_conn_proto_state {
+ RXRPC_CONN_UNUSED, /* Connection not yet attempted */
+ RXRPC_CONN_CLIENT, /* Client connection */
+ RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */
+ RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */
+ RXRPC_CONN_SERVICE, /* Service secured connection */
+ RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */
+ RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */
+ RXRPC_CONN_NETWORK_ERROR, /* Conn terminated by network error */
+ RXRPC_CONN__NR_STATES
+};
+
+/*
* RxRPC connection definition
* - matched by { local, peer, epoch, conn_id, direction }
* - each connection can only handle four simultaneous calls
@@ -265,44 +289,38 @@ struct rxrpc_connection {
struct rxrpc_conn_parameters params;
spinlock_t channel_lock;
- struct rxrpc_call *channels[RXRPC_MAXCALLS]; /* active calls */
+
+ struct rxrpc_channel {
+ struct rxrpc_call __rcu *call; /* Active call */
+ u32 call_id; /* ID of current call */
+ u32 call_counter; /* Call ID counter */
+ u32 last_call; /* ID of last call */
+ u32 last_result; /* Result of last call (0/abort) */
+ } channels[RXRPC_MAXCALLS];
wait_queue_head_t channel_wq; /* queue to wait for channel to become available */
+ struct rcu_head rcu;
struct work_struct processor; /* connection event processor */
union {
struct rb_node client_node; /* Node in local->client_conns */
struct rb_node service_node; /* Node in peer->service_conns */
};
struct list_head link; /* link in master connection list */
- struct rb_root calls; /* calls on this connection */
struct sk_buff_head rx_queue; /* received conn-level packets */
const struct rxrpc_security *security; /* applied security module */
struct key *server_key; /* security for this service */
struct crypto_skcipher *cipher; /* encryption handle */
struct rxrpc_crypt csum_iv; /* packet checksum base */
unsigned long flags;
-#define RXRPC_CONN_HAS_IDR 0 /* - Has a client conn ID assigned */
unsigned long events;
-#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */
unsigned long put_time; /* Time at which last put */
- rwlock_t lock; /* access lock */
spinlock_t state_lock; /* state-change lock */
atomic_t usage;
- enum { /* current state of connection */
- RXRPC_CONN_UNUSED, /* - connection not yet attempted */
- RXRPC_CONN_CLIENT, /* - client connection */
- RXRPC_CONN_SERVER_UNSECURED, /* - server unsecured connection */
- RXRPC_CONN_SERVER_CHALLENGING, /* - server challenging for security */
- RXRPC_CONN_SERVER, /* - server secured connection */
- RXRPC_CONN_REMOTELY_ABORTED, /* - conn aborted by peer */
- RXRPC_CONN_LOCALLY_ABORTED, /* - conn aborted locally */
- RXRPC_CONN_NETWORK_ERROR, /* - conn terminated by network error */
- } state;
+ enum rxrpc_conn_proto_state state : 8; /* current state of connection */
u32 local_abort; /* local abort code */
u32 remote_abort; /* remote abort code */
int error; /* local error incurred */
int debug_id; /* debug ID for printks */
- unsigned int call_counter; /* call ID counter */
atomic_t serial; /* packet serial number counter */
atomic_t hi_serial; /* highest serial number received */
atomic_t avail_chans; /* number of channels available */
@@ -382,6 +400,7 @@ enum rxrpc_call_state {
* - matched by { connection, call_id }
*/
struct rxrpc_call {
+ struct rcu_head rcu;
struct rxrpc_connection *conn; /* connection carrying call */
struct rxrpc_sock *socket; /* socket responsible */
struct timer_list lifetimer; /* lifetime remaining on call */
@@ -394,11 +413,11 @@ struct rxrpc_call {
struct hlist_node error_link; /* link in error distribution list */
struct list_head accept_link; /* calls awaiting acceptance */
struct rb_node sock_node; /* node in socket call tree */
- struct rb_node conn_node; /* node in connection call tree */
struct sk_buff_head rx_queue; /* received packets */
struct sk_buff_head rx_oos_queue; /* packets received out of sequence */
struct sk_buff *tx_pending; /* Tx socket buffer being filled */
wait_queue_head_t tx_waitq; /* wait for Tx window space to become available */
+ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */
unsigned long user_call_ID; /* user-defined call ID */
unsigned long creation_jif; /* time of call creation */
unsigned long flags;
@@ -442,19 +461,12 @@ struct rxrpc_call {
#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
- struct hlist_node hash_node;
- unsigned long hash_key; /* Full hash key */
- u8 in_clientflag; /* Copy of conn->in_clientflag for hashing */
- struct rxrpc_local *local; /* Local endpoint. Used for hashing. */
- sa_family_t family; /* Frame protocol */
+ u8 in_clientflag; /* Copy of conn->in_clientflag */
+ struct rxrpc_local *local; /* Local endpoint. */
u32 call_id; /* call ID on connection */
u32 cid; /* connection ID plus channel index */
u32 epoch; /* epoch of this connection */
u16 service_id; /* service ID */
- union { /* Peer IP address for hashing */
- __be32 ipv4_addr;
- __u8 ipv6_addr[16]; /* Anticipates eventual IPv6 support */
- } peer_ip;
};
/*
@@ -502,8 +514,6 @@ extern struct kmem_cache *rxrpc_call_jar;
extern struct list_head rxrpc_calls;
extern rwlock_t rxrpc_call_lock;
-struct rxrpc_call *rxrpc_find_call_hash(struct rxrpc_host_header *,
- void *, sa_family_t, const void *);
struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
@@ -522,8 +532,10 @@ void __exit rxrpc_destroy_all_calls(void);
*/
extern struct idr rxrpc_client_conn_ids;
-int rxrpc_get_client_connection_id(struct rxrpc_connection *, gfp_t);
-void rxrpc_put_client_connection_id(struct rxrpc_connection *);
+void rxrpc_destroy_client_conn_ids(void);
+int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
+ struct sockaddr_rxrpc *, gfp_t);
+void rxrpc_unpublish_client_conn(struct rxrpc_connection *);
/*
* conn_event.c
@@ -539,17 +551,14 @@ extern unsigned int rxrpc_connection_expiry;
extern struct list_head rxrpc_connections;
extern rwlock_t rxrpc_connection_lock;
-int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
- struct sockaddr_rxrpc *, gfp_t);
-struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_local *,
- struct rxrpc_peer *,
- struct sk_buff *);
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
+struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
+struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
+ struct sk_buff *);
+void __rxrpc_disconnect_call(struct rxrpc_call *);
void rxrpc_disconnect_call(struct rxrpc_call *);
void rxrpc_put_connection(struct rxrpc_connection *);
void __exit rxrpc_destroy_all_connections(void);
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *,
- struct rxrpc_peer *,
- struct sk_buff *);
static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
{
@@ -558,7 +567,7 @@ static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn)
{
- return conn->proto.in_clientflag;
+ return !rxrpc_conn_is_client(conn);
}
static inline void rxrpc_get_connection(struct rxrpc_connection *conn)
@@ -566,6 +575,31 @@ static inline void rxrpc_get_connection(struct rxrpc_connection *conn)
atomic_inc(&conn->usage);
}
+static inline
+struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
+{
+ return atomic_inc_not_zero(&conn->usage) ? conn : NULL;
+}
+
+static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn)
+{
+ if (!rxrpc_get_connection_maybe(conn))
+ return false;
+ if (!rxrpc_queue_work(&conn->processor))
+ rxrpc_put_connection(conn);
+ return true;
+}
+
+/*
+ * conn_service.c
+ */
+struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *,
+ struct sk_buff *);
+struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *,
+ struct sockaddr_rxrpc *,
+ struct sk_buff *);
+void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
+
/*
* input.c
*/
@@ -618,6 +652,11 @@ static inline void rxrpc_put_local(struct rxrpc_local *local)
__rxrpc_put_local(local);
}
+static inline void rxrpc_queue_local(struct rxrpc_local *local)
+{
+ rxrpc_queue_work(&local->processor);
+}
+
/*
* misc.c
*/
@@ -722,8 +761,7 @@ static inline void rxrpc_sysctl_exit(void) {}
/*
* utils.c
*/
-void rxrpc_get_addr_from_skb(struct rxrpc_local *, const struct sk_buff *,
- struct sockaddr_rxrpc *);
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
/*
* debug tracing
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 202e053a3c6d..0b2832141bd0 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -75,7 +75,6 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
{
struct rxrpc_connection *conn;
struct rxrpc_skb_priv *sp, *nsp;
- struct rxrpc_peer *peer;
struct rxrpc_call *call;
struct sk_buff *notification;
int ret;
@@ -94,15 +93,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
rxrpc_new_skb(notification);
notification->mark = RXRPC_SKB_MARK_NEW_CALL;
- peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
- if (!peer) {
- _debug("no peer");
- ret = -EBUSY;
- goto error;
- }
-
- conn = rxrpc_incoming_connection(local, peer, skb);
- rxrpc_put_peer(peer);
+ conn = rxrpc_incoming_connection(local, srx, skb);
if (IS_ERR(conn)) {
_debug("no conn");
ret = PTR_ERR(conn);
@@ -128,12 +119,11 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
spin_lock(&call->conn->state_lock);
if (sp->hdr.securityIndex > 0 &&
- call->conn->state == RXRPC_CONN_SERVER_UNSECURED) {
+ call->conn->state == RXRPC_CONN_SERVICE_UNSECURED) {
_debug("await conn sec");
list_add_tail(&call->accept_link, &rx->secureq);
- call->conn->state = RXRPC_CONN_SERVER_CHALLENGING;
- rxrpc_get_connection(call->conn);
- set_bit(RXRPC_CONN_CHALLENGE, &call->conn->events);
+ call->conn->state = RXRPC_CONN_SERVICE_CHALLENGING;
+ set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events);
rxrpc_queue_conn(call->conn);
} else {
_debug("conn ready");
@@ -227,20 +217,8 @@ void rxrpc_accept_incoming_calls(struct rxrpc_local *local)
whdr._rsvd = 0;
whdr.serviceId = htons(sp->hdr.serviceId);
- /* determine the remote address */
- memset(&srx, 0, sizeof(srx));
- srx.srx_family = AF_RXRPC;
- srx.transport.family = local->srx.transport.family;
- srx.transport_type = local->srx.transport_type;
- switch (srx.transport.family) {
- case AF_INET:
- srx.transport_len = sizeof(struct sockaddr_in);
- srx.transport.sin.sin_port = udp_hdr(skb)->source;
- srx.transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
- break;
- default:
- goto busy;
- }
+ if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+ goto drop;
/* get the socket providing the service */
read_lock_bh(&local->services_lock);
@@ -286,6 +264,10 @@ busy:
rxrpc_free_skb(skb);
return;
+drop:
+ rxrpc_free_skb(skb);
+ return;
+
invalid_service:
skb->priority = RX_INVALID_OPERATION;
rxrpc_reject_packet(local, skb);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 0ba84295f913..fc32aa5764a2 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -858,11 +858,6 @@ void rxrpc_process_call(struct work_struct *work)
iov[0].iov_len = sizeof(whdr);
/* deal with events of a final nature */
- if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
- rxrpc_release_call(call);
- clear_bit(RXRPC_CALL_EV_RELEASE, &call->events);
- }
-
if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
enum rxrpc_skb_mark mark;
int error;
@@ -1094,7 +1089,7 @@ void rxrpc_process_call(struct work_struct *work)
if (call->state == RXRPC_CALL_SERVER_SECURING) {
_debug("securing");
- write_lock(&call->conn->lock);
+ write_lock(&call->socket->call_lock);
if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
!test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
_debug("not released");
@@ -1102,7 +1097,7 @@ void rxrpc_process_call(struct work_struct *work)
list_move_tail(&call->accept_link,
&call->socket->acceptq);
}
- write_unlock(&call->conn->lock);
+ write_unlock(&call->socket->call_lock);
read_lock(&call->state_lock);
if (call->state < RXRPC_CALL_COMPLETE)
set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
@@ -1144,6 +1139,11 @@ void rxrpc_process_call(struct work_struct *work)
goto maybe_reschedule;
}
+ if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
+ rxrpc_release_call(call);
+ clear_bit(RXRPC_CALL_EV_RELEASE, &call->events);
+ }
+
/* other events may have been raised since we started checking */
goto maybe_reschedule;
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index ad933daae13b..91287c9d01bb 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -14,7 +14,6 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/circ_buf.h>
-#include <linux/hashtable.h>
#include <linux/spinlock_types.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
@@ -61,142 +60,6 @@ static void rxrpc_dead_call_expired(unsigned long _call);
static void rxrpc_ack_time_expired(unsigned long _call);
static void rxrpc_resend_time_expired(unsigned long _call);
-static DEFINE_SPINLOCK(rxrpc_call_hash_lock);
-static DEFINE_HASHTABLE(rxrpc_call_hash, 10);
-
-/*
- * Hash function for rxrpc_call_hash
- */
-static unsigned long rxrpc_call_hashfunc(
- u8 in_clientflag,
- u32 cid,
- u32 call_id,
- u32 epoch,
- u16 service_id,
- sa_family_t family,
- void *localptr,
- unsigned int addr_size,
- const u8 *peer_addr)
-{
- const u16 *p;
- unsigned int i;
- unsigned long key;
-
- _enter("");
-
- key = (unsigned long)localptr;
- /* We just want to add up the __be32 values, so forcing the
- * cast should be okay.
- */
- key += epoch;
- key += service_id;
- key += call_id;
- key += (cid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT;
- key += cid & RXRPC_CHANNELMASK;
- key += in_clientflag;
- key += family;
- /* Step through the peer address in 16-bit portions for speed */
- for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++)
- key += *p;
- _leave(" key = 0x%lx", key);
- return key;
-}
-
-/*
- * Add a call to the hashtable
- */
-static void rxrpc_call_hash_add(struct rxrpc_call *call)
-{
- unsigned long key;
- unsigned int addr_size = 0;
-
- _enter("");
- switch (call->family) {
- case AF_INET:
- addr_size = sizeof(call->peer_ip.ipv4_addr);
- break;
- case AF_INET6:
- addr_size = sizeof(call->peer_ip.ipv6_addr);
- break;
- default:
- break;
- }
- key = rxrpc_call_hashfunc(call->in_clientflag, call->cid,
- call->call_id, call->epoch,
- call->service_id, call->family,
- call->conn->params.local, addr_size,
- call->peer_ip.ipv6_addr);
- /* Store the full key in the call */
- call->hash_key = key;
- spin_lock(&rxrpc_call_hash_lock);
- hash_add_rcu(rxrpc_call_hash, &call->hash_node, key);
- spin_unlock(&rxrpc_call_hash_lock);
- _leave("");
-}
-
-/*
- * Remove a call from the hashtable
- */
-static void rxrpc_call_hash_del(struct rxrpc_call *call)
-{
- _enter("");
- spin_lock(&rxrpc_call_hash_lock);
- hash_del_rcu(&call->hash_node);
- spin_unlock(&rxrpc_call_hash_lock);
- _leave("");
-}
-
-/*
- * Find a call in the hashtable and return it, or NULL if it
- * isn't there.
- */
-struct rxrpc_call *rxrpc_find_call_hash(
- struct rxrpc_host_header *hdr,
- void *localptr,
- sa_family_t family,
- const void *peer_addr)
-{
- unsigned long key;
- unsigned int addr_size = 0;
- struct rxrpc_call *call = NULL;
- struct rxrpc_call *ret = NULL;
- u8 in_clientflag = hdr->flags & RXRPC_CLIENT_INITIATED;
-
- _enter("");
- switch (family) {
- case AF_INET:
- addr_size = sizeof(call->peer_ip.ipv4_addr);
- break;
- case AF_INET6:
- addr_size = sizeof(call->peer_ip.ipv6_addr);
- break;
- default:
- break;
- }
-
- key = rxrpc_call_hashfunc(in_clientflag, hdr->cid, hdr->callNumber,
- hdr->epoch, hdr->serviceId,
- family, localptr, addr_size,
- peer_addr);
- hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) {
- if (call->hash_key == key &&
- call->call_id == hdr->callNumber &&
- call->cid == hdr->cid &&
- call->in_clientflag == in_clientflag &&
- call->service_id == hdr->serviceId &&
- call->family == family &&
- call->local == localptr &&
- memcmp(call->peer_ip.ipv6_addr, peer_addr,
- addr_size) == 0 &&
- call->epoch == hdr->epoch) {
- ret = call;
- break;
- }
- }
- _leave(" = %p", ret);
- return ret;
-}
-
/*
* find an extant server call
* - called in process context with IRQs enabled
@@ -305,20 +168,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
call->socket = rx;
call->rx_data_post = 1;
- /* Record copies of information for hashtable lookup */
- call->family = rx->family;
call->local = rx->local;
- switch (call->family) {
- case AF_INET:
- call->peer_ip.ipv4_addr = srx->transport.sin.sin_addr.s_addr;
- break;
- case AF_INET6:
- memcpy(call->peer_ip.ipv6_addr,
- srx->transport.sin6.sin6_addr.in6_u.u6_addr8,
- sizeof(call->peer_ip.ipv6_addr));
- break;
- }
-
call->service_id = srx->srx_service;
call->in_clientflag = 0;
@@ -345,9 +195,6 @@ static int rxrpc_begin_client_call(struct rxrpc_call *call,
call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
- /* Add the new call to the hashtable */
- rxrpc_call_hash_add(call);
-
spin_lock(&call->conn->params.peer->lock);
hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets);
spin_unlock(&call->conn->params.peer->lock);
@@ -425,9 +272,10 @@ error:
rxrpc_put_call(call);
write_lock_bh(&rxrpc_call_lock);
- list_del(&call->link);
+ list_del_init(&call->link);
write_unlock_bh(&rxrpc_call_lock);
+ call->state = RXRPC_CALL_DEAD;
rxrpc_put_call(call);
_leave(" = %d", ret);
return ERR_PTR(ret);
@@ -439,6 +287,7 @@ error:
*/
found_user_ID_now_present:
write_unlock(&rx->call_lock);
+ call->state = RXRPC_CALL_DEAD;
rxrpc_put_call(call);
_leave(" = -EEXIST [%p]", call);
return ERR_PTR(-EEXIST);
@@ -454,8 +303,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_call *call, *candidate;
- struct rb_node **p, *parent;
- u32 call_id;
+ u32 call_id, chan;
_enter(",%d", conn->debug_id);
@@ -465,20 +313,23 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
if (!candidate)
return ERR_PTR(-EBUSY);
+ chan = sp->hdr.cid & RXRPC_CHANNELMASK;
candidate->socket = rx;
candidate->conn = conn;
candidate->cid = sp->hdr.cid;
candidate->call_id = sp->hdr.callNumber;
- candidate->channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+ candidate->channel = chan;
candidate->rx_data_post = 0;
candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
if (conn->security_ix > 0)
candidate->state = RXRPC_CALL_SERVER_SECURING;
- write_lock_bh(&conn->lock);
+ spin_lock(&conn->channel_lock);
/* set the channel for this call */
- call = conn->channels[candidate->channel];
+ call = rcu_dereference_protected(conn->channels[chan].call,
+ lockdep_is_held(&conn->channel_lock));
+
_debug("channel[%u] is %p", candidate->channel, call);
if (call && call->call_id == sp->hdr.callNumber) {
/* already set; must've been a duplicate packet */
@@ -507,9 +358,9 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
call->debug_id, rxrpc_call_states[call->state]);
if (call->state >= RXRPC_CALL_COMPLETE) {
- conn->channels[call->channel] = NULL;
+ __rxrpc_disconnect_call(call);
} else {
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
kmem_cache_free(rxrpc_call_jar, candidate);
_leave(" = -EBUSY");
return ERR_PTR(-EBUSY);
@@ -519,33 +370,22 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
/* check the call number isn't duplicate */
_debug("check dup");
call_id = sp->hdr.callNumber;
- p = &conn->calls.rb_node;
- parent = NULL;
- while (*p) {
- parent = *p;
- call = rb_entry(parent, struct rxrpc_call, conn_node);
-
- /* The tree is sorted in order of the __be32 value without
- * turning it into host order.
- */
- if (call_id < call->call_id)
- p = &(*p)->rb_left;
- else if (call_id > call->call_id)
- p = &(*p)->rb_right;
- else
- goto old_call;
- }
+
+ /* We just ignore calls prior to the current call ID. Terminated calls
+ * are handled via the connection.
+ */
+ if (call_id <= conn->channels[chan].call_counter)
+ goto old_call; /* TODO: Just drop packet */
/* make the call available */
_debug("new call");
call = candidate;
candidate = NULL;
- rb_link_node(&call->conn_node, parent, p);
- rb_insert_color(&call->conn_node, &conn->calls);
- conn->channels[call->channel] = call;
+ conn->channels[chan].call_counter = call_id;
+ rcu_assign_pointer(conn->channels[chan].call, call);
sock_hold(&rx->sk);
rxrpc_get_connection(conn);
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
spin_lock(&conn->params.peer->lock);
hlist_add_head(&call->error_link, &conn->params.peer->error_targets);
@@ -555,27 +395,10 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
list_add_tail(&call->link, &rxrpc_calls);
write_unlock_bh(&rxrpc_call_lock);
- /* Record copies of information for hashtable lookup */
- call->family = rx->family;
call->local = conn->params.local;
- switch (call->family) {
- case AF_INET:
- call->peer_ip.ipv4_addr =
- conn->params.peer->srx.transport.sin.sin_addr.s_addr;
- break;
- case AF_INET6:
- memcpy(call->peer_ip.ipv6_addr,
- conn->params.peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
- sizeof(call->peer_ip.ipv6_addr));
- break;
- default:
- break;
- }
call->epoch = conn->proto.epoch;
call->service_id = conn->params.service_id;
- call->in_clientflag = conn->proto.in_clientflag;
- /* Add the new call to the hashtable */
- rxrpc_call_hash_add(call);
+ call->in_clientflag = RXRPC_CLIENT_INITIATED;
_net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
@@ -585,19 +408,19 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
return call;
extant_call:
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
kmem_cache_free(rxrpc_call_jar, candidate);
_leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1);
return call;
aborted_call:
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
kmem_cache_free(rxrpc_call_jar, candidate);
_leave(" = -ECONNABORTED");
return ERR_PTR(-ECONNABORTED);
old_call:
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
kmem_cache_free(rxrpc_call_jar, candidate);
_leave(" = -ECONNRESET [old]");
return ERR_PTR(-ECONNRESET);
@@ -626,6 +449,10 @@ void rxrpc_release_call(struct rxrpc_call *call)
*/
_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
+ spin_lock(&conn->params.peer->lock);
+ hlist_del_init(&call->error_link);
+ spin_unlock(&conn->params.peer->lock);
+
write_lock_bh(&rx->call_lock);
if (!list_empty(&call->accept_link)) {
_debug("unlinking once-pending call %p { e=%lx f=%lx }",
@@ -641,24 +468,17 @@ void rxrpc_release_call(struct rxrpc_call *call)
write_unlock_bh(&rx->call_lock);
/* free up the channel for reuse */
- spin_lock(&conn->channel_lock);
- write_lock_bh(&conn->lock);
- write_lock(&call->state_lock);
-
- rxrpc_disconnect_call(call);
-
- spin_unlock(&conn->channel_lock);
+ write_lock_bh(&call->state_lock);
if (call->state < RXRPC_CALL_COMPLETE &&
call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
_debug("+++ ABORTING STATE %d +++\n", call->state);
call->state = RXRPC_CALL_LOCALLY_ABORTED;
call->local_abort = RX_CALL_DEAD;
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
- rxrpc_queue_call(call);
}
- write_unlock(&call->state_lock);
- write_unlock_bh(&conn->lock);
+ write_unlock_bh(&call->state_lock);
+
+ rxrpc_disconnect_call(call);
/* clean up the Rx queue */
if (!skb_queue_empty(&call->rx_queue) ||
@@ -792,6 +612,17 @@ void __rxrpc_put_call(struct rxrpc_call *call)
}
/*
+ * Final call destruction under RCU.
+ */
+static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
+{
+ struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+
+ rxrpc_purge_queue(&call->rx_queue);
+ kmem_cache_free(rxrpc_call_jar, call);
+}
+
+/*
* clean up a call
*/
static void rxrpc_cleanup_call(struct rxrpc_call *call)
@@ -815,19 +646,7 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
return;
}
- if (call->conn) {
- spin_lock(&call->conn->params.peer->lock);
- hlist_del_init(&call->error_link);
- spin_unlock(&call->conn->params.peer->lock);
-
- write_lock_bh(&call->conn->lock);
- rb_erase(&call->conn_node, &call->conn->calls);
- write_unlock_bh(&call->conn->lock);
- rxrpc_put_connection(call->conn);
- }
-
- /* Remove the call from the hash */
- rxrpc_call_hash_del(call);
+ ASSERTCMP(call->conn, ==, NULL);
if (call->acks_window) {
_debug("kill Tx window %d",
@@ -855,7 +674,7 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
rxrpc_purge_queue(&call->rx_queue);
ASSERT(skb_queue_empty(&call->rx_oos_queue));
sock_put(&call->socket->sk);
- kmem_cache_free(rxrpc_call_jar, call);
+ call_rcu(&call->rcu, rxrpc_rcu_destroy_call);
}
/*
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 82488d6adb83..9e91f27b0d0f 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -33,7 +33,8 @@ static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
* client conns away from the current allocation point to try and keep the IDs
* concentrated. We will also need to retire connections from an old epoch.
*/
-int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp)
+static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
+ gfp_t gfp)
{
u32 epoch;
int id;
@@ -83,7 +84,7 @@ error:
/*
* Release a connection ID for a client connection from the global pool.
*/
-void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
+static void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
{
if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) {
spin_lock(&rxrpc_conn_id_lock);
@@ -92,3 +93,280 @@ void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
spin_unlock(&rxrpc_conn_id_lock);
}
}
+
+/*
+ * Destroy the client connection ID tree.
+ */
+void rxrpc_destroy_client_conn_ids(void)
+{
+ struct rxrpc_connection *conn;
+ int id;
+
+ if (!idr_is_empty(&rxrpc_client_conn_ids)) {
+ idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) {
+ pr_err("AF_RXRPC: Leaked client conn %p {%d}\n",
+ conn, atomic_read(&conn->usage));
+ }
+ BUG();
+ }
+
+ idr_destroy(&rxrpc_client_conn_ids);
+}
+
+/*
+ * Allocate a client connection. The caller must take care to clear any
+ * padding bytes in *cp.
+ */
+static struct rxrpc_connection *
+rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
+{
+ struct rxrpc_connection *conn;
+ int ret;
+
+ _enter("");
+
+ conn = rxrpc_alloc_connection(gfp);
+ if (!conn) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ conn->params = *cp;
+ conn->out_clientflag = RXRPC_CLIENT_INITIATED;
+ conn->state = RXRPC_CONN_CLIENT;
+
+ ret = rxrpc_get_client_connection_id(conn, gfp);
+ if (ret < 0)
+ goto error_0;
+
+ ret = rxrpc_init_client_conn_security(conn);
+ if (ret < 0)
+ goto error_1;
+
+ ret = conn->security->prime_packet_security(conn);
+ if (ret < 0)
+ goto error_2;
+
+ write_lock(&rxrpc_connection_lock);
+ list_add_tail(&conn->link, &rxrpc_connections);
+ write_unlock(&rxrpc_connection_lock);
+
+ /* We steal the caller's peer ref. */
+ cp->peer = NULL;
+ rxrpc_get_local(conn->params.local);
+ key_get(conn->params.key);
+
+ _leave(" = %p", conn);
+ return conn;
+
+error_2:
+ conn->security->clear(conn);
+error_1:
+ rxrpc_put_client_connection_id(conn);
+error_0:
+ kfree(conn);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+}
+
+/*
+ * find a connection for a call
+ * - called in process context with IRQs enabled
+ */
+int rxrpc_connect_call(struct rxrpc_call *call,
+ struct rxrpc_conn_parameters *cp,
+ struct sockaddr_rxrpc *srx,
+ gfp_t gfp)
+{
+ struct rxrpc_connection *conn, *candidate = NULL;
+ struct rxrpc_local *local = cp->local;
+ struct rb_node *p, **pp, *parent;
+ long diff;
+ int chan;
+
+ DECLARE_WAITQUEUE(myself, current);
+
+ _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+ cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
+ if (!cp->peer)
+ return -ENOMEM;
+
+ if (!cp->exclusive) {
+ /* Search for a existing client connection unless this is going
+ * to be a connection that's used exclusively for a single call.
+ */
+ _debug("search 1");
+ spin_lock(&local->client_conns_lock);
+ p = local->client_conns.rb_node;
+ while (p) {
+ conn = rb_entry(p, struct rxrpc_connection, client_node);
+
+#define cmp(X) ((long)conn->params.X - (long)cp->X)
+ diff = (cmp(peer) ?:
+ cmp(key) ?:
+ cmp(security_level));
+ if (diff < 0)
+ p = p->rb_left;
+ else if (diff > 0)
+ p = p->rb_right;
+ else
+ goto found_extant_conn;
+ }
+ spin_unlock(&local->client_conns_lock);
+ }
+
+ /* We didn't find a connection or we want an exclusive one. */
+ _debug("get new conn");
+ candidate = rxrpc_alloc_client_connection(cp, gfp);
+ if (!candidate) {
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+ }
+
+ if (cp->exclusive) {
+ /* Assign the call on an exclusive connection to channel 0 and
+ * don't add the connection to the endpoint's shareable conn
+ * lookup tree.
+ */
+ _debug("exclusive chan 0");
+ conn = candidate;
+ atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
+ spin_lock(&conn->channel_lock);
+ chan = 0;
+ goto found_channel;
+ }
+
+ /* We need to redo the search before attempting to add a new connection
+ * lest we race with someone else adding a conflicting instance.
+ */
+ _debug("search 2");
+ spin_lock(&local->client_conns_lock);
+
+ pp = &local->client_conns.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ conn = rb_entry(parent, struct rxrpc_connection, client_node);
+
+ diff = (cmp(peer) ?:
+ cmp(key) ?:
+ cmp(security_level));
+ if (diff < 0)
+ pp = &(*pp)->rb_left;
+ else if (diff > 0)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_extant_conn;
+ }
+
+ /* The second search also failed; simply add the new connection with
+ * the new call in channel 0. Note that we need to take the channel
+ * lock before dropping the client conn lock.
+ */
+ _debug("new conn");
+ set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
+ rb_link_node(&candidate->client_node, parent, pp);
+ rb_insert_color(&candidate->client_node, &local->client_conns);
+attached:
+ conn = candidate;
+ candidate = NULL;
+
+ atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
+ spin_lock(&conn->channel_lock);
+ spin_unlock(&local->client_conns_lock);
+ chan = 0;
+
+found_channel:
+ _debug("found chan");
+ call->conn = conn;
+ call->channel = chan;
+ call->epoch = conn->proto.epoch;
+ call->cid = conn->proto.cid | chan;
+ call->call_id = ++conn->channels[chan].call_counter;
+ conn->channels[chan].call_id = call->call_id;
+ rcu_assign_pointer(conn->channels[chan].call, call);
+
+ _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id);
+
+ spin_unlock(&conn->channel_lock);
+ rxrpc_put_peer(cp->peer);
+ cp->peer = NULL;
+ _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+ return 0;
+
+ /* We found a potentially suitable connection already in existence. If
+ * we can reuse it (ie. its usage count hasn't been reduced to 0 by the
+ * reaper), discard any candidate we may have allocated, and try to get
+ * a channel on this one, otherwise we have to replace it.
+ */
+found_extant_conn:
+ _debug("found conn");
+ if (!rxrpc_get_connection_maybe(conn)) {
+ set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
+ rb_replace_node(&conn->client_node,
+ &candidate->client_node,
+ &local->client_conns);
+ clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags);
+ goto attached;
+ }
+
+ spin_unlock(&local->client_conns_lock);
+
+ rxrpc_put_connection(candidate);
+
+ if (!atomic_add_unless(&conn->avail_chans, -1, 0)) {
+ if (!gfpflags_allow_blocking(gfp)) {
+ rxrpc_put_connection(conn);
+ _leave(" = -EAGAIN");
+ return -EAGAIN;
+ }
+
+ add_wait_queue(&conn->channel_wq, &myself);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (atomic_add_unless(&conn->avail_chans, -1, 0))
+ break;
+ if (signal_pending(current))
+ goto interrupted;
+ schedule();
+ }
+ remove_wait_queue(&conn->channel_wq, &myself);
+ __set_current_state(TASK_RUNNING);
+ }
+
+ /* The connection allegedly now has a free channel and we can now
+ * attach the call to it.
+ */
+ spin_lock(&conn->channel_lock);
+
+ for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
+ if (!conn->channels[chan].call)
+ goto found_channel;
+ BUG();
+
+interrupted:
+ remove_wait_queue(&conn->channel_wq, &myself);
+ __set_current_state(TASK_RUNNING);
+ rxrpc_put_connection(conn);
+ rxrpc_put_peer(cp->peer);
+ cp->peer = NULL;
+ _leave(" = -ERESTARTSYS");
+ return -ERESTARTSYS;
+}
+
+/*
+ * Remove a client connection from the local endpoint's tree, thereby removing
+ * it as a target for reuse for new client calls.
+ */
+void rxrpc_unpublish_client_conn(struct rxrpc_connection *conn)
+{
+ struct rxrpc_local *local = conn->params.local;
+
+ spin_lock(&local->client_conns_lock);
+ if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags))
+ rb_erase(&conn->client_node, &local->client_conns);
+ spin_unlock(&local->client_conns_lock);
+
+ rxrpc_put_client_connection_id(conn);
+}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index bf6971555eac..cee0f35bc1cf 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -31,15 +31,17 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
u32 abort_code)
{
struct rxrpc_call *call;
- struct rb_node *p;
+ int i;
_enter("{%d},%x", conn->debug_id, abort_code);
- read_lock_bh(&conn->lock);
+ spin_lock(&conn->channel_lock);
- for (p = rb_first(&conn->calls); p; p = rb_next(p)) {
- call = rb_entry(p, struct rxrpc_call, conn_node);
- write_lock(&call->state_lock);
+ for (i = 0; i < RXRPC_MAXCALLS; i++) {
+ call = rcu_dereference_protected(
+ conn->channels[i].call,
+ lockdep_is_held(&conn->channel_lock));
+ write_lock_bh(&call->state_lock);
if (call->state <= RXRPC_CALL_COMPLETE) {
call->state = state;
if (state == RXRPC_CALL_LOCALLY_ABORTED) {
@@ -51,10 +53,10 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
}
rxrpc_queue_call(call);
}
- write_unlock(&call->state_lock);
+ write_unlock_bh(&call->state_lock);
}
- read_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
_leave("");
}
@@ -188,18 +190,24 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
if (ret < 0)
return ret;
- conn->security->prime_packet_security(conn);
- read_lock_bh(&conn->lock);
+ ret = conn->security->prime_packet_security(conn);
+ if (ret < 0)
+ return ret;
+
+ spin_lock(&conn->channel_lock);
spin_lock(&conn->state_lock);
- if (conn->state == RXRPC_CONN_SERVER_CHALLENGING) {
- conn->state = RXRPC_CONN_SERVER;
+ if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
+ conn->state = RXRPC_CONN_SERVICE;
for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
- rxrpc_call_is_secure(conn->channels[loop]);
+ rxrpc_call_is_secure(
+ rcu_dereference_protected(
+ conn->channels[loop].call,
+ lockdep_is_held(&conn->channel_lock)));
}
spin_unlock(&conn->state_lock);
- read_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
return 0;
default:
@@ -263,12 +271,8 @@ void rxrpc_process_connection(struct work_struct *work)
_enter("{%d}", conn->debug_id);
- rxrpc_get_connection(conn);
-
- if (test_and_clear_bit(RXRPC_CONN_CHALLENGE, &conn->events)) {
+ if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
rxrpc_secure_connection(conn);
- rxrpc_put_connection(conn);
- }
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
@@ -283,7 +287,6 @@ void rxrpc_process_connection(struct work_struct *work)
goto requeue_and_leave;
case -ECONNABORTED:
default:
- rxrpc_put_connection(conn);
rxrpc_free_skb(skb);
break;
}
@@ -301,7 +304,6 @@ requeue_and_leave:
protocol_error:
if (rxrpc_abort_connection(conn, -ret, abort_code) < 0)
goto requeue_and_leave;
- rxrpc_put_connection(conn);
rxrpc_free_skb(skb);
_leave(" [EPROTO]");
goto out;
@@ -315,7 +317,7 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
CHECK_SLAB_OKAY(&local->usage);
skb_queue_tail(&local->reject_queue, skb);
- rxrpc_queue_work(&local->processor);
+ rxrpc_queue_local(local);
}
/*
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 4bfad7cf96cb..896d84493a05 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -15,7 +15,6 @@
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/skbuff.h>
-#include <linux/crypto.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"
@@ -34,7 +33,7 @@ static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
/*
* allocate a new connection
*/
-static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
+struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
{
struct rxrpc_connection *conn;
@@ -46,12 +45,13 @@ static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
init_waitqueue_head(&conn->channel_wq);
INIT_WORK(&conn->processor, &rxrpc_process_connection);
INIT_LIST_HEAD(&conn->link);
- conn->calls = RB_ROOT;
skb_queue_head_init(&conn->rx_queue);
conn->security = &rxrpc_no_security;
- rwlock_init(&conn->lock);
spin_lock_init(&conn->state_lock);
- atomic_set(&conn->usage, 1);
+ /* We maintain an extra ref on the connection whilst it is
+ * on the rxrpc_connections list.
+ */
+ atomic_set(&conn->usage, 2);
conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
atomic_set(&conn->avail_chans, RXRPC_MAXCALLS);
conn->size_align = 4;
@@ -63,465 +63,118 @@ static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
}
/*
- * add a call to a connection's call-by-ID tree
- */
-static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn,
- struct rxrpc_call *call)
-{
- struct rxrpc_call *xcall;
- struct rb_node *parent, **p;
- __be32 call_id;
-
- write_lock_bh(&conn->lock);
-
- call_id = call->call_id;
- p = &conn->calls.rb_node;
- parent = NULL;
- while (*p) {
- parent = *p;
- xcall = rb_entry(parent, struct rxrpc_call, conn_node);
-
- if (call_id < xcall->call_id)
- p = &(*p)->rb_left;
- else if (call_id > xcall->call_id)
- p = &(*p)->rb_right;
- else
- BUG();
- }
-
- rb_link_node(&call->conn_node, parent, p);
- rb_insert_color(&call->conn_node, &conn->calls);
-
- write_unlock_bh(&conn->lock);
-}
-
-/*
- * Allocate a client connection. The caller must take care to clear any
- * padding bytes in *cp.
+ * Look up a connection in the cache by protocol parameters.
+ *
+ * If successful, a pointer to the connection is returned, but no ref is taken.
+ * NULL is returned if there is no match.
+ *
+ * The caller must be holding the RCU read lock.
*/
-static struct rxrpc_connection *
-rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
+struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
+ struct sk_buff *skb)
{
struct rxrpc_connection *conn;
- int ret;
-
- _enter("");
-
- conn = rxrpc_alloc_connection(gfp);
- if (!conn) {
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
- }
-
- conn->params = *cp;
- conn->proto.local = cp->local;
- conn->proto.epoch = rxrpc_epoch;
- conn->proto.cid = 0;
- conn->proto.in_clientflag = 0;
- conn->proto.family = cp->peer->srx.transport.family;
- conn->out_clientflag = RXRPC_CLIENT_INITIATED;
- conn->state = RXRPC_CONN_CLIENT;
-
- switch (conn->proto.family) {
- case AF_INET:
- conn->proto.addr_size = sizeof(conn->proto.ipv4_addr);
- conn->proto.ipv4_addr = cp->peer->srx.transport.sin.sin_addr;
- conn->proto.port = cp->peer->srx.transport.sin.sin_port;
- break;
- }
-
- ret = rxrpc_get_client_connection_id(conn, gfp);
- if (ret < 0)
- goto error_0;
-
- ret = rxrpc_init_client_conn_security(conn);
- if (ret < 0)
- goto error_1;
-
- conn->security->prime_packet_security(conn);
-
- write_lock(&rxrpc_connection_lock);
- list_add_tail(&conn->link, &rxrpc_connections);
- write_unlock(&rxrpc_connection_lock);
-
- /* We steal the caller's peer ref. */
- cp->peer = NULL;
- rxrpc_get_local(conn->params.local);
- key_get(conn->params.key);
-
- _leave(" = %p", conn);
- return conn;
-
-error_1:
- rxrpc_put_client_connection_id(conn);
-error_0:
- kfree(conn);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
-}
-
-/*
- * find a connection for a call
- * - called in process context with IRQs enabled
- */
-int rxrpc_connect_call(struct rxrpc_call *call,
- struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
- gfp_t gfp)
-{
- struct rxrpc_connection *conn, *candidate = NULL;
- struct rxrpc_local *local = cp->local;
- struct rb_node *p, **pp, *parent;
- long diff;
- int chan;
+ struct rxrpc_conn_proto k;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_peer *peer;
- DECLARE_WAITQUEUE(myself, current);
+ _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
- _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+ if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+ goto not_found;
- cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
- if (!cp->peer)
- return -ENOMEM;
+ k.epoch = sp->hdr.epoch;
+ k.cid = sp->hdr.cid & RXRPC_CIDMASK;
- if (!cp->exclusive) {
- /* Search for a existing client connection unless this is going
- * to be a connection that's used exclusively for a single call.
- */
- _debug("search 1");
- spin_lock(&local->client_conns_lock);
- p = local->client_conns.rb_node;
- while (p) {
- conn = rb_entry(p, struct rxrpc_connection, client_node);
-
-#define cmp(X) ((long)conn->params.X - (long)cp->X)
- diff = (cmp(peer) ?:
- cmp(key) ?:
- cmp(security_level));
- if (diff < 0)
- p = p->rb_left;
- else if (diff > 0)
- p = p->rb_right;
- else
- goto found_extant_conn;
- }
- spin_unlock(&local->client_conns_lock);
+ /* We may have to handle mixing IPv4 and IPv6 */
+ if (srx.transport.family != local->srx.transport.family) {
+ pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
+ srx.transport.family,
+ local->srx.transport.family);
+ goto not_found;
}
- /* We didn't find a connection or we want an exclusive one. */
- _debug("get new conn");
- candidate = rxrpc_alloc_client_connection(cp, gfp);
- if (!candidate) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
+ k.epoch = sp->hdr.epoch;
+ k.cid = sp->hdr.cid & RXRPC_CIDMASK;
- if (cp->exclusive) {
- /* Assign the call on an exclusive connection to channel 0 and
- * don't add the connection to the endpoint's shareable conn
- * lookup tree.
+ if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) {
+ /* We need to look up service connections by the full protocol
+ * parameter set. We look up the peer first as an intermediate
+ * step and then the connection from the peer's tree.
*/
- _debug("exclusive chan 0");
- conn = candidate;
- atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
- spin_lock(&conn->channel_lock);
- chan = 0;
- goto found_channel;
- }
-
- /* We need to redo the search before attempting to add a new connection
- * lest we race with someone else adding a conflicting instance.
- */
- _debug("search 2");
- spin_lock(&local->client_conns_lock);
-
- pp = &local->client_conns.rb_node;
- parent = NULL;
- while (*pp) {
- parent = *pp;
- conn = rb_entry(parent, struct rxrpc_connection, client_node);
-
- diff = (cmp(peer) ?:
- cmp(key) ?:
- cmp(security_level));
- if (diff < 0)
- pp = &(*pp)->rb_left;
- else if (diff > 0)
- pp = &(*pp)->rb_right;
- else
- goto found_extant_conn;
- }
-
- /* The second search also failed; simply add the new connection with
- * the new call in channel 0. Note that we need to take the channel
- * lock before dropping the client conn lock.
- */
- _debug("new conn");
- conn = candidate;
- candidate = NULL;
-
- rb_link_node(&conn->client_node, parent, pp);
- rb_insert_color(&conn->client_node, &local->client_conns);
-
- atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
- spin_lock(&conn->channel_lock);
- spin_unlock(&local->client_conns_lock);
- chan = 0;
-
-found_channel:
- _debug("found chan");
- call->conn = conn;
- call->channel = chan;
- call->epoch = conn->proto.epoch;
- call->cid = conn->proto.cid | chan;
- call->call_id = ++conn->call_counter;
- rcu_assign_pointer(conn->channels[chan], call);
-
- _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id);
-
- rxrpc_add_call_ID_to_conn(conn, call);
- spin_unlock(&conn->channel_lock);
- rxrpc_put_peer(cp->peer);
- cp->peer = NULL;
- _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
- return 0;
-
- /* We found a suitable connection already in existence. Discard any
- * candidate we may have allocated, and try to get a channel on this
- * one.
- */
-found_extant_conn:
- _debug("found conn");
- rxrpc_get_connection(conn);
- spin_unlock(&local->client_conns_lock);
-
- rxrpc_put_connection(candidate);
-
- if (!atomic_add_unless(&conn->avail_chans, -1, 0)) {
- if (!gfpflags_allow_blocking(gfp)) {
- rxrpc_put_connection(conn);
- _leave(" = -EAGAIN");
- return -EAGAIN;
+ peer = rxrpc_lookup_peer_rcu(local, &srx);
+ if (!peer)
+ goto not_found;
+ conn = rxrpc_find_service_conn_rcu(peer, skb);
+ if (!conn || atomic_read(&conn->usage) == 0)
+ goto not_found;
+ _leave(" = %p", conn);
+ return conn;
+ } else {
+ /* Look up client connections by connection ID alone as their
+ * IDs are unique for this machine.
+ */
+ conn = idr_find(&rxrpc_client_conn_ids,
+ sp->hdr.cid >> RXRPC_CIDSHIFT);
+ if (!conn || atomic_read(&conn->usage) == 0) {
+ _debug("no conn");
+ goto not_found;
}
- add_wait_queue(&conn->channel_wq, &myself);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_add_unless(&conn->avail_chans, -1, 0))
- break;
- if (signal_pending(current))
- goto interrupted;
- schedule();
+ if (conn->proto.epoch != k.epoch ||
+ conn->params.local != local)
+ goto not_found;
+
+ peer = conn->params.peer;
+ switch (srx.transport.family) {
+ case AF_INET:
+ if (peer->srx.transport.sin.sin_port !=
+ srx.transport.sin.sin_port ||
+ peer->srx.transport.sin.sin_addr.s_addr !=
+ srx.transport.sin.sin_addr.s_addr)
+ goto not_found;
+ break;
+ default:
+ BUG();
}
- remove_wait_queue(&conn->channel_wq, &myself);
- __set_current_state(TASK_RUNNING);
- }
-
- /* The connection allegedly now has a free channel and we can now
- * attach the call to it.
- */
- spin_lock(&conn->channel_lock);
-
- for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
- if (!conn->channels[chan])
- goto found_channel;
- BUG();
-
-interrupted:
- remove_wait_queue(&conn->channel_wq, &myself);
- __set_current_state(TASK_RUNNING);
- rxrpc_put_connection(conn);
- rxrpc_put_peer(cp->peer);
- cp->peer = NULL;
- _leave(" = -ERESTARTSYS");
- return -ERESTARTSYS;
-}
-
-/*
- * get a record of an incoming connection
- */
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local,
- struct rxrpc_peer *peer,
- struct sk_buff *skb)
-{
- struct rxrpc_connection *conn, *candidate = NULL;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rb_node *p, **pp;
- const char *new = "old";
- __be32 epoch;
- u32 cid;
-
- _enter("");
-
- ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED);
-
- epoch = sp->hdr.epoch;
- cid = sp->hdr.cid & RXRPC_CIDMASK;
-
- /* search the connection list first */
- read_lock_bh(&peer->conn_lock);
-
- p = peer->service_conns.rb_node;
- while (p) {
- conn = rb_entry(p, struct rxrpc_connection, service_node);
- _debug("maybe %x", conn->proto.cid);
-
- if (epoch < conn->proto.epoch)
- p = p->rb_left;
- else if (epoch > conn->proto.epoch)
- p = p->rb_right;
- else if (cid < conn->proto.cid)
- p = p->rb_left;
- else if (cid > conn->proto.cid)
- p = p->rb_right;
- else
- goto found_extant_connection;
- }
- read_unlock_bh(&peer->conn_lock);
-
- /* not yet present - create a candidate for a new record and then
- * redo the search */
- candidate = rxrpc_alloc_connection(GFP_NOIO);
- if (!candidate) {
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
+ _leave(" = %p", conn);
+ return conn;
}
- candidate->proto.local = local;
- candidate->proto.epoch = sp->hdr.epoch;
- candidate->proto.cid = sp->hdr.cid & RXRPC_CIDMASK;
- candidate->proto.in_clientflag = RXRPC_CLIENT_INITIATED;
- candidate->params.local = local;
- candidate->params.peer = peer;
- candidate->params.service_id = sp->hdr.serviceId;
- candidate->security_ix = sp->hdr.securityIndex;
- candidate->out_clientflag = 0;
- candidate->state = RXRPC_CONN_SERVER;
- if (candidate->params.service_id)
- candidate->state = RXRPC_CONN_SERVER_UNSECURED;
-
- write_lock_bh(&peer->conn_lock);
-
- pp = &peer->service_conns.rb_node;
- p = NULL;
- while (*pp) {
- p = *pp;
- conn = rb_entry(p, struct rxrpc_connection, service_node);
-
- if (epoch < conn->proto.epoch)
- pp = &(*pp)->rb_left;
- else if (epoch > conn->proto.epoch)
- pp = &(*pp)->rb_right;
- else if (cid < conn->proto.cid)
- pp = &(*pp)->rb_left;
- else if (cid > conn->proto.cid)
- pp = &(*pp)->rb_right;
- else
- goto found_extant_second;
- }
-
- /* we can now add the new candidate to the list */
- conn = candidate;
- candidate = NULL;
- rb_link_node(&conn->service_node, p, pp);
- rb_insert_color(&conn->service_node, &peer->service_conns);
- rxrpc_get_peer(peer);
- rxrpc_get_local(local);
-
- write_unlock_bh(&peer->conn_lock);
-
- write_lock(&rxrpc_connection_lock);
- list_add_tail(&conn->link, &rxrpc_connections);
- write_unlock(&rxrpc_connection_lock);
-
- new = "new";
-
-success:
- _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid);
-
- _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
- return conn;
-
- /* we found the connection in the list immediately */
-found_extant_connection:
- if (sp->hdr.securityIndex != conn->security_ix) {
- read_unlock_bh(&peer->conn_lock);
- goto security_mismatch;
- }
- rxrpc_get_connection(conn);
- read_unlock_bh(&peer->conn_lock);
- goto success;
-
- /* we found the connection on the second time through the list */
-found_extant_second:
- if (sp->hdr.securityIndex != conn->security_ix) {
- write_unlock_bh(&peer->conn_lock);
- goto security_mismatch;
- }
- rxrpc_get_connection(conn);
- write_unlock_bh(&peer->conn_lock);
- kfree(candidate);
- goto success;
-
-security_mismatch:
- kfree(candidate);
- _leave(" = -EKEYREJECTED");
- return ERR_PTR(-EKEYREJECTED);
+not_found:
+ _leave(" = NULL");
+ return NULL;
}
/*
- * find a connection based on transport and RxRPC connection ID for an incoming
- * packet
+ * Disconnect a call and clear any channel it occupies when that call
+ * terminates. The caller must hold the channel_lock and must release the
+ * call's ref on the connection.
*/
-struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_local *local,
- struct rxrpc_peer *peer,
- struct sk_buff *skb)
+void __rxrpc_disconnect_call(struct rxrpc_call *call)
{
- struct rxrpc_connection *conn;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rb_node *p;
- u32 epoch, cid;
-
- _enter(",{%x,%x}", sp->hdr.cid, sp->hdr.flags);
+ struct rxrpc_connection *conn = call->conn;
+ struct rxrpc_channel *chan = &conn->channels[call->channel];
- read_lock_bh(&peer->conn_lock);
+ _enter("%d,%d", conn->debug_id, call->channel);
- cid = sp->hdr.cid & RXRPC_CIDMASK;
- epoch = sp->hdr.epoch;
+ if (rcu_access_pointer(chan->call) == call) {
+ /* Save the result of the call so that we can repeat it if necessary
+ * through the channel, whilst disposing of the actual call record.
+ */
+ chan->last_result = call->local_abort;
+ smp_wmb();
+ chan->last_call = chan->call_id;
+ chan->call_id = chan->call_counter;
- if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) {
- p = peer->service_conns.rb_node;
- while (p) {
- conn = rb_entry(p, struct rxrpc_connection, service_node);
-
- _debug("maybe %x", conn->proto.cid);
-
- if (epoch < conn->proto.epoch)
- p = p->rb_left;
- else if (epoch > conn->proto.epoch)
- p = p->rb_right;
- else if (cid < conn->proto.cid)
- p = p->rb_left;
- else if (cid > conn->proto.cid)
- p = p->rb_right;
- else
- goto found;
- }
- } else {
- conn = idr_find(&rxrpc_client_conn_ids, cid >> RXRPC_CIDSHIFT);
- if (conn && conn->proto.epoch == epoch)
- goto found;
+ rcu_assign_pointer(chan->call, NULL);
+ atomic_inc(&conn->avail_chans);
+ wake_up(&conn->channel_wq);
}
- read_unlock_bh(&peer->conn_lock);
- _leave(" = NULL");
- return NULL;
-
-found:
- rxrpc_get_connection(conn);
- read_unlock_bh(&peer->conn_lock);
- _leave(" = %p", conn);
- return conn;
+ _leave("");
}
/*
@@ -531,15 +184,13 @@ found:
void rxrpc_disconnect_call(struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
- unsigned chan = call->channel;
- _enter("%d,%d", conn->debug_id, call->channel);
+ spin_lock(&conn->channel_lock);
+ __rxrpc_disconnect_call(call);
+ spin_unlock(&conn->channel_lock);
- if (conn->channels[chan] == call) {
- rcu_assign_pointer(conn->channels[chan], NULL);
- atomic_inc(&conn->avail_chans);
- wake_up(&conn->channel_wq);
- }
+ call->conn = NULL;
+ rxrpc_put_connection(conn);
}
/*
@@ -553,10 +204,10 @@ void rxrpc_put_connection(struct rxrpc_connection *conn)
_enter("%p{u=%d,d=%d}",
conn, atomic_read(&conn->usage), conn->debug_id);
- ASSERTCMP(atomic_read(&conn->usage), >, 0);
+ ASSERTCMP(atomic_read(&conn->usage), >, 1);
conn->put_time = ktime_get_seconds();
- if (atomic_dec_and_test(&conn->usage)) {
+ if (atomic_dec_return(&conn->usage) == 1) {
_debug("zombie");
rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
}
@@ -567,15 +218,17 @@ void rxrpc_put_connection(struct rxrpc_connection *conn)
/*
* destroy a virtual connection
*/
-static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
+static void rxrpc_destroy_connection(struct rcu_head *rcu)
{
- _enter("%p{%d}", conn, atomic_read(&conn->usage));
+ struct rxrpc_connection *conn =
+ container_of(rcu, struct rxrpc_connection, rcu);
+
+ _enter("{%d,u=%d}", conn->debug_id, atomic_read(&conn->usage));
ASSERTCMP(atomic_read(&conn->usage), ==, 0);
_net("DESTROY CONN %d", conn->debug_id);
- ASSERT(RB_EMPTY_ROOT(&conn->calls));
rxrpc_purge_queue(&conn->rx_queue);
conn->security->clear(conn);
@@ -594,59 +247,41 @@ static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
static void rxrpc_connection_reaper(struct work_struct *work)
{
struct rxrpc_connection *conn, *_p;
- struct rxrpc_peer *peer;
- unsigned long now, earliest, reap_time;
+ unsigned long reap_older_than, earliest, put_time, now;
LIST_HEAD(graveyard);
_enter("");
now = ktime_get_seconds();
+ reap_older_than = now - rxrpc_connection_expiry;
earliest = ULONG_MAX;
write_lock(&rxrpc_connection_lock);
list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
- _debug("reap CONN %d { u=%d,t=%ld }",
- conn->debug_id, atomic_read(&conn->usage),
- (long) now - (long) conn->put_time);
-
- if (likely(atomic_read(&conn->usage) > 0))
+ ASSERTCMP(atomic_read(&conn->usage), >, 0);
+ if (likely(atomic_read(&conn->usage) > 1))
continue;
- if (rxrpc_conn_is_client(conn)) {
- struct rxrpc_local *local = conn->params.local;
- spin_lock(&local->client_conns_lock);
- reap_time = conn->put_time + rxrpc_connection_expiry;
-
- if (atomic_read(&conn->usage) > 0) {
- ;
- } else if (reap_time <= now) {
- list_move_tail(&conn->link, &graveyard);
- rxrpc_put_client_connection_id(conn);
- rb_erase(&conn->client_node,
- &local->client_conns);
- } else if (reap_time < earliest) {
- earliest = reap_time;
- }
-
- spin_unlock(&local->client_conns_lock);
- } else {
- peer = conn->params.peer;
- write_lock_bh(&peer->conn_lock);
- reap_time = conn->put_time + rxrpc_connection_expiry;
-
- if (atomic_read(&conn->usage) > 0) {
- ;
- } else if (reap_time <= now) {
- list_move_tail(&conn->link, &graveyard);
- rb_erase(&conn->service_node,
- &peer->service_conns);
- } else if (reap_time < earliest) {
- earliest = reap_time;
- }
-
- write_unlock_bh(&peer->conn_lock);
+ put_time = READ_ONCE(conn->put_time);
+ if (time_after(put_time, reap_older_than)) {
+ if (time_before(put_time, earliest))
+ earliest = put_time;
+ continue;
}
+
+ /* The usage count sits at 1 whilst the object is unused on the
+ * list; we reduce that to 0 to make the object unavailable.
+ */
+ if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
+ continue;
+
+ if (rxrpc_conn_is_client(conn))
+ rxrpc_unpublish_client_conn(conn);
+ else
+ rxrpc_unpublish_service_conn(conn);
+
+ list_move_tail(&conn->link, &graveyard);
}
write_unlock(&rxrpc_connection_lock);
@@ -657,14 +292,14 @@ static void rxrpc_connection_reaper(struct work_struct *work)
(earliest - now) * HZ);
}
- /* then destroy all those pulled out */
while (!list_empty(&graveyard)) {
conn = list_entry(graveyard.next, struct rxrpc_connection,
link);
list_del_init(&conn->link);
ASSERTCMP(atomic_read(&conn->usage), ==, 0);
- rxrpc_destroy_connection(conn);
+ skb_queue_purge(&conn->rx_queue);
+ call_rcu(&conn->rcu, rxrpc_destroy_connection);
}
_leave("");
@@ -676,11 +311,30 @@ static void rxrpc_connection_reaper(struct work_struct *work)
*/
void __exit rxrpc_destroy_all_connections(void)
{
+ struct rxrpc_connection *conn, *_p;
+ bool leak = false;
+
_enter("");
rxrpc_connection_expiry = 0;
cancel_delayed_work(&rxrpc_connection_reap);
rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+ flush_workqueue(rxrpc_workqueue);
+
+ write_lock(&rxrpc_connection_lock);
+ list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
+ pr_err("AF_RXRPC: Leaked conn %p {%d}\n",
+ conn, atomic_read(&conn->usage));
+ leak = true;
+ }
+ write_unlock(&rxrpc_connection_lock);
+ BUG_ON(leak);
+
+ /* Make sure the local and peer records pinned by any dying connections
+ * are released.
+ */
+ rcu_barrier();
+ rxrpc_destroy_client_conn_ids();
_leave("");
}
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
new file mode 100644
index 000000000000..fd9027ccba8f
--- /dev/null
+++ b/net/rxrpc/conn_service.c
@@ -0,0 +1,230 @@
+/* Service connection management
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include "ar-internal.h"
+
+/*
+ * Find a service connection under RCU conditions.
+ *
+ * We could use a hash table, but that is subject to bucket stuffing by an
+ * attacker as the client gets to pick the epoch and cid values and would know
+ * the hash function. So, instead, we use a hash table for the peer and from
+ * that an rbtree to find the service connection. Under ordinary circumstances
+ * it might be slower than a large hash table, but it is at least limited in
+ * depth.
+ */
+struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
+ struct sk_buff *skb)
+{
+ struct rxrpc_connection *conn = NULL;
+ struct rxrpc_conn_proto k;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rb_node *p;
+ unsigned int seq = 0;
+
+ k.epoch = sp->hdr.epoch;
+ k.cid = sp->hdr.cid & RXRPC_CIDMASK;
+
+ do {
+ /* Unfortunately, rbtree walking doesn't give reliable results
+ * under just the RCU read lock, so we have to check for
+ * changes.
+ */
+ read_seqbegin_or_lock(&peer->service_conn_lock, &seq);
+
+ p = rcu_dereference_raw(peer->service_conns.rb_node);
+ while (p) {
+ conn = rb_entry(p, struct rxrpc_connection, service_node);
+
+ if (conn->proto.index_key < k.index_key)
+ p = rcu_dereference_raw(p->rb_left);
+ else if (conn->proto.index_key > k.index_key)
+ p = rcu_dereference_raw(p->rb_right);
+ else
+ goto done;
+ conn = NULL;
+ }
+ } while (need_seqretry(&peer->service_conn_lock, seq));
+
+done:
+ done_seqretry(&peer->service_conn_lock, seq);
+ _leave(" = %d", conn ? conn->debug_id : -1);
+ return conn;
+}
+
+/*
+ * Insert a service connection into a peer's tree, thereby making it a target
+ * for incoming packets.
+ */
+static struct rxrpc_connection *
+rxrpc_publish_service_conn(struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn)
+{
+ struct rxrpc_connection *cursor = NULL;
+ struct rxrpc_conn_proto k = conn->proto;
+ struct rb_node **pp, *parent;
+
+ write_seqlock_bh(&peer->service_conn_lock);
+
+ pp = &peer->service_conns.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ cursor = rb_entry(parent,
+ struct rxrpc_connection, service_node);
+
+ if (cursor->proto.index_key < k.index_key)
+ pp = &(*pp)->rb_left;
+ else if (cursor->proto.index_key > k.index_key)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_extant_conn;
+ }
+
+ rb_link_node_rcu(&conn->service_node, parent, pp);
+ rb_insert_color(&conn->service_node, &peer->service_conns);
+conn_published:
+ set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags);
+ write_sequnlock_bh(&peer->service_conn_lock);
+ _leave(" = %d [new]", conn->debug_id);
+ return conn;
+
+found_extant_conn:
+ if (atomic_read(&cursor->usage) == 0)
+ goto replace_old_connection;
+ write_sequnlock_bh(&peer->service_conn_lock);
+ /* We should not be able to get here. rxrpc_incoming_connection() is
+ * called in a non-reentrant context, so there can't be a race to
+ * insert a new connection.
+ */
+ BUG();
+
+replace_old_connection:
+ /* The old connection is from an outdated epoch. */
+ _debug("replace conn");
+ rb_replace_node_rcu(&cursor->service_node,
+ &conn->service_node,
+ &peer->service_conns);
+ clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &cursor->flags);
+ goto conn_published;
+}
+
+/*
+ * get a record of an incoming connection
+ */
+struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx,
+ struct sk_buff *skb)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_peer *peer;
+ const char *new = "old";
+
+ _enter("");
+
+ peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
+ if (!peer) {
+ _debug("no peer");
+ return ERR_PTR(-EBUSY);
+ }
+
+ ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED);
+
+ rcu_read_lock();
+ peer = rxrpc_lookup_peer_rcu(local, srx);
+ if (peer) {
+ conn = rxrpc_find_service_conn_rcu(peer, skb);
+ if (conn) {
+ if (sp->hdr.securityIndex != conn->security_ix)
+ goto security_mismatch_rcu;
+ if (rxrpc_get_connection_maybe(conn))
+ goto found_extant_connection_rcu;
+
+ /* The conn has expired but we can't remove it without
+ * the appropriate lock, so we attempt to replace it
+ * when we have a new candidate.
+ */
+ }
+
+ if (!rxrpc_get_peer_maybe(peer))
+ peer = NULL;
+ }
+ rcu_read_unlock();
+
+ if (!peer) {
+ peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
+ if (!peer)
+ goto enomem;
+ }
+
+ /* We don't have a matching record yet. */
+ conn = rxrpc_alloc_connection(GFP_NOIO);
+ if (!conn)
+ goto enomem_peer;
+
+ conn->proto.epoch = sp->hdr.epoch;
+ conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK;
+ conn->params.local = local;
+ conn->params.peer = peer;
+ conn->params.service_id = sp->hdr.serviceId;
+ conn->security_ix = sp->hdr.securityIndex;
+ conn->out_clientflag = 0;
+ conn->state = RXRPC_CONN_SERVICE;
+ if (conn->params.service_id)
+ conn->state = RXRPC_CONN_SERVICE_UNSECURED;
+
+ rxrpc_get_local(local);
+
+ write_lock(&rxrpc_connection_lock);
+ list_add_tail(&conn->link, &rxrpc_connections);
+ write_unlock(&rxrpc_connection_lock);
+
+ /* Make the connection a target for incoming packets. */
+ rxrpc_publish_service_conn(peer, conn);
+
+ new = "new";
+
+success:
+ _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid);
+ _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+ return conn;
+
+found_extant_connection_rcu:
+ rcu_read_unlock();
+ goto success;
+
+security_mismatch_rcu:
+ rcu_read_unlock();
+ _leave(" = -EKEYREJECTED");
+ return ERR_PTR(-EKEYREJECTED);
+
+enomem_peer:
+ rxrpc_put_peer(peer);
+enomem:
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Remove the service connection from the peer's tree, thereby removing it as a
+ * target for incoming packets.
+ */
+void rxrpc_unpublish_service_conn(struct rxrpc_connection *conn)
+{
+ struct rxrpc_peer *peer = conn->params.peer;
+
+ write_seqlock_bh(&peer->service_conn_lock);
+ if (test_and_clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags))
+ rb_erase(&conn->service_node, &peer->service_conns);
+ write_sequnlock_bh(&peer->service_conn_lock);
+}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index f4bd57b77b93..991a20d25093 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -476,7 +476,7 @@ static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
sp->hdr.seq += 1;
sp->hdr.serial += 1;
sp->hdr.flags = jhdr.flags;
- sp->hdr._rsvd = jhdr._rsvd;
+ sp->hdr._rsvd = ntohs(jhdr._rsvd);
_proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1);
@@ -575,14 +575,13 @@ done:
* post connection-level events to the connection
* - this includes challenges, responses and some aborts
*/
-static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+static bool rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
struct sk_buff *skb)
{
_enter("%p,%p", conn, skb);
- rxrpc_get_connection(conn);
skb_queue_tail(&conn->rx_queue, skb);
- rxrpc_queue_conn(conn);
+ return rxrpc_queue_conn(conn);
}
/*
@@ -595,7 +594,7 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
_enter("%p,%p", local, skb);
skb_queue_tail(&local->event_queue, skb);
- rxrpc_queue_work(&local->processor);
+ rxrpc_queue_local(local);
}
/*
@@ -627,32 +626,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
return 0;
}
-static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
- struct sk_buff *skb)
-{
- struct rxrpc_peer *peer;
- struct rxrpc_connection *conn;
- struct sockaddr_rxrpc srx;
-
- rxrpc_get_addr_from_skb(local, skb, &srx);
- rcu_read_lock();
- peer = rxrpc_lookup_peer_rcu(local, &srx);
- if (!peer)
- goto cant_find_peer;
-
- conn = rxrpc_find_connection(local, peer, skb);
- rcu_read_unlock();
- if (!conn)
- goto cant_find_conn;
-
- return conn;
-
-cant_find_peer:
- rcu_read_unlock();
-cant_find_conn:
- return NULL;
-}
-
/*
* handle data received on the local endpoint
* - may be called in interrupt context
@@ -663,6 +636,7 @@ cant_find_conn:
*/
void rxrpc_data_ready(struct sock *sk)
{
+ struct rxrpc_connection *conn;
struct rxrpc_skb_priv *sp;
struct rxrpc_local *local = sk->sk_user_data;
struct sk_buff *skb;
@@ -726,34 +700,37 @@ void rxrpc_data_ready(struct sock *sk)
(sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
goto bad_message;
- if (sp->hdr.callNumber == 0) {
- /* This is a connection-level packet. These should be
- * fairly rare, so the extra overhead of looking them up the
- * old-fashioned way doesn't really hurt */
- struct rxrpc_connection *conn;
+ rcu_read_lock();
- conn = rxrpc_conn_from_local(local, skb);
- if (!conn)
- goto cant_route_call;
+retry_find_conn:
+ conn = rxrpc_find_connection_rcu(local, skb);
+ if (!conn)
+ goto cant_route_call;
+ if (sp->hdr.callNumber == 0) {
+ /* Connection-level packet */
_debug("CONN %p {%d}", conn, conn->debug_id);
- rxrpc_post_packet_to_conn(conn, skb);
- rxrpc_put_connection(conn);
+ if (!rxrpc_post_packet_to_conn(conn, skb))
+ goto retry_find_conn;
} else {
- struct rxrpc_call *call;
+ /* Call-bound packets are routed by connection channel. */
+ unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+ struct rxrpc_channel *chan = &conn->channels[channel];
+ struct rxrpc_call *call = rcu_dereference(chan->call);
- call = rxrpc_find_call_hash(&sp->hdr, local,
- AF_INET, &ip_hdr(skb)->saddr);
- if (call)
- rxrpc_post_packet_to_call(call, skb);
- else
+ if (!call || atomic_read(&call->usage) == 0)
goto cant_route_call;
+
+ rxrpc_post_packet_to_call(call, skb);
}
+ rcu_read_unlock();
out:
return;
cant_route_call:
+ rcu_read_unlock();
+
_debug("can't route call");
if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index e571403613c1..c21ad213b337 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -17,11 +17,12 @@ static int none_init_connection_security(struct rxrpc_connection *conn)
return 0;
}
-static void none_prime_packet_security(struct rxrpc_connection *conn)
+static int none_prime_packet_security(struct rxrpc_connection *conn)
{
+ return 0;
}
-static int none_secure_packet(const struct rxrpc_call *call,
+static int none_secure_packet(struct rxrpc_call *call,
struct sk_buff *skb,
size_t data_size,
void *sechdr)
@@ -29,7 +30,7 @@ static int none_secure_packet(const struct rxrpc_call *call,
return 0;
}
-static int none_verify_packet(const struct rxrpc_call *call,
+static int none_verify_packet(struct rxrpc_call *call,
struct sk_buff *skb,
u32 *_abort_code)
{
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 3ab7764f7cd8..a753796fbe8f 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -374,14 +374,17 @@ void __exit rxrpc_destroy_all_locals(void)
_enter("");
- if (list_empty(&rxrpc_local_endpoints))
- return;
+ flush_workqueue(rxrpc_workqueue);
- mutex_lock(&rxrpc_local_mutex);
- list_for_each_entry(local, &rxrpc_local_endpoints, link) {
- pr_err("AF_RXRPC: Leaked local %p {%d}\n",
- local, atomic_read(&local->usage));
+ if (!list_empty(&rxrpc_local_endpoints)) {
+ mutex_lock(&rxrpc_local_mutex);
+ list_for_each_entry(local, &rxrpc_local_endpoints, link) {
+ pr_err("AF_RXRPC: Leaked local %p {%d}\n",
+ local, atomic_read(&local->usage));
+ }
+ mutex_unlock(&rxrpc_local_mutex);
+ BUG();
}
- mutex_unlock(&rxrpc_local_mutex);
- BUG();
+
+ rcu_barrier();
}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 01d4930a11f7..538e9831c699 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -189,7 +189,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
INIT_WORK(&peer->error_distributor,
&rxrpc_peer_error_distributor);
peer->service_conns = RB_ROOT;
- rwlock_init(&peer->conn_lock);
+ seqlock_init(&peer->service_conn_lock);
spin_lock_init(&peer->lock);
peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 500cdcdc843c..ced5f07444e5 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -14,15 +14,15 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-static const char *const rxrpc_conn_states[] = {
- [RXRPC_CONN_UNUSED] = "Unused ",
- [RXRPC_CONN_CLIENT] = "Client ",
- [RXRPC_CONN_SERVER_UNSECURED] = "SvUnsec ",
- [RXRPC_CONN_SERVER_CHALLENGING] = "SvChall ",
- [RXRPC_CONN_SERVER] = "SvSecure",
- [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
- [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
- [RXRPC_CONN_NETWORK_ERROR] = "NetError",
+static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
+ [RXRPC_CONN_UNUSED] = "Unused ",
+ [RXRPC_CONN_CLIENT] = "Client ",
+ [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ",
+ [RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ",
+ [RXRPC_CONN_SERVICE] = "SvSecure",
+ [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
+ [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
+ [RXRPC_CONN_NETWORK_ERROR] = "NetError",
};
/*
@@ -137,7 +137,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
if (v == &rxrpc_connections) {
seq_puts(seq,
"Proto Local Remote "
- " SvID ConnID Calls End Use State Key "
+ " SvID ConnID End Use State Key "
" Serial ISerial\n"
);
return 0;
@@ -154,13 +154,12 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
ntohs(conn->params.peer->srx.transport.sin.sin_port));
seq_printf(seq,
- "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u"
+ "UDP %-22.22s %-22.22s %4x %08x %s %3u"
" %s %08x %08x %08x\n",
lbuff,
rbuff,
conn->params.service_id,
conn->proto.cid,
- conn->call_counter,
rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
atomic_read(&conn->usage),
rxrpc_conn_states[conn->state],
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 23c05ec6fa28..63afa9e9cc08 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -103,43 +103,43 @@ error:
* prime the encryption state with the invariant parts of a connection's
* description
*/
-static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
+static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
{
struct rxrpc_key_token *token;
SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
- struct scatterlist sg[2];
+ struct scatterlist sg;
struct rxrpc_crypt iv;
- struct {
- __be32 x[4];
- } tmpbuf __attribute__((aligned(16))); /* must all be in same page */
+ __be32 *tmpbuf;
+ size_t tmpsize = 4 * sizeof(__be32);
_enter("");
if (!conn->params.key)
- return;
+ return 0;
+
+ tmpbuf = kmalloc(tmpsize, GFP_KERNEL);
+ if (!tmpbuf)
+ return -ENOMEM;
token = conn->params.key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
- tmpbuf.x[0] = htonl(conn->proto.epoch);
- tmpbuf.x[1] = htonl(conn->proto.cid);
- tmpbuf.x[2] = 0;
- tmpbuf.x[3] = htonl(conn->security_ix);
-
- sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
- sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ tmpbuf[0] = htonl(conn->proto.epoch);
+ tmpbuf[1] = htonl(conn->proto.cid);
+ tmpbuf[2] = 0;
+ tmpbuf[3] = htonl(conn->security_ix);
+ sg_init_one(&sg, tmpbuf, tmpsize);
skcipher_request_set_tfm(req, conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+ skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
- ASSERTCMP((u32 __force)conn->csum_iv.n[0], ==, (u32 __force)tmpbuf.x[2]);
-
- _leave("");
+ memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv));
+ kfree(tmpbuf);
+ _leave(" = 0");
+ return 0;
}
/*
@@ -152,12 +152,9 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
{
struct rxrpc_skb_priv *sp;
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+ struct rxkad_level1_hdr hdr;
struct rxrpc_crypt iv;
- struct scatterlist sg[2];
- struct {
- struct rxkad_level1_hdr hdr;
- __be32 first; /* first four bytes of data and padding */
- } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ struct scatterlist sg;
u16 check;
sp = rxrpc_skb(skb);
@@ -167,24 +164,19 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
check = sp->hdr.seq ^ sp->hdr.callNumber;
data_size |= (u32)check << 16;
- tmpbuf.hdr.data_size = htonl(data_size);
- memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first));
+ hdr.data_size = htonl(data_size);
+ memcpy(sechdr, &hdr, sizeof(hdr));
/* start the encryption afresh */
memset(&iv, 0, sizeof(iv));
- sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
- sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
-
+ sg_init_one(&sg, sechdr, 8);
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+ skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- memcpy(sechdr, &tmpbuf, sizeof(tmpbuf));
-
_leave(" = 0");
return 0;
}
@@ -198,8 +190,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
void *sechdr)
{
const struct rxrpc_key_token *token;
- struct rxkad_level2_hdr rxkhdr
- __attribute__((aligned(8))); /* must be all on one page */
+ struct rxkad_level2_hdr rxkhdr;
struct rxrpc_skb_priv *sp;
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
@@ -218,18 +209,16 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
rxkhdr.data_size = htonl(data_size | (u32)check << 16);
rxkhdr.checksum = 0;
+ memcpy(sechdr, &rxkhdr, sizeof(rxkhdr));
/* encrypt from the session key */
token = call->conn->params.key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
sg_init_one(&sg[0], sechdr, sizeof(rxkhdr));
- sg_init_one(&sg[1], &rxkhdr, sizeof(rxkhdr));
-
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(rxkhdr), iv.x);
-
+ skcipher_request_set_crypt(req, &sg[0], &sg[0], sizeof(rxkhdr), iv.x);
crypto_skcipher_encrypt(req);
/* we want to encrypt the skbuff in-place */
@@ -243,9 +232,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
sg_init_table(sg, nsg);
skb_to_sgvec(skb, sg, 0, len);
-
skcipher_request_set_crypt(req, sg, sg, len, iv.x);
-
crypto_skcipher_encrypt(req);
_leave(" = 0");
@@ -259,7 +246,7 @@ out:
/*
* checksum an RxRPC packet header
*/
-static int rxkad_secure_packet(const struct rxrpc_call *call,
+static int rxkad_secure_packet(struct rxrpc_call *call,
struct sk_buff *skb,
size_t data_size,
void *sechdr)
@@ -267,10 +254,7 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
struct rxrpc_skb_priv *sp;
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
- struct scatterlist sg[2];
- struct {
- __be32 x[2];
- } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ struct scatterlist sg;
u32 x, y;
int ret;
@@ -293,20 +277,17 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
/* calculate the security checksum */
x = call->channel << (32 - RXRPC_CIDSHIFT);
x |= sp->hdr.seq & 0x3fffffff;
- tmpbuf.x[0] = htonl(sp->hdr.callNumber);
- tmpbuf.x[1] = htonl(x);
-
- sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
- sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ call->crypto_buf[0] = htonl(sp->hdr.callNumber);
+ call->crypto_buf[1] = htonl(x);
+ sg_init_one(&sg, call->crypto_buf, 8);
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+ skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- y = ntohl(tmpbuf.x[1]);
+ y = ntohl(call->crypto_buf[1]);
y = (y >> 16) & 0xffff;
if (y == 0)
y = 1; /* zero checksums are not permitted */
@@ -367,7 +348,6 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, 8, iv.x);
-
crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
@@ -452,7 +432,6 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x);
-
crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
if (sg != _sg)
@@ -498,17 +477,14 @@ nomem:
/*
* verify the security on a received packet
*/
-static int rxkad_verify_packet(const struct rxrpc_call *call,
+static int rxkad_verify_packet(struct rxrpc_call *call,
struct sk_buff *skb,
u32 *_abort_code)
{
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_skb_priv *sp;
struct rxrpc_crypt iv;
- struct scatterlist sg[2];
- struct {
- __be32 x[2];
- } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ struct scatterlist sg;
u16 cksum;
u32 x, y;
int ret;
@@ -533,20 +509,17 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
/* validate the security checksum */
x = call->channel << (32 - RXRPC_CIDSHIFT);
x |= sp->hdr.seq & 0x3fffffff;
- tmpbuf.x[0] = htonl(call->call_id);
- tmpbuf.x[1] = htonl(x);
-
- sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
- sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ call->crypto_buf[0] = htonl(call->call_id);
+ call->crypto_buf[1] = htonl(x);
+ sg_init_one(&sg, call->crypto_buf, 8);
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+ skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- y = ntohl(tmpbuf.x[1]);
+ y = ntohl(call->crypto_buf[1]);
cksum = (y >> 16) & 0xffff;
if (cksum == 0)
cksum = 1; /* zero checksums are not permitted */
@@ -710,29 +683,6 @@ static void rxkad_calc_response_checksum(struct rxkad_response *response)
}
/*
- * load a scatterlist with a potentially split-page buffer
- */
-static void rxkad_sg_set_buf2(struct scatterlist sg[2],
- void *buf, size_t buflen)
-{
- int nsg = 1;
-
- sg_init_table(sg, 2);
-
- sg_set_buf(&sg[0], buf, buflen);
- if (sg[0].offset + buflen > PAGE_SIZE) {
- /* the buffer was split over two pages */
- sg[0].length = PAGE_SIZE - sg[0].offset;
- sg_set_buf(&sg[1], buf + sg[0].length, buflen - sg[0].length);
- nsg++;
- }
-
- sg_mark_end(&sg[nsg - 1]);
-
- ASSERTCMP(sg[0].length + sg[1].length, ==, buflen);
-}
-
-/*
* encrypt the response packet
*/
static void rxkad_encrypt_response(struct rxrpc_connection *conn,
@@ -741,17 +691,16 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn,
{
SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
struct rxrpc_crypt iv;
- struct scatterlist sg[2];
+ struct scatterlist sg[1];
/* continue encrypting from where we left off */
memcpy(&iv, s2->session_key, sizeof(iv));
- rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
-
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
skcipher_request_set_tfm(req, conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
-
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
}
@@ -818,14 +767,10 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
resp.kvno = htonl(token->kad->kvno);
resp.ticket_len = htonl(token->kad->ticket_len);
- resp.encrypted.call_id[0] =
- htonl(conn->channels[0] ? conn->channels[0]->call_id : 0);
- resp.encrypted.call_id[1] =
- htonl(conn->channels[1] ? conn->channels[1]->call_id : 0);
- resp.encrypted.call_id[2] =
- htonl(conn->channels[2] ? conn->channels[2]->call_id : 0);
- resp.encrypted.call_id[3] =
- htonl(conn->channels[3] ? conn->channels[3]->call_id : 0);
+ resp.encrypted.call_id[0] = htonl(conn->channels[0].call_counter);
+ resp.encrypted.call_id[1] = htonl(conn->channels[1].call_counter);
+ resp.encrypted.call_id[2] = htonl(conn->channels[2].call_counter);
+ resp.encrypted.call_id[3] = htonl(conn->channels[3].call_counter);
/* calculate the response checksum and then do the encryption */
rxkad_calc_response_checksum(&resp);
@@ -887,10 +832,8 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
}
sg_init_one(&sg[0], ticket, ticket_len);
-
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, ticket_len, iv.x);
-
crypto_skcipher_decrypt(req);
skcipher_request_free(req);
@@ -1001,7 +944,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
const struct rxrpc_crypt *session_key)
{
SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
- struct scatterlist sg[2];
+ struct scatterlist sg[1];
struct rxrpc_crypt iv;
_enter(",,%08x%08x",
@@ -1016,12 +959,11 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
memcpy(&iv, session_key, sizeof(iv));
- rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
-
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
skcipher_request_set_tfm(req, rxkad_ci);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
-
crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
@@ -1045,7 +987,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
void *ticket;
u32 abort_code, version, kvno, ticket_len, level;
__be32 csum;
- int ret;
+ int ret, i;
_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
@@ -1108,11 +1050,26 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
if (response.encrypted.checksum != csum)
goto protocol_error_free;
- if (ntohl(response.encrypted.call_id[0]) > INT_MAX ||
- ntohl(response.encrypted.call_id[1]) > INT_MAX ||
- ntohl(response.encrypted.call_id[2]) > INT_MAX ||
- ntohl(response.encrypted.call_id[3]) > INT_MAX)
- goto protocol_error_free;
+ spin_lock(&conn->channel_lock);
+ for (i = 0; i < RXRPC_MAXCALLS; i++) {
+ struct rxrpc_call *call;
+ u32 call_id = ntohl(response.encrypted.call_id[i]);
+
+ if (call_id > INT_MAX)
+ goto protocol_error_unlock;
+
+ if (call_id < conn->channels[i].call_counter)
+ goto protocol_error_unlock;
+ if (call_id > conn->channels[i].call_counter) {
+ call = rcu_dereference_protected(
+ conn->channels[i].call,
+ lockdep_is_held(&conn->channel_lock));
+ if (call && call->state < RXRPC_CALL_COMPLETE)
+ goto protocol_error_unlock;
+ conn->channels[i].call_counter = call_id;
+ }
+ }
+ spin_unlock(&conn->channel_lock);
abort_code = RXKADOUTOFSEQUENCE;
if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1)
@@ -1137,6 +1094,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_leave(" = 0");
return 0;
+protocol_error_unlock:
+ spin_unlock(&conn->channel_lock);
protocol_error_free:
kfree(ticket);
protocol_error:
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
index f28122a15a24..b88914d53ca5 100644
--- a/net/rxrpc/utils.c
+++ b/net/rxrpc/utils.c
@@ -10,32 +10,37 @@
*/
#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/udp.h>
#include "ar-internal.h"
/*
- * Set up an RxRPC address from a socket buffer.
+ * Fill out a peer address from a socket buffer containing a packet.
*/
-void rxrpc_get_addr_from_skb(struct rxrpc_local *local,
- const struct sk_buff *skb,
- struct sockaddr_rxrpc *srx)
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
{
memset(srx, 0, sizeof(*srx));
- srx->transport_type = local->srx.transport_type;
- srx->transport.family = local->srx.transport.family;
- /* Can we see an ipv4 UDP packet on an ipv6 UDP socket? and vice
- * versa?
- */
- switch (srx->transport.family) {
- case AF_INET:
+ switch (ntohs(skb->protocol)) {
+ case ETH_P_IP:
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.sin.sin_family = AF_INET;
srx->transport.sin.sin_port = udp_hdr(skb)->source;
- srx->transport_len = sizeof(struct sockaddr_in);
- memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
- sizeof(struct in_addr));
- break;
+ srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ return 0;
+
+ case ETH_P_IPV6:
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin6);
+ srx->transport.sin6.sin6_family = AF_INET6;
+ srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
+ srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
+ return 0;
default:
- BUG();
+ pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n",
+ ntohs(skb->protocol));
+ return -EAFNOSUPPORT;
}
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 5b135d357e1e..70cfbbf96af2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -181,7 +181,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
if (!(at & AT_EGRESS)) {
if (m->tcfm_ok_push)
- skb_push(skb2, skb->mac_len);
+ skb_push_rcsum(skb2, skb->mac_len);
}
/* mirror is always swallowed */
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 53d1486cddf7..8e573c0f8742 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -47,6 +47,8 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
skb_set_queue_mapping(skb, d->queue_mapping);
if (d->flags & SKBEDIT_F_MARK)
skb->mark = d->mark;
+ if (d->flags & SKBEDIT_F_PTYPE)
+ skb->pkt_type = d->ptype;
spin_unlock(&d->tcf_lock);
return d->tcf_action;
@@ -57,6 +59,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
[TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) },
[TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) },
[TCA_SKBEDIT_MARK] = { .len = sizeof(u32) },
+ [TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) },
};
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -68,7 +71,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct tc_skbedit *parm;
struct tcf_skbedit *d;
u32 flags = 0, *priority = NULL, *mark = NULL;
- u16 *queue_mapping = NULL;
+ u16 *queue_mapping = NULL, *ptype = NULL;
bool exists = false;
int ret = 0, err;
@@ -92,6 +95,13 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
}
+ if (tb[TCA_SKBEDIT_PTYPE] != NULL) {
+ ptype = nla_data(tb[TCA_SKBEDIT_PTYPE]);
+ if (!skb_pkt_type_ok(*ptype))
+ return -EINVAL;
+ flags |= SKBEDIT_F_PTYPE;
+ }
+
if (tb[TCA_SKBEDIT_MARK] != NULL) {
flags |= SKBEDIT_F_MARK;
mark = nla_data(tb[TCA_SKBEDIT_MARK]);
@@ -132,6 +142,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
d->queue_mapping = *queue_mapping;
if (flags & SKBEDIT_F_MARK)
d->mark = *mark;
+ if (flags & SKBEDIT_F_PTYPE)
+ d->ptype = *ptype;
d->tcf_action = parm->action;
@@ -158,16 +170,16 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
if ((d->flags & SKBEDIT_F_PRIORITY) &&
- nla_put(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
- &d->priority))
+ nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, d->priority))
goto nla_put_failure;
if ((d->flags & SKBEDIT_F_QUEUE_MAPPING) &&
- nla_put(skb, TCA_SKBEDIT_QUEUE_MAPPING,
- sizeof(d->queue_mapping), &d->queue_mapping))
+ nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, d->queue_mapping))
goto nla_put_failure;
if ((d->flags & SKBEDIT_F_MARK) &&
- nla_put(skb, TCA_SKBEDIT_MARK, sizeof(d->mark),
- &d->mark))
+ nla_put_u32(skb, TCA_SKBEDIT_MARK, d->mark))
+ goto nla_put_failure;
+ if ((d->flags & SKBEDIT_F_PTYPE) &&
+ nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype))
goto nla_put_failure;
tcf_tm_dump(&t, &d->tcf_tm);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index dff92ea772fe..3ddc7bd74ecb 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -115,9 +115,9 @@ struct hfsc_class {
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
struct gnet_stats_rate_est64 rate_est;
- unsigned int level; /* class level in hierarchy */
struct tcf_proto __rcu *filter_list; /* filter list */
unsigned int filter_cnt; /* filter count */
+ unsigned int level; /* class level in hierarchy */
struct hfsc_sched *sched; /* scheduler data */
struct hfsc_class *cl_parent; /* parent class */
@@ -165,10 +165,10 @@ struct hfsc_class {
struct runtime_sc cl_virtual; /* virtual curve */
struct runtime_sc cl_ulimit; /* upperlimit curve */
- unsigned long cl_flags; /* which curves are valid */
- unsigned long cl_vtperiod; /* vt period sequence number */
- unsigned long cl_parentperiod;/* parent's vt period sequence number*/
- unsigned long cl_nactive; /* number of active children */
+ u8 cl_flags; /* which curves are valid */
+ u32 cl_vtperiod; /* vt period sequence number */
+ u32 cl_parentperiod;/* parent's vt period sequence number*/
+ u32 cl_nactive; /* number of active children */
};
struct hfsc_sched {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index e1849f3714ad..1c23060c41a6 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -268,6 +268,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
goto fail_init;
asoc->active_key_id = ep->active_key_id;
+ asoc->prsctp_enable = ep->prsctp_enable;
/* Save the hmacs and chunks list into this association */
if (ep->auth_hmacs_list)
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 1eb94bf18ef4..a55e54738b81 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -335,13 +335,32 @@ errout:
/* Check whether this message has expired. */
int sctp_chunk_abandoned(struct sctp_chunk *chunk)
{
- struct sctp_datamsg *msg = chunk->msg;
+ if (!chunk->asoc->prsctp_enable ||
+ !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) {
+ struct sctp_datamsg *msg = chunk->msg;
+
+ if (!msg->can_abandon)
+ return 0;
+
+ if (time_after(jiffies, msg->expires_at))
+ return 1;
- if (!msg->can_abandon)
return 0;
+ }
- if (time_after(jiffies, msg->expires_at))
+ if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
+ time_after(jiffies, chunk->prsctp_param)) {
+ if (chunk->sent_count)
+ chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
+ else
+ chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+ return 1;
+ } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
+ chunk->sent_count > chunk->prsctp_param) {
+ chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
return 1;
+ }
+ /* PRIO policy is processed by sendmsg, not here */
return 0;
}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 9d494e35e7f9..1f03065686fe 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -163,6 +163,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
*/
ep->auth_hmacs_list = auth_hmacs;
ep->auth_chunk_list = auth_chunks;
+ ep->prsctp_enable = net->sctp.prsctp_enable;
return ep;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 6f8e676d285e..30d72f7707b6 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -90,17 +90,6 @@ static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb)
return 0;
}
-struct sctp_input_cb {
- union {
- struct inet_skb_parm h4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_skb_parm h6;
-#endif
- } header;
- struct sctp_chunk *chunk;
-};
-#define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0]))
-
/*
* This is the routine which IP calls when receiving an SCTP packet.
*/
@@ -151,6 +140,7 @@ int sctp_rcv(struct sk_buff *skb)
af = sctp_get_af_specific(family);
if (unlikely(!af))
goto discard_it;
+ SCTP_INPUT_CB(skb)->af = af;
/* Initialize local addresses for lookups. */
af->from_skb(&src, skb, 1);
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index edabbbdfca54..942770675f4c 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -217,7 +217,14 @@ new_skb:
chunk->auth = 0;
chunk->has_asconf = 0;
chunk->end_of_packet = 0;
- chunk->ecn_ce_done = 0;
+ if (chunk->head_skb) {
+ struct sctp_input_cb
+ *cb = SCTP_INPUT_CB(chunk->skb),
+ *head_cb = SCTP_INPUT_CB(chunk->head_skb);
+
+ cb->chunk = head_cb->chunk;
+ cb->af = head_cb->af;
+ }
}
chunk->chunk_hdr = ch;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0657d18a85bf..ae6f1a2178ba 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -420,6 +420,7 @@ static void sctp_v6_from_skb(union sctp_addr *addr, struct sk_buff *skb,
addr->v6.sin6_flowinfo = 0; /* FIXME */
addr->v6.sin6_scope_id = ((struct inet6_skb_parm *)skb->cb)->iif;
+ /* Always called on head skb, so this is safe */
sh = sctp_hdr(skb);
if (is_saddr) {
*port = sh->source;
@@ -710,8 +711,7 @@ static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr)
/* Where did this skb come from? */
static int sctp_v6_skb_iif(const struct sk_buff *skb)
{
- struct inet6_skb_parm *opt = (struct inet6_skb_parm *) skb->cb;
- return opt->iif;
+ return IP6CB(skb)->iif;
}
/* Was this packet marked by Explicit Congestion Notification? */
@@ -780,15 +780,14 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
if (ip_hdr(skb)->version == 4) {
addr->v4.sin_family = AF_INET;
addr->v4.sin_port = sh->source;
- addr->v4.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ addr->v4.sin_addr.s_addr = ip_hdr(skb)->saddr;
} else {
addr->v6.sin6_family = AF_INET6;
addr->v6.sin6_flowinfo = 0;
addr->v6.sin6_port = sh->source;
addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
- struct sctp_ulpevent *ev = sctp_skb2event(skb);
- addr->v6.sin6_scope_id = ev->iif;
+ addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb);
}
}
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index a37887b373a7..7e869d0cca69 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -92,7 +92,28 @@ static const struct net_offload sctp_offload = {
},
};
+static const struct net_offload sctp6_offload = {
+ .callbacks = {
+ .gso_segment = sctp_gso_segment,
+ },
+};
+
int __init sctp_offload_init(void)
{
- return inet_add_offload(&sctp_offload, IPPROTO_SCTP);
+ int ret;
+
+ ret = inet_add_offload(&sctp_offload, IPPROTO_SCTP);
+ if (ret)
+ goto out;
+
+ ret = inet6_add_offload(&sctp6_offload, IPPROTO_SCTP);
+ if (ret)
+ goto ipv4;
+
+ return ret;
+
+ipv4:
+ inet_del_offload(&sctp_offload, IPPROTO_SCTP);
+out:
+ return ret;
}
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 1541a91d6d9d..7425f6c23888 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -316,6 +316,8 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
packet->has_data = 1;
/* timestamp the chunk for rtx purposes */
chunk->sent_at = jiffies;
+ /* Mainly used for prsctp RTX policy */
+ chunk->sent_count++;
break;
case SCTP_CID_COOKIE_ECHO:
packet->has_cookie_echo = 1;
@@ -582,9 +584,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
*/
pkt_size -= WORD_ROUND(chunk->skb->len);
- if (chunk == packet->auth && !list_empty(&packet->chunk_list))
- list_add(&chunk->list, &packet->chunk_list);
- else if (!sctp_chunk_is_data(chunk))
+ if (!sctp_chunk_is_data(chunk) && chunk != packet->auth)
sctp_chunk_free(chunk);
if (!pkt_size)
@@ -605,6 +605,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
(struct sctp_auth_chunk *)auth,
gfp);
+ if (packet->auth) {
+ if (!list_empty(&packet->chunk_list)) {
+ /* We will generate more packets, so re-queue
+ * auth chunk.
+ */
+ list_add(&chunk->list, &packet->chunk_list);
+ } else {
+ sctp_chunk_free(packet->auth);
+ packet->auth = NULL;
+ }
+ }
+
if (!gso)
break;
@@ -735,6 +747,8 @@ err:
}
goto out;
nomem:
+ if (packet->auth && list_empty(&packet->auth->list))
+ sctp_chunk_free(packet->auth);
err = -ENOMEM;
goto err;
}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 084718f9b3da..72e54a416af6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -326,6 +326,9 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
sctp_chunk_hold(chunk);
sctp_outq_tail_data(q, chunk);
+ if (chunk->asoc->prsctp_enable &&
+ SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+ chunk->asoc->sent_cnt_removable++;
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
else
@@ -372,6 +375,96 @@ static void sctp_insert_list(struct list_head *head, struct list_head *new)
list_add_tail(new, head);
}
+static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo,
+ struct list_head *queue, int msg_len)
+{
+ struct sctp_chunk *chk, *temp;
+
+ list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
+ if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+ chk->prsctp_param <= sinfo->sinfo_timetolive)
+ continue;
+
+ list_del_init(&chk->transmitted_list);
+ sctp_insert_list(&asoc->outqueue.abandoned,
+ &chk->transmitted_list);
+
+ asoc->sent_cnt_removable--;
+ asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
+
+ if (!chk->tsn_gap_acked) {
+ if (chk->transport)
+ chk->transport->flight_size -=
+ sctp_data_size(chk);
+ asoc->outqueue.outstanding_bytes -= sctp_data_size(chk);
+ }
+
+ msg_len -= SCTP_DATA_SNDSIZE(chk) +
+ sizeof(struct sk_buff) +
+ sizeof(struct sctp_chunk);
+ if (msg_len <= 0)
+ break;
+ }
+
+ return msg_len;
+}
+
+static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo,
+ struct list_head *queue, int msg_len)
+{
+ struct sctp_chunk *chk, *temp;
+
+ list_for_each_entry_safe(chk, temp, queue, list) {
+ if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+ chk->prsctp_param <= sinfo->sinfo_timetolive)
+ continue;
+
+ list_del_init(&chk->list);
+ asoc->sent_cnt_removable--;
+ asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+
+ msg_len -= SCTP_DATA_SNDSIZE(chk) +
+ sizeof(struct sk_buff) +
+ sizeof(struct sctp_chunk);
+ sctp_chunk_free(chk);
+ if (msg_len <= 0)
+ break;
+ }
+
+ return msg_len;
+}
+
+/* Abandon the chunks according their priorities */
+void sctp_prsctp_prune(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo, int msg_len)
+{
+ struct sctp_transport *transport;
+
+ if (!asoc->prsctp_enable || !asoc->sent_cnt_removable)
+ return;
+
+ msg_len = sctp_prsctp_prune_sent(asoc, sinfo,
+ &asoc->outqueue.retransmit,
+ msg_len);
+ if (msg_len <= 0)
+ return;
+
+ list_for_each_entry(transport, &asoc->peer.transport_addr_list,
+ transports) {
+ msg_len = sctp_prsctp_prune_sent(asoc, sinfo,
+ &transport->transmitted,
+ msg_len);
+ if (msg_len <= 0)
+ return;
+ }
+
+ sctp_prsctp_prune_unsent(asoc, sinfo,
+ &asoc->outqueue.out_chunk_list,
+ msg_len);
+}
+
/* Mark all the eligible packets on a transport for retransmission. */
void sctp_retransmit_mark(struct sctp_outq *q,
struct sctp_transport *transport,
@@ -962,6 +1055,9 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
/* Mark as failed send. */
sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
+ if (asoc->prsctp_enable &&
+ SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+ asoc->sent_cnt_removable--;
sctp_chunk_free(chunk);
continue;
}
@@ -1251,6 +1347,9 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
tsn = ntohl(tchunk->subh.data_hdr->tsn);
if (TSN_lte(tsn, ctsn)) {
list_del_init(&tchunk->transmitted_list);
+ if (asoc->prsctp_enable &&
+ SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+ asoc->sent_cnt_removable--;
sctp_chunk_free(tchunk);
}
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 3b56ae55aba3..1adb9270e317 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -240,6 +240,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
port = &addr->v4.sin_port;
addr->v4.sin_family = AF_INET;
+ /* Always called on head skb, so this is safe */
sh = sctp_hdr(skb);
if (is_saddr) {
*port = sh->source;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 56f364d8f932..8c77b87a8565 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -108,14 +108,9 @@ static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
/* What was the inbound interface for this chunk? */
int sctp_chunk_iif(const struct sctp_chunk *chunk)
{
- struct sctp_af *af;
- int iif = 0;
-
- af = sctp_get_af_specific(ipver2af(ip_hdr(chunk->skb)->version));
- if (af)
- iif = af->skb_iif(chunk->skb);
+ struct sk_buff *skb = chunk->skb;
- return iif;
+ return SCTP_INPUT_CB(skb)->af->skb_iif(skb);
}
/* RFC 2960 3.3.2 Initiation (INIT) (1)
@@ -261,7 +256,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types));
chunksize += sizeof(ecap_param);
- if (net->sctp.prsctp_enable)
+ if (asoc->prsctp_enable)
chunksize += sizeof(prsctp_param);
/* ADDIP: Section 4.2.7:
@@ -355,7 +350,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
sctp_addto_param(retval, num_ext, extensions);
}
- if (net->sctp.prsctp_enable)
+ if (asoc->prsctp_enable)
sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param);
if (sp->adaptation_ind) {
@@ -711,6 +706,20 @@ nodata:
return retval;
}
+static void sctp_set_prsctp_policy(struct sctp_chunk *chunk,
+ const struct sctp_sndrcvinfo *sinfo)
+{
+ if (!chunk->asoc->prsctp_enable)
+ return;
+
+ if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
+ chunk->prsctp_param =
+ jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
+ else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) ||
+ SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags))
+ chunk->prsctp_param = sinfo->sinfo_timetolive;
+}
+
/* Make a DATA chunk for the given association from the provided
* parameters. However, do not populate the data payload.
*/
@@ -744,6 +753,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
+ sctp_set_prsctp_policy(retval, sinfo);
nodata:
return retval;
@@ -1585,7 +1595,6 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
struct sctp_association *asoc;
struct sk_buff *skb;
sctp_scope_t scope;
- struct sctp_af *af;
/* Create the bare association. */
scope = sctp_scope(sctp_source(chunk));
@@ -1595,16 +1604,10 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
asoc->temp = 1;
skb = chunk->skb;
/* Create an entry for the source address of the packet. */
- af = sctp_get_af_specific(ipver2af(ip_hdr(skb)->version));
- if (unlikely(!af))
- goto fail;
- af->from_skb(&asoc->c.peer_addr, skb, 1);
+ SCTP_INPUT_CB(skb)->af->from_skb(&asoc->c.peer_addr, skb, 1);
+
nodata:
return asoc;
-
-fail:
- sctp_association_free(asoc);
- return NULL;
}
/* Build a cookie representing asoc.
@@ -2024,8 +2027,8 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
for (i = 0; i < num_ext; i++) {
switch (param.ext->chunks[i]) {
case SCTP_CID_FWD_TSN:
- if (net->sctp.prsctp_enable && !asoc->peer.prsctp_capable)
- asoc->peer.prsctp_capable = 1;
+ if (asoc->prsctp_enable && !asoc->peer.prsctp_capable)
+ asoc->peer.prsctp_capable = 1;
break;
case SCTP_CID_AUTH:
/* if the peer reports AUTH, assume that he
@@ -2169,7 +2172,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
break;
case SCTP_PARAM_FWD_TSN_SUPPORT:
- if (net->sctp.prsctp_enable)
+ if (ep->prsctp_enable)
break;
goto fallthrough;
@@ -2653,7 +2656,7 @@ do_addr_param:
break;
case SCTP_PARAM_FWD_TSN_SUPPORT:
- if (net->sctp.prsctp_enable) {
+ if (asoc->prsctp_enable) {
asoc->peer.prsctp_capable = 1;
break;
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index f1f08c8f277b..d88bb2b0b699 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -6118,14 +6118,11 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* chunk later.
*/
- if (!chunk->ecn_ce_done) {
- struct sctp_af *af;
+ if (asoc->peer.ecn_capable && !chunk->ecn_ce_done) {
+ struct sctp_af *af = SCTP_INPUT_CB(chunk->skb)->af;
chunk->ecn_ce_done = 1;
- af = sctp_get_af_specific(
- ipver2af(ip_hdr(chunk->skb)->version));
-
- if (af && af->is_ce(chunk->skb) && asoc->peer.ecn_capable) {
+ if (af->is_ce(sctp_gso_headskb(chunk->skb))) {
/* Do real work as sideffect. */
sctp_add_cmd_sf(commands, SCTP_CMD_ECN_CE,
SCTP_U32(tsn));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index cdabbd8219b1..d2681cb1dd30 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -202,7 +202,7 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id)
* could be a TCP-style listening socket or a socket which
* hasn't yet called connect() to establish an association.
*/
- if (!sctp_sstate(sk, ESTABLISHED))
+ if (!sctp_sstate(sk, ESTABLISHED) && !sctp_sstate(sk, CLOSING))
return NULL;
/* Get the first and the only association from the list. */
@@ -1068,7 +1068,7 @@ static int __sctp_connect(struct sock *sk,
* is already connected.
* It cannot be done even on a TCP-style listening socket.
*/
- if (sctp_sstate(sk, ESTABLISHED) ||
+ if (sctp_sstate(sk, ESTABLISHED) || sctp_sstate(sk, CLOSING) ||
(sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) {
err = -EISCONN;
goto out_free;
@@ -1705,18 +1705,19 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
if (msg_name) {
/* Look for a matching association on the endpoint. */
asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport);
- if (!asoc) {
- /* If we could not find a matching association on the
- * endpoint, make sure that it is not a TCP-style
- * socket that already has an association or there is
- * no peeled-off association on another socket.
- */
- if ((sctp_style(sk, TCP) &&
- sctp_sstate(sk, ESTABLISHED)) ||
- sctp_endpoint_is_peeled_off(ep, &to)) {
- err = -EADDRNOTAVAIL;
- goto out_unlock;
- }
+
+ /* If we could not find a matching association on the
+ * endpoint, make sure that it is not a TCP-style
+ * socket that already has an association or there is
+ * no peeled-off association on another socket.
+ */
+ if (!asoc &&
+ ((sctp_style(sk, TCP) &&
+ (sctp_sstate(sk, ESTABLISHED) ||
+ sctp_sstate(sk, CLOSING))) ||
+ sctp_endpoint_is_peeled_off(ep, &to))) {
+ err = -EADDRNOTAVAIL;
+ goto out_unlock;
}
} else {
asoc = sctp_id2assoc(sk, associd);
@@ -1914,6 +1915,9 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
goto out_free;
}
+ if (sctp_wspace(asoc) < msg_len)
+ sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
+
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
@@ -2063,7 +2067,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
{
struct sctp_ulpevent *event = NULL;
struct sctp_sock *sp = sctp_sk(sk);
- struct sk_buff *skb;
+ struct sk_buff *skb, *head_skb;
int copied;
int err = 0;
int skb_len;
@@ -2074,7 +2078,8 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
lock_sock(sk);
- if (sctp_style(sk, TCP) && !sctp_sstate(sk, ESTABLISHED)) {
+ if (sctp_style(sk, TCP) && !sctp_sstate(sk, ESTABLISHED) &&
+ !sctp_sstate(sk, CLOSING)) {
err = -ENOTCONN;
goto out;
}
@@ -2099,12 +2104,16 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (err)
goto out_free;
- sock_recv_ts_and_drops(msg, sk, skb);
+ if (event->chunk && event->chunk->head_skb)
+ head_skb = event->chunk->head_skb;
+ else
+ head_skb = skb;
+ sock_recv_ts_and_drops(msg, sk, head_skb);
if (sctp_ulpevent_is_notification(event)) {
msg->msg_flags |= MSG_NOTIFICATION;
sp->pf->event_msgname(event, msg->msg_name, addr_len);
} else {
- sp->pf->skb_msgname(skb, msg->msg_name, addr_len);
+ sp->pf->skb_msgname(head_skb, msg->msg_name, addr_len);
}
/* Check if we allow SCTP_NXTINFO. */
@@ -3661,6 +3670,80 @@ static int sctp_setsockopt_recvnxtinfo(struct sock *sk,
return 0;
}
+static int sctp_setsockopt_pr_supported(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(params))
+ goto out;
+
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ asoc->prsctp_enable = !!params.assoc_value;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ sp->ep->prsctp_enable = !!params.assoc_value;
+ } else {
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_setsockopt_default_prinfo(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_default_prinfo info;
+ struct sctp_association *asoc;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(info))
+ goto out;
+
+ if (copy_from_user(&info, optval, sizeof(info))) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (info.pr_policy & ~SCTP_PR_SCTP_MASK)
+ goto out;
+
+ if (info.pr_policy == SCTP_PR_SCTP_NONE)
+ info.pr_value = 0;
+
+ asoc = sctp_id2assoc(sk, info.pr_assoc_id);
+ if (asoc) {
+ SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy);
+ asoc->default_timetolive = info.pr_value;
+ } else if (!info.pr_assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy);
+ sp->default_timetolive = info.pr_value;
+ } else {
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -3821,6 +3904,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_RECVNXTINFO:
retval = sctp_setsockopt_recvnxtinfo(sk, optval, optlen);
break;
+ case SCTP_PR_SUPPORTED:
+ retval = sctp_setsockopt_pr_supported(sk, optval, optlen);
+ break;
+ case SCTP_DEFAULT_PRINFO:
+ retval = sctp_setsockopt_default_prinfo(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -6166,6 +6255,148 @@ static int sctp_getsockopt_recvnxtinfo(struct sock *sk, int len,
return 0;
}
+static int sctp_getsockopt_pr_supported(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ params.assoc_value = asoc->prsctp_enable;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ params.assoc_value = sp->ep->prsctp_enable;
+ } else {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_default_prinfo(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_default_prinfo info;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(info)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(info);
+ if (copy_from_user(&info, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, info.pr_assoc_id);
+ if (asoc) {
+ info.pr_policy = SCTP_PR_POLICY(asoc->default_flags);
+ info.pr_value = asoc->default_timetolive;
+ } else if (!info.pr_assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ info.pr_policy = SCTP_PR_POLICY(sp->default_flags);
+ info.pr_value = sp->default_timetolive;
+ } else {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &info, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_prstatus params;
+ struct sctp_association *asoc;
+ int policy;
+ int retval = -EINVAL;
+
+ if (len < sizeof(params))
+ goto out;
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ policy = params.sprstat_policy;
+ if (policy & ~SCTP_PR_SCTP_MASK)
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
+ if (!asoc)
+ goto out;
+
+ if (policy == SCTP_PR_SCTP_NONE) {
+ params.sprstat_abandoned_unsent = 0;
+ params.sprstat_abandoned_sent = 0;
+ for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
+ params.sprstat_abandoned_unsent +=
+ asoc->abandoned_unsent[policy];
+ params.sprstat_abandoned_sent +=
+ asoc->abandoned_sent[policy];
+ }
+ } else {
+ params.sprstat_abandoned_unsent =
+ asoc->abandoned_unsent[__SCTP_PR_INDEX(policy)];
+ params.sprstat_abandoned_sent =
+ asoc->abandoned_sent[__SCTP_PR_INDEX(policy)];
+ }
+
+ if (put_user(len, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (copy_to_user(optval, &params, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -6319,6 +6550,17 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_RECVNXTINFO:
retval = sctp_getsockopt_recvnxtinfo(sk, len, optval, optlen);
break;
+ case SCTP_PR_SUPPORTED:
+ retval = sctp_getsockopt_pr_supported(sk, len, optval, optlen);
+ break;
+ case SCTP_DEFAULT_PRINFO:
+ retval = sctp_getsockopt_default_prinfo(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_PR_ASSOC_STATUS:
+ retval = sctp_getsockopt_pr_assocstatus(sk, len, optval,
+ optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -6866,7 +7108,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->srinfo->sinfo_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
- SCTP_SACK_IMMEDIATELY |
+ SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
@@ -6890,7 +7132,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->sinfo->snd_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
- SCTP_SACK_IMMEDIATELY |
+ SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index d1e38308f615..f6219b164b42 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -51,7 +51,7 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event);
/* Initialize an ULP event from an given skb. */
static void sctp_ulpevent_init(struct sctp_ulpevent *event,
- int msg_flags,
+ __u16 msg_flags,
unsigned int len)
{
memset(event, 0, sizeof(struct sctp_ulpevent));
@@ -60,7 +60,7 @@ static void sctp_ulpevent_init(struct sctp_ulpevent *event,
}
/* Create a new sctp_ulpevent. */
-static struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags,
+static struct sctp_ulpevent *sctp_ulpevent_new(int size, __u16 msg_flags,
gfp_t gfp)
{
struct sctp_ulpevent *event;
@@ -701,6 +701,12 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
sctp_ulpevent_receive_data(event, asoc);
+ /* And hold the chunk as we need it for getting the IP headers
+ * later in recvmsg
+ */
+ sctp_chunk_hold(chunk);
+ event->chunk = chunk;
+
event->stream = ntohs(chunk->subh.data_hdr->stream);
event->ssn = ntohs(chunk->subh.data_hdr->ssn);
event->ppid = chunk->subh.data_hdr->ppid;
@@ -710,11 +716,11 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
}
event->tsn = ntohl(chunk->subh.data_hdr->tsn);
event->msg_flags |= chunk->chunk_hdr->flags;
- event->iif = sctp_chunk_iif(chunk);
return event;
fail_mark:
+ sctp_chunk_put(chunk);
kfree_skb(skb);
fail:
return NULL;
@@ -1007,6 +1013,7 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
done:
sctp_assoc_rwnd_increase(event->asoc, len);
+ sctp_chunk_put(event->chunk);
sctp_ulpevent_release_owner(event);
}
@@ -1029,6 +1036,7 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event)
}
done:
+ sctp_chunk_put(event->chunk);
sctp_ulpevent_release_owner(event);
}
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 59658b2e9cdf..a5fc9dd24aa9 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1286,8 +1286,8 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi)
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
-static bool switchdev_port_same_parent_id(struct net_device *a,
- struct net_device *b)
+bool switchdev_port_same_parent_id(struct net_device *a,
+ struct net_device *b)
{
struct switchdev_attr a_attr = {
.orig_dev = a,
@@ -1323,6 +1323,7 @@ static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
return dev->ifindex;
}
+EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
u32 old_mark, u32 *reset_mark)
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 3ad9fab1985f..1fd464764765 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -604,7 +604,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
- nla_strlcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]),
+ nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME],
TIPC_MAX_LINK_NAME);
return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO,
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 39d9abd309ea..7645e97362c0 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -220,7 +220,7 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
if (WARN_ON(!rdev->scan_req->notified))
- rdev->scan_req->aborted = true;
+ rdev->scan_req->info.aborted = true;
___cfg80211_scan_done(rdev, false);
}
}
@@ -1087,7 +1087,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
cfg80211_update_iface_num(rdev, wdev->iftype, -1);
if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
if (WARN_ON(!rdev->scan_req->notified))
- rdev->scan_req->aborted = true;
+ rdev->scan_req->info.aborted = true;
___cfg80211_scan_done(rdev, false);
}
diff --git a/net/wireless/core.h b/net/wireless/core.h
index a4d547f99f8d..eee91443924d 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -141,6 +141,18 @@ struct cfg80211_internal_bss {
unsigned long refcount;
atomic_t hold;
+ /* time at the start of the reception of the first octet of the
+ * timestamp field of the last beacon/probe received for this BSS.
+ * The time is the TSF of the BSS specified by %parent_bssid.
+ */
+ u64 parent_tsf;
+
+ /* the BSS according to which %parent_tsf is set. This is set to
+ * the BSS that the interface that requested the scan was connected to
+ * when the beacon/probe was received.
+ */
+ u8 parent_bssid[ETH_ALEN] __aligned(2);
+
/* must be last because of priv member */
struct cfg80211_bss pub;
};
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c503e96bfd5a..5782f718d567 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -405,6 +405,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_PBSS] = { .type = NLA_FLAG },
[NL80211_ATTR_BSS_SELECT] = { .type = NLA_NESTED },
[NL80211_ATTR_STA_SUPPORT_P2P_PS] = { .type = NLA_U8 },
+ [NL80211_ATTR_MU_MIMO_GROUP_DATA] = {
+ .len = VHT_MUMIMO_GROUPS_DATA_LEN
+ },
+ [NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN },
};
/* policy for the key attributes */
@@ -2695,6 +2699,38 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
change = true;
}
+ if (info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]) {
+ const u8 *mumimo_groups;
+ u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag))
+ return -EOPNOTSUPP;
+
+ mumimo_groups =
+ nla_data(info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]);
+
+ /* bits 0 and 63 are reserved and must be zero */
+ if ((mumimo_groups[0] & BIT(7)) ||
+ (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(0)))
+ return -EINVAL;
+
+ memcpy(params.vht_mumimo_groups, mumimo_groups,
+ VHT_MUMIMO_GROUPS_DATA_LEN);
+ change = true;
+ }
+
+ if (info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]) {
+ u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag))
+ return -EOPNOTSUPP;
+
+ nla_memcpy(params.macaddr,
+ info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR],
+ ETH_ALEN);
+ change = true;
+ }
+
if (flags && (*flags & MONITOR_FLAG_ACTIVE) &&
!(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
return -EOPNOTSUPP;
@@ -4410,6 +4446,12 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
if (params.plink_state >= NUM_NL80211_PLINK_STATES)
return -EINVAL;
+ if (info->attrs[NL80211_ATTR_MESH_PEER_AID]) {
+ params.peer_aid = nla_get_u16(
+ info->attrs[NL80211_ATTR_MESH_PEER_AID]);
+ if (params.peer_aid > IEEE80211_MAX_AID)
+ return -EINVAL;
+ }
params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE;
}
@@ -5287,6 +5329,51 @@ static const struct nla_policy
[NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG },
};
+static int nl80211_check_bool(const struct nlattr *nla, u8 min, u8 max, bool *out)
+{
+ u8 val = nla_get_u8(nla);
+ if (val < min || val > max)
+ return -EINVAL;
+ *out = val;
+ return 0;
+}
+
+static int nl80211_check_u8(const struct nlattr *nla, u8 min, u8 max, u8 *out)
+{
+ u8 val = nla_get_u8(nla);
+ if (val < min || val > max)
+ return -EINVAL;
+ *out = val;
+ return 0;
+}
+
+static int nl80211_check_u16(const struct nlattr *nla, u16 min, u16 max, u16 *out)
+{
+ u16 val = nla_get_u16(nla);
+ if (val < min || val > max)
+ return -EINVAL;
+ *out = val;
+ return 0;
+}
+
+static int nl80211_check_u32(const struct nlattr *nla, u32 min, u32 max, u32 *out)
+{
+ u32 val = nla_get_u32(nla);
+ if (val < min || val > max)
+ return -EINVAL;
+ *out = val;
+ return 0;
+}
+
+static int nl80211_check_s32(const struct nlattr *nla, s32 min, s32 max, s32 *out)
+{
+ s32 val = nla_get_s32(nla);
+ if (val < min || val > max)
+ return -EINVAL;
+ *out = val;
+ return 0;
+}
+
static int nl80211_parse_mesh_config(struct genl_info *info,
struct mesh_config *cfg,
u32 *mask_out)
@@ -5297,9 +5384,8 @@ static int nl80211_parse_mesh_config(struct genl_info *info,
#define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \
do { \
if (tb[attr]) { \
- if (fn(tb[attr]) < min || fn(tb[attr]) > max) \
+ if (fn(tb[attr], min, max, &cfg->param)) \
return -EINVAL; \
- cfg->param = fn(tb[attr]); \
mask |= (1 << (attr - 1)); \
} \
} while (0)
@@ -5318,99 +5404,99 @@ do { \
/* Fill in the params struct */
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, 1, 255,
mask, NL80211_MESHCONF_RETRY_TIMEOUT,
- nla_get_u16);
+ nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, 1, 255,
mask, NL80211_MESHCONF_CONFIRM_TIMEOUT,
- nla_get_u16);
+ nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, 1, 255,
mask, NL80211_MESHCONF_HOLDING_TIMEOUT,
- nla_get_u16);
+ nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, 0, 255,
mask, NL80211_MESHCONF_MAX_PEER_LINKS,
- nla_get_u16);
+ nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, 0, 16,
mask, NL80211_MESHCONF_MAX_RETRIES,
- nla_get_u8);
+ nl80211_check_u8);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, 1, 255,
- mask, NL80211_MESHCONF_TTL, nla_get_u8);
+ mask, NL80211_MESHCONF_TTL, nl80211_check_u8);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, 1, 255,
mask, NL80211_MESHCONF_ELEMENT_TTL,
- nla_get_u8);
+ nl80211_check_u8);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, 0, 1,
mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS,
- nla_get_u8);
+ nl80211_check_bool);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor,
1, 255, mask,
NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR,
- nla_get_u32);
+ nl80211_check_u32);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, 0, 255,
mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
- nla_get_u8);
+ nl80211_check_u8);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, 1, 65535,
mask, NL80211_MESHCONF_PATH_REFRESH_TIME,
- nla_get_u32);
+ nl80211_check_u32);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, 1, 65535,
mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT,
- nla_get_u16);
+ nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout,
1, 65535, mask,
NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT,
- nla_get_u32);
+ nl80211_check_u32);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval,
1, 65535, mask,
NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
- nla_get_u16);
+ nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval,
1, 65535, mask,
NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL,
- nla_get_u16);
+ nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
dot11MeshHWMPnetDiameterTraversalTime,
1, 65535, mask,
NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
- nla_get_u16);
+ nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, 0, 4,
mask, NL80211_MESHCONF_HWMP_ROOTMODE,
- nla_get_u8);
+ nl80211_check_u8);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, 1, 65535,
mask, NL80211_MESHCONF_HWMP_RANN_INTERVAL,
- nla_get_u16);
+ nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
dot11MeshGateAnnouncementProtocol, 0, 1,
mask, NL80211_MESHCONF_GATE_ANNOUNCEMENTS,
- nla_get_u8);
+ nl80211_check_bool);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1,
mask, NL80211_MESHCONF_FORWARDING,
- nla_get_u8);
+ nl80211_check_bool);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0,
mask, NL80211_MESHCONF_RSSI_THRESHOLD,
- nla_get_s32);
+ nl80211_check_s32);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16,
mask, NL80211_MESHCONF_HT_OPMODE,
- nla_get_u16);
+ nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout,
1, 65535, mask,
NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
- nla_get_u32);
+ nl80211_check_u32);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, 1, 65535,
mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL,
- nla_get_u16);
+ nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
dot11MeshHWMPconfirmationInterval,
1, 65535, mask,
NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL,
- nla_get_u16);
+ nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode,
NL80211_MESH_POWER_ACTIVE,
NL80211_MESH_POWER_MAX,
mask, NL80211_MESHCONF_POWER_MODE,
- nla_get_u32);
+ nl80211_check_u32);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration,
0, 65535, mask,
- NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16);
+ NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 0, 0xffffffff,
mask, NL80211_MESHCONF_PLINK_TIMEOUT,
- nla_get_u32);
+ nl80211_check_u32);
if (mask_out)
*mask_out = mask;
@@ -6143,6 +6229,19 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
}
}
+ if (info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]) {
+ if (!wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_SET_SCAN_DWELL)) {
+ err = -EOPNOTSUPP;
+ goto out_free;
+ }
+
+ request->duration =
+ nla_get_u16(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]);
+ request->duration_mandatory =
+ nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]);
+ }
+
if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) {
request->flags = nla_get_u32(
info->attrs[NL80211_ATTR_SCAN_FLAGS]);
@@ -6976,6 +7075,13 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
jiffies_to_msecs(jiffies - intbss->ts)))
goto nla_put_failure;
+ if (intbss->parent_tsf &&
+ (nla_put_u64_64bit(msg, NL80211_BSS_PARENT_TSF,
+ intbss->parent_tsf, NL80211_BSS_PAD) ||
+ nla_put(msg, NL80211_BSS_PARENT_BSSID, ETH_ALEN,
+ intbss->parent_bssid)))
+ goto nla_put_failure;
+
if (intbss->ts_boottime &&
nla_put_u64_64bit(msg, NL80211_BSS_LAST_SEEN_BOOTTIME,
intbss->ts_boottime, NL80211_BSS_PAD))
@@ -11749,6 +11855,13 @@ static int nl80211_add_scan_req(struct sk_buff *msg,
nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags))
goto nla_put_failure;
+ if (req->info.scan_start_tsf &&
+ (nla_put_u64_64bit(msg, NL80211_ATTR_SCAN_START_TIME_TSF,
+ req->info.scan_start_tsf, NL80211_BSS_PAD) ||
+ nla_put(msg, NL80211_ATTR_SCAN_START_TIME_TSF_BSSID, ETH_ALEN,
+ req->info.tsf_bssid)))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
return -ENOBUFS;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index ef2955c89a00..0358e12be54b 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -3,6 +3,7 @@
*
* Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright 2016 Intel Deutschland GmbH
*/
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -194,7 +195,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
if (wdev->netdev)
cfg80211_sme_scan_done(wdev->netdev);
- if (!request->aborted &&
+ if (!request->info.aborted &&
request->flags & NL80211_SCAN_FLAG_FLUSH) {
/* flush entries from previous scans */
spin_lock_bh(&rdev->bss_lock);
@@ -202,10 +203,10 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
spin_unlock_bh(&rdev->bss_lock);
}
- msg = nl80211_build_scan_msg(rdev, wdev, request->aborted);
+ msg = nl80211_build_scan_msg(rdev, wdev, request->info.aborted);
#ifdef CONFIG_CFG80211_WEXT
- if (wdev->netdev && !request->aborted) {
+ if (wdev->netdev && !request->info.aborted) {
memset(&wrqu, 0, sizeof(wrqu));
wireless_send_event(wdev->netdev, SIOCGIWSCAN, &wrqu, NULL);
@@ -236,12 +237,13 @@ void __cfg80211_scan_done(struct work_struct *wk)
rtnl_unlock();
}
-void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted)
+void cfg80211_scan_done(struct cfg80211_scan_request *request,
+ struct cfg80211_scan_info *info)
{
- trace_cfg80211_scan_done(request, aborted);
+ trace_cfg80211_scan_done(request, info);
WARN_ON(request != wiphy_to_rdev(request->wiphy)->scan_req);
- request->aborted = aborted;
+ request->info = *info;
request->notified = true;
queue_work(cfg80211_wq, &wiphy_to_rdev(request->wiphy)->scan_done_wk);
}
@@ -843,6 +845,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
found->pub.capability = tmp->pub.capability;
found->ts = tmp->ts;
found->ts_boottime = tmp->ts_boottime;
+ found->parent_tsf = tmp->parent_tsf;
+ ether_addr_copy(found->parent_bssid, tmp->parent_bssid);
} else {
struct cfg80211_internal_bss *new;
struct cfg80211_internal_bss *hidden;
@@ -1086,6 +1090,8 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
tmp.ts_boottime = data->boottime_ns;
+ tmp.parent_tsf = data->parent_tsf;
+ ether_addr_copy(tmp.parent_bssid, data->parent_bssid);
signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
wiphy->max_adj_channel_rssi_comp;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 3c1091ae6c36..72b5255cefe2 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2642,8 +2642,9 @@ TRACE_EVENT(cfg80211_tdls_oper_request,
);
TRACE_EVENT(cfg80211_scan_done,
- TP_PROTO(struct cfg80211_scan_request *request, bool aborted),
- TP_ARGS(request, aborted),
+ TP_PROTO(struct cfg80211_scan_request *request,
+ struct cfg80211_scan_info *info),
+ TP_ARGS(request, info),
TP_STRUCT__entry(
__field(u32, n_channels)
__dynamic_array(u8, ie, request ? request->ie_len : 0)
@@ -2652,6 +2653,8 @@ TRACE_EVENT(cfg80211_scan_done,
MAC_ENTRY(wiphy_mac)
__field(bool, no_cck)
__field(bool, aborted)
+ __field(u64, scan_start_tsf)
+ MAC_ENTRY(tsf_bssid)
),
TP_fast_assign(
if (request) {
@@ -2666,9 +2669,16 @@ TRACE_EVENT(cfg80211_scan_done,
request->wiphy->perm_addr);
__entry->no_cck = request->no_cck;
}
- __entry->aborted = aborted;
+ if (info) {
+ __entry->aborted = info->aborted;
+ __entry->scan_start_tsf = info->scan_start_tsf;
+ MAC_ASSIGN(tsf_bssid, info->tsf_bssid);
+ }
),
- TP_printk("aborted: %s", BOOL_TO_STR(__entry->aborted))
+ TP_printk("aborted: %s, scan start (TSF): %llu, tsf_bssid: " MAC_PR_FMT,
+ BOOL_TO_STR(__entry->aborted),
+ (unsigned long long)__entry->scan_start_tsf,
+ MAC_PR_ARG(tsf_bssid))
);
DEFINE_EVENT(wiphy_only_evt, cfg80211_sched_scan_results,
@@ -2721,6 +2731,8 @@ TRACE_EVENT(cfg80211_inform_bss_frame,
__dynamic_array(u8, mgmt, len)
__field(s32, signal)
__field(u64, ts_boottime)
+ __field(u64, parent_tsf)
+ MAC_ENTRY(parent_bssid)
),
TP_fast_assign(
WIPHY_ASSIGN;
@@ -2730,10 +2742,15 @@ TRACE_EVENT(cfg80211_inform_bss_frame,
memcpy(__get_dynamic_array(mgmt), mgmt, len);
__entry->signal = data->signal;
__entry->ts_boottime = data->boottime_ns;
- ),
- TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "(scan_width: %d) signal: %d, tsb:%llu",
- WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width,
- __entry->signal, (unsigned long long)__entry->ts_boottime)
+ __entry->parent_tsf = data->parent_tsf;
+ MAC_ASSIGN(parent_bssid, data->parent_bssid);
+ ),
+ TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT
+ "(scan_width: %d) signal: %d, tsb:%llu, detect_tsf:%llu, tsf_bssid: "
+ MAC_PR_FMT, WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width,
+ __entry->signal, (unsigned long long)__entry->ts_boottime,
+ (unsigned long long)__entry->parent_tsf,
+ MAC_PR_ARG(parent_bssid))
);
DECLARE_EVENT_CLASS(cfg80211_bss_evt,